diff --git a/resources/addresses/en.yaml b/resources/addresses/en.yaml
index 061756fc..b27d3550 100644
--- a/resources/addresses/en.yaml
+++ b/resources/addresses/en.yaml
@@ -1,26 +1,43 @@
 # en.yaml
 # -------
 # Supplement to the per-country address formats for English around the world.
-# Note: by default, we use the UK conventions as they cover more countries
-# US/Canada-specific conventions and any others (e.g. Hong Kong, Australia)
-# go in country overrides
-#
-# Remember, these rules only get applied sometimes with random probabilities
-# unless specified using "frequency: always" (which should almost never be used)
+# These configs are mostly used to generate training data we don't have from OSM
+# like flat/apartment numbers, intersections, etc. The configs aren't directly used by
+# the parser model itself, but can influence it as they affect its input.
+
+# Note: by default, we use the UK conventions for English as they cover more countries.
+# US/Canada-specific conventions and any others (e.g. Hong Kong, Australia) go in the
+# country overrides section. Each country can create its own copy of the entire top-level
+# structure and it will be recursively merged with the defaults.
 
 # Number
 # ======
 # Number, No., #, etc. can be used in both floor and apartment numbers,
 # so we'll define it separately
 
-number:
-    canonical: number # canonical word in libpostal dictionary
-    abbreviated: "no" # abbreviated form. no is a boolean in YAML, needs to be quoted
-    prefer_abbreviated: true # Use abbreviated form more often than the canonical form
-    sample: true # Randomly sample other variations (e.g. num, nr)
-numeric_abbreviated:
-    affix: "#" # e.g. #3, #2F, etc.
-    direction: left # affix goes on the number's left
+numbers:
+    default:
+        canonical: number # canonical word in libpostal dictionary
+        abbreviated: "no" # abbreviated form. no is a boolean in YAML, needs to be quoted
+        direction: left # The phrase "number/no" goes to the left of the number
+        sample: true # Randomly sample other variations (e.g. num, nr)
+        # Probabilities
+        canonical_probability: 0.3 # With this probability, use the canonical form
+        abbreviated_probability: 0.5 # With this probability, use the abbreviated form
+        sample_probability: 0.2 # With this probability, sample other variations
+        sample_exclude:
+            - "#" # Used in numeric affix
+    numeric:
+        direction: left # the number phrase goes on the number's left, e.g. No. 3
+    numeric_affix:
+        affix: "#" # e.g. #3, #2F, etc.
+        direction: left # affix goes on the number's left
+
+    # Probabilities for numbers
+    numeric_probability: 0.4 # With this probability, use the standard numeric
+    numeric_affix_probability: 0.6 # With this probability, use e.g. #3 instead of No. 3
+    affix_integers_only: false # presumably lets the affix apply to non-integers such as #2F - TODO confirm
+
 
 # Floor/level
 # ===========
@@ -41,95 +58,268 @@ numeric_abbreviated:
 # is designed to be cross-lingual, so we can use the same structure with different words
 # and do this for addresses in pretty much any language.
 
-level:
+levels:
     # Numbered floors
-    floor:
+    floor: &floor
         canonical: floor
         abbreviated: fl
+        canonical_probability: 0.5 # With this probability, use canonical version
+        abbreviated_probability: 0.4 # With this probability, use abbreviated version
+        sample_probability: 0.1 # With this probability, sample from the other forms
+        sample_exclude:
+            - / f # Exclude this abbreviation since it's used as an affix
         sample: true
         # e.g. Floor 1
         numeric:
             direction: left # Floor/Fl goes to the left of the number
-            use_number_phrase: true # Occasionally add variation of "number", e.g. Floor #1, Floor No. 1
+            direction_probability: 0.8 # With 1 - this probability, Floor/Fl goes on the other side of the number
+            add_number_phrase: true # Occasionally add variation of "number", e.g. Floor No. 1
+            add_number_phrase_probability: 0.4 # With this probability, use Floor No. 1 or Floor #1 vs. Floor 1
         # e.g. 2/F, 3/F
-        numeric_abbreviated:
+        numeric_affix:
             affix: /f
-            direction: right # affix goes to number's right
+            direction: right # affix goes to number's right (always)
          # e.g. 1st Floor
         ordinal:
+            direction: right # canonical or abbreviated form goes to the ordinal's right
+        # Probabilities
+        numeric_probability: 0.75 # Use the simple number e.g. Floor 1 (or Floor No. 1)
+        numeric_affix_probability: 0.05 # Use the 2/F (less common)
+        ordinal_probability: 0.2 # Use the ordinal e.g. 1st Floor
+    # The word "level" is also occasionally used
+    level: &level
+        canonical: level
+        abbreviated: lvl
+        sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        sample_exclude:
+            - / l # Exclude this abbreviation since it's used as an affix
+        numeric:
+            direction: left # Level/Lvl goes to the left of the number
+            direction_probability: 0.8 # With 1 - this probability, Level/Lvl goes on the other side of the number
+            add_number_phrase: true # Occasionally add variation of "number", e.g. Level No. 1
+            add_number_phrase_probability: 0.4 # With this probability, use Level No. 1 or Level #1 vs. Level 1
+        # e.g. 2/L, 3/L (ambiguous with left)
+        numeric_affix:
+            affix: /l
             direction: right
+        ordinal:
+            direction: right
+        numeric_probability: 0.4
+        numeric_affix_probability: 0.05
+        ordinal_probability: 0.55
+    platform: &platform
+        canonical: platform
+        abbreviated: pf
+        canonical_probability: 0.7
+        abbreviated_probability: 0.3
+        numeric:
+            direction: left
+        ordinal:
+            direction: right
+        numeric_probability: 0.5 # e.g. Platform 1
+        ordinal_probability: 0.5 # e.g. 1st Platform
+    storey: &storey
+        canonical: storey
+        numeric:
+            direction: left
+        ordinal:
+            direction: right
+        numeric_probability: 0.1 # e.g. Storey 2, less common
+        ordinal_probability: 0.9 # e.g. 2nd Storey, more common
     # Special instructions for ground floor
-    ground_floor:
-        number: 0 # the 0th floor is typically the ground level in the UK/Commonwealth
+    ground_floor: &ground_floor
         canonical: ground floor
         abbreviated: g/f
+        canonical_probability: 0.4
+        abbreviated_probability: 0.4
+        sample_probability: 0.2
         sample: true
     # Special instructions for lower ground floor (added randomly, not an alias for a floor number)
-    lower_ground_floor:
+    lower_ground_floor: &lower_ground_floor
         canonical: lower ground floor
         abbreviated: lg
         sample: true
+        # Probabilities
+        canonical_probability: 0.6
+        abbreviated_probability: 0.3
+        sample_probability: 0.1
     # Special instructions for upper ground floor (added randomly, not an alias for a floor number)
-    upper_ground_floor:
+    upper_ground_floor: &upper_ground_floor
         canonical: upper ground floor
         abbreviated: ug
         sample: true
+        # Probabilities
+        canonical_probability: 0.6
+        abbreviated_probability: 0.2
+        sample_probability: 0.2
+    upstairs: &upstairs
+        canonical: upstairs
+    downstairs: &downstairs
+        canonical: downstairs
     # Special instructions for podium level (added randomly)
-    podium_level:
-        canonical: podium level
-        abbreviated: pd lvl
-        sample: true
-        alternates:
-            - canonical: podium
-              abbreviated: pd
-              sample: true
+    podium_level: &podium_level
+        default:
+            canonical: podium level
+            abbreviated: pd lvl
+            sample: true
+            canonical_probability: 0.6
+            abbreviated_probability: 0.2
+            sample_probability: 0.2
+        probability: 0.8 # Probability of using the default form
+        alternatives:
+            - alternative:
+                  canonical: podium
+                  abbreviated: pd
+                  sample: true
+                  canonical_probability: 0.6
+                  abbreviated_probability: 0.2
+                  sample_probability: 0.2
+              probability: 0.2 # Probability of using this alternative
     # Used when floor number is < 0 (starts at -1 in all countries)
-    basement:
+    basement: &basement
         canonical: basement
         abbreviated: bsmt
         sample: true
         # e.g. Basement 1
-        numeric:
+        alphanumeric:
             direction: left
         # e.g. B1
-        numeric_abbreviated:
+        numeric_affix:
             affix: b
             direction: left
         # e.g. 2nd Basement
         ordinal:
             direction: right
+    cellar: &cellar
+        canonical: cellar
+        sample: true
+        canonical_probability: 0.8
+        sample_probability: 0.2
     # Floor number of <= -2 can be basement 2, sub-basement, sub-basement 1, etc.
-    sub_basement:
+    sub_basement: &sub_basement
         canonical: sub basement
         abbreviated: sb
         sample: true
         # e.g. Sub-basement 1
-        numeric:
+        alphanumeric:
             direction: left
         # e.g. SB1
-        numeric_abbreviated:
+        numeric_affix:
             affix: sb
             direction: left
         # e.g. 2nd Sub-basement
         ordinal:
             direction: right
+    top_floor: &top_floor
+        canonical: top floor
+        abbreviated: tf
+        sample: true
+        canonical_probability: 0.6
+        abbreviated_probability: 0.3
+        sample_probability: 0.1
     # Mezzanine level (floor number {0.5, 1.5, ...}, also be added at random)
-    mezzanine:
+    mezzanine: &mezzanine
         # Floor 0.5 is just plain mezzanine, no number
         canonical: mezzanine
         abbreviated: mezz
         half_floors: true
         sample: true
-        # Mezzanine/Mezz 2
-        numeric:
+        canonical_probability: 0.6
+        abbreviated_probability: 0.3
+        sample_probability: 0.1
+        # Mezzanine/Mezz 2 or Mezzanine/Mezz A
+        alphanumeric:
             direction: left
         # M2
-        numeric_abbreviated:
+        numeric_affix:
             affix: m
             direction: left
         # 2nd Mezzanine
         ordinal:
             direction: right
+        alphanumeric_probability: 0.3
+        numeric_affix_probability: 0.2
+        ordinal_probability: 0.5
+    aliases:
+        "-1":
+            default: *basement
+            probability: 0.7
+            alternatives:
+                - alternative: *cellar
+                  probability: 0.125
+                - alternative: *lower_ground_floor
+                  probability: 0.1
+                - alternative: *downstairs
+                  probability: 0.05
+                - alternative: *floor
+                  probability: 0.025
+        # Special token for half-floors
+        "*.5":
+            default: *mezzanine
+        "0":
+            default: *ground_floor
+            probability: 0.8
+            alternatives:
+                - alternative: *upper_ground_floor
+                  probability: 0.1
+                - alternative: *downstairs
+                  probability: 0.05
+                - alternative: *podium_level
+                  probability: 0.025
+                - alternative: *floor
+                  # Floor 0 is uncommon
+                  probability: 0.025
+        "1":
+            # Most of the time just say 1st Floor
+            default: *floor
+            probability: 0.9
+            alternatives:
+                - alternative: *upstairs
+                  probability: 0.1
+        top:
+            default: *floor
+            probability: 0.5
+            alternatives:
+                - alternative: *top_floor
+                  probability: 0.3 # NOTE(review): 0.5 + 0.3 = 0.8; the "-1", "0" and "1" aliases each sum to 1 - confirm
+
+    # Associated phrases for alphanumeric floors (Floor 1, Floor A)
+    alphanumeric:
+        default: *floor
+        probability: 0.8
+        add_number_phrase: true
+        add_number_phrase_probability: 0.3
+        alternatives:
+            - alternative: *level
+              probability: 0.15
+            - alternative: *platform
+              probability: 0.025
+            - alternative: *storey
+              probability: 0.025
+        numeric_probability: 0.99 # With this probability, pick an integer
+        alpha_probability: 0.0098 # With this probability, pick a letter e.g. Floor A
+        numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A
+        alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2
+
+    alphanumeric_probability: 0.5 # Probability of using simple alphanumeric
+    alias_probability: 0.5 # Probability of using aliases
+
+    # Floors are not part of the global address formats (and are not always standard)
+    # This is a list of places in the address where the floor number might go
+    order:
+        # e.g. 123 East 45th St, 6th Floor, NYC
+        - after: road
+          probability: 0.5
+        # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
+        - before: house
+          probability: 0.25
+        # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
+        - before: road
+          probability: 0.25
+
 
 # Intersections
 # =============
@@ -144,17 +334,24 @@ intersections:
     and:
         canonical: and
         abbreviated: "&"
+        canonical_probability: 0.4
+        abbreviated_probability: 0.6
         sample: true
     # 26th @ Broadway
     at:
         canonical: at
         abbreviated: "@"
+        canonical_probability: 0.7
+        abbreviated_probability: 0.3
         sample: true
     # 26th betw 5th Ave and 6th Ave
     between:
         canonical: between
         abbreviated: betw
+        canonical_probability: 0.5
+        abbreviated_probability: 0.5
         sample: true
+        parentheses_probability: 0.5 # Probability of using parentheses e.g. (between 5th and 6th)
 
 # PO Box addresses
 # ================
@@ -169,24 +366,63 @@ intersections:
 # words/phrases than the specific number i.e. numbers in the range 1000-9999
 # can simply be normalized to DDDD.
 
-po_box:
-    numeric:
+po_boxes:
+    po_box: &po_box
+        canonical: post office box
+        abbreviated: po box
+        sample: true
+        prefer_abbreviated: true
+
+    box: &box
+        canonical: box
+        sample: true
+
+    private_mail_box: &private_mail_box
+        canonical: private mail box
+        abbreviated: pmb
+        prefer_abbreviated: true
+        sample: true
+
+    alphanumeric:
         # Don't sample all the forms in post_office.txt as many of the PO box
         # phrases appear only in Australia
         sample: false
-        default:
-            canonical: post office box
-            abbreviated: po box
-            sample: true
-            prefer_abbreviated: true
-        # Alternative phrases to use
-        alternates:
-            - canonical: box
-              sample: true
-            - canonical: private mail box
-              abbreviated: pmb
-              prefer_abbreviated: true
-              sample: true
+        default: *po_box
+        probability: 0.95
+        alternatives:
+            - alternative: *box
+              probability: 0.05
+
+        add_number_phrase: true
+        add_number_phrase_probability: 0.4 # PO Box #1234
+
+        numeric_probability: 0.9 # PO Box 1234
+        alpha_probability: 0.01 # PO Box A
+        numeric_plus_alpha_probability: 0.04 # PO Box 123G. NOTE(review): these three sum to 0.95 - confirm
+
+        digits: # presumably the number of digits in a generated box number - TODO confirm
+            - length: 1
+              probability: 0.1
+            - length: 2
+              probability: 0.1
+            - length: 3
+              probability: 0.1
+            - length: 4
+              probability: 0.5
+            - length: 5
+              probability: 0.1
+            - length: 6
+              probability: 0.05 # NOTE(review): the length probabilities sum to 0.95 - confirm
+
+    # Overrides for commercial/office areas (landuse=commercial in OSM)
+    commercial:
+        default: *po_box
+        probability: 0.7
+        alternatives:
+            - alternative: *private_mail_box
+              probability: 0.2
+            - alternative: *box
+              probability: 0.1
 
 # Categories
 # ==========
@@ -195,18 +431,33 @@ po_box:
 
 categories:
     near:
-        canonical: near
-        alternates:
-            - around
+        default:
+            canonical: near
+        probability: 0.8
+        alternatives:
+            - alternative:
+                  canonical: around
+              probability: 0.2
     nearby:
-        canonical: nearby
-        alternates:
-            - near here
-            - around here
+        default:
+            canonical: nearby
+        probability: 0.6
+        alternatives:
+            - alternative:
+                  canonical: near here
+              probability: 0.3
+            - alternative:
+                  canonical: around here
+              probability: 0.1
     near_me:
         canonical: near me
     in:
         canonical: in
+    # Probabilities of each phrase
+    near_probability: 0.35
+    nearby_probability: 0.2
+    near_me_probability: 0.1
+    in_probability: 0.35
 
 # Directions
 # ==========
@@ -215,44 +466,76 @@ categories:
 # like "East Entrance".
 
 directions:
-    right:
+    right: &right
         canonical: right
-        numeric_abbreviated:
+        abbreviated: r
+        canonical_probability: 0.7
+        abbreviated_probability: 0.3
+        numeric_affix:
             affix: r
             direction: right
-    left:
+    left: &left
         canonical: left
-        numeric_abbreviated:
+        abbreviated: l
+        canonical_probability: 0.7
+        abbreviated_probability: 0.3
+        numeric_affix:
             affix: l
             direction: right
-    rear:
+    rear: &rear
         canonical: rear
-        numeric_abbreviated:
+        abbreviated: r
+        canonical_probability: 0.8
+        abbreviated_probability: 0.2
+        numeric_affix:
             affix: r
             direction: right
-    front:
+    front: &front
         canonical: front
-        numeric_abbreviated:
+        abbreviated: frnt
+        canonical_probability: 0.8
+        abbreviated_probability: 0.2
+        numeric_affix:
             affix: f
             direction: right
-    east:
+    east: &east
         canonical: east
-        numeric_abbreviated:
+        abbreviated: e
+        sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        numeric_affix:
             affix: e
             direction: right
-    west:
+    west: &west
         canonical: west
-        numeric_abbreviated:
+        abbreviated: w
+        sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        numeric_affix:
             affix: w
             direction: right
-    north:
+    north: &north
         canonical: north
-        numeric_abbreviated:
+        abbreviated: n
+        sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        numeric_affix:
             affix: n
             direction: right
-    south:
+    south: &south
         canonical: south
-        numeric_abbreviated:
+        abbreviated: s
+        sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        numeric_affix:
             affix: s
             direction: right
 
@@ -260,23 +543,82 @@ directions:
 # ========
 # For deriving strings like "North Entrance"
 
-entrance:
-    canonical: entrance
-    abbreviated: ent
-    sample: true
+entrances:
+    entrance: &entrance
+        canonical: entrance
+        abbreviated: ent
+        sample: true
+        canonical_probability: 0.8
+        abbreviated_probability: 0.2
+
+    # Entrance 1, Entrance A, etc.
+    alphanumeric: &entrance_alphanumeric
+        default: *entrance
+
+    directional:
+        base: *entrance_alphanumeric
+        modifier:
+            direction: left # e.g. North Entrance
+            direction_probability: 0.9
+            alternatives:
+                - alternative: *north
+                - alternative: *south
+                - alternative: *east
+                - alternative: *west
+                - alternative: *right
+                - alternative: *left
+                - alternative: *rear
+                - alternative: *front
+                - alternative:
+                      canonical: freight
 
 # Staircase
 # =========
 # For deriving strings like "Staircase A" in apartment buildings
 
-staircase:
-    canonical: stair
-    sample: true
-    alternates:
-        - canonical: stairway
-          sample: true
-        - canonical: staircase
-          sample: true
+staircases:
+    stair: &stair
+        canonical: stair
+        sample: true
+
+    staircase: &staircase
+        canonical: staircase
+        sample: true
+
+    stairway: &stairway
+        canonical: stairway
+        sample: true
+
+    stairwell: &stairwell
+        canonical: stairwell
+        sample: true
+
+    alphanumeric: &staircase_alphanumeric
+        # For alphanumerics, Stair A, Stair 1, etc.
+        default: *stair
+        probability: 0.4
+        alternatives:
+            - alternative: *staircase
+              probability: 0.2
+            - alternative: *stairway
+              probability: 0.2
+            - alternative: *stairwell
+              probability: 0.2
+
+    directional:
+        base: *staircase_alphanumeric
+        modifier:
+            direction: left # e.g. Left Staircase
+            direction_probability: 0.7
+            alternatives:
+                - alternative: *north
+                - alternative: *south
+                - alternative: *east
+                - alternative: *west
+                - alternative: *right
+                - alternative: *left
+                - alternative: *rear
+                - alternative: *front
 
 
 # Unit types
@@ -285,68 +627,249 @@ staircase:
 # Just like thoroughfare types (Street, Avenue, etc.), there are many common ways to
 # refer to the 
 
-unit_types:
+units:
     # Units are not part of the global address formats (and are not always standard)
     # This is a list of places in the address where the unit line might go
     order:
         # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
         - before: house
+          probability: 0.4
         # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
         - before: road
-        # e.g. 123 East 45th St, Apt 6, NYC
-        - after: road
+          probability: 0.2
+        # e.g. Floor 5, Apt 6
+        - after: level
+          probability: 0.3
+        # e.g. Apt. 6, 5/F (less common)
+        - before: level
+          probability: 0.1
+
     # Special terms
     suite: &suite
         canonical: suite
+        plural: suites
         abbreviated: ste
         sample: true
+        canonical_probability: 0.4 # With this probability, use the canonical form (suite)
+        abbreviated_probability: 0.4 # With this probability, use the abbreviated form (ste)
+        sample_probability: 0.2 # With this probability, sample other variations
+        # Suite #101 and Suite No. 101 as opposed to Suite 101
+        add_number_phrase: true
+        add_number_phrase_probability: 0.5
     penthouse: &penthouse
         canonical: penthouse
         abbreviated: ph
         sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        # Penthouse #1 and Penthouse No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.2
+    top_left: &top_left
+        canonical: top left
+        abbreviated: t/l
+        sample: true
+        canonical_probability: 0.4
+        abbreviated_probability: 0.3
+        sample_probability: 0.3
+    top_right: &top_right
+        canonical: top right
+        abbreviated: t/r
+        sample: true
+        canonical_probability: 0.4
+        abbreviated_probability: 0.3
+        sample_probability: 0.3
+    top_floor_right: &top_floor_right
+        canonical: top floor right
+        abbreviated: tfr
+        sample: true
+        canonical_probability: 0.2
+        abbreviated_probability: 0.5
+        sample_probability: 0.3
+    top_floor_left: &top_floor_left
+        canonical: top floor left
+        abbreviated: tfl
+        sample: true
+        canonical_probability: 0.2
+        abbreviated_probability: 0.5
+        sample_probability: 0.3
     office: &office
         canonical: office
+        plural: offices
         abbreviated: ofc
         sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.3
+        sample_probability: 0.2
+        # Office #1 and Office No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.7
     door: &door
         canonical: door
+        plural: doors
         sample: true
+        canonical_probability: 0.8
+        sample_probability: 0.2
+        # Door #1 and Door No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.2
     room: &room
         canonical: room
+        plural: rooms
         abbreviated: rm
         sample: true
+        canonical_probability: 0.5
+        abbreviated_probability: 0.5
+        # Room #1 and Room No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.6
     apartment: &apartment
         canonical: apartment
+        plural: apartments
         abbreviated: apt
         prefer_abbreviated: true
         sample: true
+        canonical_probability: 0.15
+        abbreviated_probability: 0.6
+        sample_probability: 0.25
+        # Apt #1 and Apt No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.4
     flat: &flat
         canonical: flat
+        plural: flats
         abbreviated: fl
         sample: true
+        canonical_probability: 0.8
+        abbreviated_probability: 0.1
+        sample_probability: 0.1
+        # Flat #1 and Flat No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.4
     lot: &lot
         canonical: lot
+        plural: lots
         sample: true
+        canonical_probability: 0.9
+        sample_probability: 0.1
+        # Lot #1 and Lot No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.6
+    parcel: &parcel
+        canonical: parcel
+        canonical_probability: 0.9
+        sample: true
+        sample_probability: 0.1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.6
     unit: &unit
         canonical: unit
         abbreviated: u
-        use_number_phrase: true
+        # Form selection: sample other variations, and pick the canonical (unit),
+        # abbreviated (u) or sampled form with the probabilities below
         sample: true
-    numeric:
+        canonical_probability: 0.8
+        abbreviated_probability: 0.1
+        sample_probability: 0.1
+        # Unit #1 and Unit No. 1
+        add_number_phrase: true
+        add_number_phrase_probability: 0.6
+    alphanumeric: &unit_alphanumeric
         # Many unit types that apply only in Australia
         # For most English-speaking countries, only use the terms defined above
         sample: false
-        use_number_phrase: true
-        use_direction_phrase: true
         default: *flat
-        alternates:
-            - *apartment
-            - *unit
-            - *door
-            - *room
-            - *office
-            - *penthouse
-            - *lot
+        probability: 0.6
+        alternatives:
+            - alternative: *unit
+              probability: 0.25
+            - alternative: *door
+              probability: 0.04
+            - alternative: *penthouse
+              probability: 0.01
+            - alternative: *apartment
+              probability: 0.1
+        numeric_probability: 0.8 # e.g. Flat 1
+        numeric_plus_alpha_probability: 0.05 # e.g. 1A
+        alpha_plus_numeric_probability: 0.05 # e.g. A1
+        alpha_probability: 0.1 # e.g. Flat A
+
+        # Separate random probability for adding directions like 2L, 2R, etc.
+        add_direction: true
+        add_direction_probability: 0.1
+        add_direction_numeric_only: true # Only for numbers
+
+    zone:
+        residential: *unit_alphanumeric
+        commercial:
+            default: *suite
+            probability: 0.8
+            alternatives:
+                - alternative: *office
+                  probability: 0.2
+        industrial:
+            default: *lot
+            probability: 0.7 # Probability of using the default form (lot)
+            alternatives:
+                - alternative: *suite
+                  probability: 0.15
+                - alternative: *unit
+                  probability: 0.1
+                - alternative: *parcel
+                  probability: 0.05
+        university:
+            default: *room
+        allotments:
+            lot:
+                default: *lot
+                numeric_probability: 0.8
+                alphanumeric_probability: 0.1
+                alpha_probability: 0.1
+            parcel:
+                default: *parcel
+                numeric_probability: 0.3
+                alphanumeric_probability: 0.3
+                alpha_probability: 0.4
+            lot_probability: 0.9
+            parcel_probability: 0.06
+            lot_plus_parcel_probability: 0.02
+            parcel_plus_lot_probability: 0.02
+
+    # For unit types like 2/34
+    combined:
+        component: house_number
+        direction: left
+
+    directional:
+        modifier:
+            direction: right # direction goes to the unit number's right, e.g. 2L, 2R
+            numeric_probability: 0.1
+            numeric_affix_probability: 0.9 # presumably the chance of using the direction's numeric_affix (r, l, f) - TODO confirm
+            alternatives:
+                - alternative: *right
+                - alternative: *left
+                - alternative: *rear
+                - alternative: *front
+
+    standalone:
+        sample: false
+        default: *penthouse
+        probability: 0.4
+        alternatives:
+            - alternative: *top_right
+              probability: 0.15
+            - alternative: *top_left
+              probability: 0.15
+            - alternative: *top_floor_left
+              probability: 0.15
+            - alternative: *top_floor_right
+              probability: 0.15
+
+    # If no unit number is specified
+    alphanumeric_probability: 0.75
+    standalone_probability: 0.15
+    combined_probability: 0.1
 
 # Country-specific overrides
 # ==========================
@@ -355,32 +878,126 @@ unit_types:
 countries:
     # United States
     us:
-        level:
-            ground_floor:
-                number: 1
-        unit_types:
-            numeric:
+        levels:
+            storey: &story # anchored so it can be reused as *story in the alternatives below
+                canonical: story
+                numeric:
+                    direction: left # phrase goes to the left of the number (Story 2)
+                ordinal:
+                    direction: right # phrase goes to the right of the ordinal (2nd Story)
+                numeric_probability: 0.1 # e.g. Story 2, less common
+                ordinal_probability: 0.9 # e.g. 2nd Story, more common
+            alphanumeric:
+                default: *floor
+                probability: 0.6 # default + the three alternatives below sum to 1.0
+                alternatives:
+                    - alternative: *level
+                      probability: 0.3
+                    - alternative: *platform
+                      probability: 0.05
+                    - alternative: *story
+                      probability: 0.05
+            aliases:
+                "1":
+                    default: *floor
+                    probability: 0.6
+                    alternatives:
+                        - alternative: *ground_floor
+                          probability: 0.3
+                        - alternative: *upper_ground_floor
+                          probability: 0.1
+                "2":
+                    # Most of the time just say 2nd Floor (NOTE(review): UK usage calls this level the 1st Floor - confirm)
+                    default: *floor
+                    probability: 0.8
+                    alternatives:
+                        - alternative: *upstairs
+                          probability: 0.1 # NOTE(review): 0.8 + 0.1 = 0.9, not 1.0 - confirm the remainder is intentional
+        po_boxes:
+            concatenate_postcode:
+                po_box_max_digits: 4 # For PO boxes with max n digits
+                direction: left # Concatenate on the left side of the PO box
+                postcode_digits:
+                    length: 2 # use this many digits from the postal code
+                    direction: right
+            concatenate_postcode_probability: 0.01
+        postcodes:
+            concatenate_po_box:
+                append:
+                    separator: "-" # Use a hyphen separator
+                    direction: right # To the right of the postcode
+                digits:
+                    length: 4 # number of digits to append to the ZIP code
+                    pad:
+                        direction: left # left pad
+                        character: "0" # pad with 0s, e.g. for PO Box 52, use -0052
+            concatenate_po_box_probability: 0.1
+        units: &us_unit_types
+            alphanumeric: &us_units_alphanumeric
                 default: *apartment
-                country_alternates:
-                    - *flat
+                probability: 0.6
+                alternatives:
+                    - alternative: *unit
+                      probability: 0.3
+                    - alternative: *door
+                      probability: 0.02
+                    - alternative: *penthouse
+                      probability: 0.07
+                    - alternative: *flat
+                      probability: 0.01 # See this e.g. in Milwaukee with Polish flats
+
+            zone:
+                residential: *us_units_alphanumeric # same choices as the US alphanumeric units anchored above
+                commercial:
+                    default: *office
+                    probability: 0.5 # default + the single alternative below sum to 1.0
+                    alternatives:
+                        - alternative: *suite # Suite is much more common in the US and Canada
+                          probability: 0.5
+                industrial:
+                    default: *lot
+                    probability: 0.6 # NOTE(review): no alternatives listed; remainder presumably comes from the merged defaults - confirm
+                university:
+                    default: *room
+
     # Canada
     # Note: this is Canadian English only. If the address is in French it will use the French config
     ca:
-        level:
-            ground_floor:
-                number: 1
-        unit_types:
-            numeric:
-                default: *apartment
-                country_alternates:
-                    - *flat
+        levels:
+            aliases:
+                "1":
+                    default: *floor
+                    probability: 0.6
+                    alternatives:
+                        - alternative: *ground_floor
+                          probability: 0.3
+                        - alternative: *upper_ground_floor
+                          probability: 0.1
+                "2":
+                    # Most of the time just say 2nd Floor (NOTE(review): UK usage calls this level the 1st Floor - confirm)
+                    default: *floor
+                    probability: 0.8
+                    alternatives:
+                        - alternative: *upstairs
+                          probability: 0.1 # NOTE(review): 0.8 + 0.1 = 0.9, not 1.0 - confirm the remainder is intentional
+        # For (English-speaking) Canada, use the same unit types as in the US
+        units: *us_unit_types
     # Australia
     au:
-        po_box:
-            numeric:
+        po_boxes: &australia_po_boxes
+            alphanumeric:
                 # Australia has many strings for this e.g. Roadside Mail Bag
                 sample: true
-        unit_types:
-            numeric:
+                sample_probability: 0.05
+        units: &australia_unit_types
+            alphanumeric:
                 # Australia has all kinds of unit types (e.g. Marine Berth) not used elsewhere
                 sample: true
+                sample_probability: 0.2
+            standalone:
+                sample: true
+                sample_probability: 0.2
+    # New Zealand - same rules as Australia
+    nz:
+        po_boxes: *australia_po_boxes
+        units: *australia_unit_types