[addresses] Additions to the English address config

This commit is contained in:
Al
2016-04-14 00:56:39 -04:00
parent e37431912d
commit 8fdd3e9314

View File

@@ -18,25 +18,36 @@
numbers: numbers:
default: default:
canonical: number # canonical word in libpostal dictionary canonical: number # canonical word in libpostal dictionary
abbreviated: "no" # abbreviated form. no is a boolean in YAML, needs to be quoted abbreviated: "no" # most common abbreviated form ("no" is a boolean in YAML, needs to be quoted)
direction: left # The phrase "number/no" goes to the left of the number
sample: true # Randomly sample other variations (e.g. num, nr) sample: true # Randomly sample other variations (e.g. num, nr)
# Probabilities # Probabilities
canonical_probability: 0.3 # With this probability, use the canonical form canonical_probability: 0.3 # With this probability, use the canonical form
abbreviated_probability: 0.5 # With this probability, use the abbreviated form abbreviated_probability: 0.5 # With this probability, use the abbreviated form
sample_probability: 0.2 # With this probability, sample other variations sample_probability: 0.2 # With this probability, sample other variations
sample_exclude: sample_exclude:
- "#" # Used in numeric affix - "#" # Used in numeric affix. Needs to be quoted, otherwise it's a comment
numeric: numeric:
direction: left direction: left
numeric_affix: numeric_affix:
affix: "#" # e.g. #3, #2F, etc. affix: "#" # e.g. #3, #2F, etc.
direction: left # affix goes on the number's left direction: left # affix goes on the number's left
# Probabilities for numbers # Probabilities for numbers
numeric_probability: 0.4 # With this probability, use the standard numeric numeric_probability: 0.4 # With this probability, use the standard numeric
numeric_affix_probability: 0.6 # With this probability, use e.g. #3 instead of No. 3 numeric_affix_probability: 0.6 # With this probability, use e.g. #3 instead of No. 3
affix_integers_only: false
# And
# ===
# The word for "and". Used both in intersections and phrases like "Units 1 & 2", etc.
and:
default: &and
canonical: and
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Floor/level # Floor/level
@@ -62,6 +73,7 @@ levels:
# Numbered floors # Numbered floors
floor: &floor floor: &floor
canonical: floor canonical: floor
plural: floors
abbreviated: fl abbreviated: fl
canonical_probability: 0.5 # With this probability, use canonical version canonical_probability: 0.5 # With this probability, use canonical version
abbreviated_probability: 0.4 # With this probability, use abbreviated version abbreviated_probability: 0.4 # With this probability, use abbreviated version
@@ -89,6 +101,7 @@ levels:
# The word "level" is also occasionally used # The word "level" is also occasionally used
level: &level level: &level
canonical: level canonical: level
plural: levels
abbreviated: lvl abbreviated: lvl
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.5
@@ -112,6 +125,7 @@ levels:
ordinal_probability: 0.55 ordinal_probability: 0.55
platform: &platform platform: &platform
canonical: platform canonical: platform
plural: platforms
abbreviated: pf abbreviated: pf
canonical_probability: 0.7 canonical_probability: 0.7
abbreviated_probability: 0.3 abbreviated_probability: 0.3
@@ -123,12 +137,13 @@ levels:
ordinal_probability: 0.5 # e.g. 1st Platform ordinal_probability: 0.5 # e.g. 1st Platform
storey: &storey storey: &storey
canonical: storey canonical: storey
plural: storeys
numeric: numeric:
direction: left direction: left
ordinal: ordinal:
direction: right direction: right
numeric_probability: 0.1 # e.g. Storey 2, less common numeric_probability: 0.025 # e.g. Storey 2, less common
ordinal_probability: 0.9 # e.g. 2nd Storey, more common ordinal_probability: 0.975 # e.g. 2nd Storey, more common
# Special instructions for ground floor # Special instructions for ground floor
ground_floor: &ground_floor ground_floor: &ground_floor
canonical: ground floor canonical: ground floor
@@ -137,6 +152,20 @@ levels:
abbreviated_probability: 0.4 abbreviated_probability: 0.4
sample_probability: 0.2 sample_probability: 0.2
sample: true sample: true
ground: &ground
canonical: ground
abbreviated: g
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.1
sample_probability: 0.3
ground_level: &ground_level
canonical: ground level
abbreviated: g/l
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
# Special instructions for lower ground floor (added randomly, not an alias for a floor number) # Special instructions for lower ground floor (added randomly, not an alias for a floor number)
lower_ground_floor: &lower_ground_floor lower_ground_floor: &lower_ground_floor
canonical: lower ground floor canonical: lower ground floor
@@ -155,36 +184,48 @@ levels:
canonical_probability: 0.6 canonical_probability: 0.6
abbreviated_probability: 0.2 abbreviated_probability: 0.2
sample_probability: 0.2 sample_probability: 0.2
upper: &upper
canonical: upper
abbreviated: uppr
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
lower_level: &lower_level
canonical: lower level
abbreviated: lwr lvl
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
lobby: &lobby
canonical: lobby
upstairs: &upstairs upstairs: &upstairs
canonical: upstairs canonical: upstairs
downstairs: &downstairs downstairs: &downstairs
canonical: downstairs canonical: downstairs
# Special instructions for podium level (added randomly) # Special instructions for podium level (added randomly)
podium_level: &podium_level podium_level: &podium_level
default: canonical: podium level
canonical: podium level abbreviated: pd lvl
abbreviated: pd lvl sample: true
sample: true canonical_probability: 0.6
canonical_probability: 0.6 abbreviated_probability: 0.2
abbreviated_probability: 0.2 sample_probability: 0.2
sample_probability: 0.2 podium: &podium
probability: 0.8 # Probability of using the default form canonical: podium
alternatives: abbreviated: pd
- alternative: sample: true
canonical: podium canonical_probability: 0.6
abbreviated: pd abbreviated_probability: 0.2
sample: true sample_probability: 0.2
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
probability: 0.2 # Probability of using this alternative
# Used when floor number is < 0 (starts at -1 in all countries) # Used when floor number is < 0 (starts at -1 in all countries)
basement: &basement basement: &basement
canonical: basement canonical: basement
abbreviated: bsmt abbreviated: bsmt
sample: true sample: true
# e.g. Basement 1 # e.g. Basement 1
alphanumeric: numeric:
direction: left direction: left
# e.g. B1 # e.g. B1
numeric_affix: numeric_affix:
@@ -193,6 +234,12 @@ levels:
# e.g. 2nd Basement # e.g. 2nd Basement
ordinal: ordinal:
direction: right direction: right
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
cellar: &cellar cellar: &cellar
canonical: cellar canonical: cellar
sample: true sample: true
@@ -204,7 +251,7 @@ levels:
abbreviated: sb abbreviated: sb
sample: true sample: true
# e.g. Sub-basement 1 # e.g. Sub-basement 1
alphanumeric: numeric:
direction: left direction: left
# e.g. SB1 # e.g. SB1
numeric_affix: numeric_affix:
@@ -213,6 +260,14 @@ levels:
# e.g. 2nd Sub-basement # e.g. 2nd Sub-basement
ordinal: ordinal:
direction: right direction: right
number_abs_value: true
number_min_abs_value: 2
# Basement 2 == Sub-basement 1
number_subtract_abs_value: 1
standalone_probability: 0.985
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
top_floor: &top_floor top_floor: &top_floor
canonical: top floor canonical: top floor
abbreviated: tf abbreviated: tf
@@ -222,16 +277,14 @@ levels:
sample_probability: 0.1 sample_probability: 0.1
# Mezzanine level (floor number {0.5, 1.5, ...}, can also be added at random) # Mezzanine level (floor number {0.5, 1.5, ...}, can also be added at random)
mezzanine: &mezzanine mezzanine: &mezzanine
# Floor 0.5 is just plain mezzanine, no number
canonical: mezzanine canonical: mezzanine
abbreviated: mezz abbreviated: mezz
half_floors: true
sample: true sample: true
canonical_probability: 0.6 canonical_probability: 0.8
abbreviated_probability: 0.3 abbreviated_probability: 0.1
sample_probability: 0.1 sample_probability: 0.1
# Mezzanine/Mezz 2 or Mezzanine/Mezz A # Mezzanine/Mezz 2 or Mezzanine/Mezz A
alphanumeric: numeric:
direction: left direction: left
# M2 # M2
numeric_affix: numeric_affix:
@@ -240,51 +293,127 @@ levels:
# 2nd Mezzanine # 2nd Mezzanine
ordinal: ordinal:
direction: right direction: right
alphanumeric_probability: 0.3 # Floor 0.5 is just plain mezzanine, no number
numeric_affix_probability: 0.2 number_abs_value: true
ordinal_probability: 0.5 number_min_abs_value: 1
standalone_probability: 0.5
numeric_probability: 0.1
numeric_affix_probability: 0.1
ordinal_probability: 0.3
mezzanine_floor: &mezzanine_floor
canonical: mezzanine floor
abbreviated: mezz floor
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
mezzanine_level: &mezzanine_level
canonical: mezzanine level
abbreviated: mezz level
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
lower_mezzanine: &lower_mezzanine
canonical: lower mezzanine
abbreviated: lower mezz
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
upper_mezzanine: &upper_mezzanine
canonical: upper mezzanine
abbreviated: upper mezz
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
# Should be at least level 1.5
number_min_abs_value: 1
aliases: aliases:
"<-1":
default: *basement
probability: 0.6
alternatives:
- alternative: *sub_basement
probability: 0.3995
- alternative: *floor
probability: 0.0005
"-1": "-1":
default: *basement default: *basement
probability: 0.7 probability: 0.7
alternatives: alternatives:
- alternative: *cellar - alternative: *cellar
probability: 0.125 probability: 0.1
- alternative: *lower_ground_floor - alternative: *lower_ground_floor
probability: 0.1 probability: 0.1
- alternative: *downstairs - alternative: *downstairs
probability: 0.0495
- alternative: *lower_level
probability: 0.05 probability: 0.05
- alternative: *floor - alternative: *floor
probability: 0.025 probability: 0.0005
# Special token for half-floors # Special token for half-floors
"*.5": half_floors:
default: *mezzanine default: *mezzanine
"0":
default: *ground_floor
probability: 0.8 probability: 0.8
alternatives: alternatives:
- alternative: *upper_ground_floor - alternative: *mezzanine_floor
probability: 0.1 probability: 0.1
- alternative: *downstairs - alternative: *mezzanine_level
probability: 0.05 probability: 0.1
- alternative: *podium_level aliases:
"1":
default: *upper_mezzanine
probability: 0.5
alternatives:
- alternative: *mezzanine
probability: 0.5
half_floors_negative:
default: *lower_mezzanine
"0":
default: *ground_floor
probability: 0.9
alternatives:
- alternative: *ground
probability: 0.02
- alternative: *ground_level
probability: 0.01
- alternative: *lower_ground_floor
probability: 0.025 probability: 0.025
- alternative: *upper_ground_floor
probability: 0.025
- alternative: *lobby
probability: 0.005
- alternative: *floor - alternative: *floor
# Floor 0 is uncommon # Floor 0 is uncommon
probability: 0.025 probability: 0.01
- alternative: *level
probability: 0.005
"1": "1":
# Most of the time just say 1st Floor # Most of the time just say 1st Floor
default: *floor default: *floor
probability: 0.9 probability: 0.9
alternatives: alternatives:
- alternative: *upper_ground_floor
probability: 0.075
- alternative: *podium_level
probability: 0.01
- alternative: *podium
probability: 0.005
- alternative: *upstairs - alternative: *upstairs
probability: 0.1 probability: 0.01
top: top:
default: *floor default: *floor
probability: 0.5 probability: 0.85
alternatives: alternatives:
- alternative: *level
probability: 0.1
- alternative: *top_floor - alternative: *top_floor
probability: 0.3 probability: 0.05
# Integer for whether floors start at 0 or 1
numbering_starts_at: 0
# Associated phrases for alphanumeric floors (Floor 1, Floor A) # Associated phrases for alphanumeric floors (Floor 1, Floor A)
alphanumeric: alphanumeric:
@@ -304,8 +433,6 @@ levels:
numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A
alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2 alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2
alphanumeric_probability: 0.5 # Probability of using simple alphanumeric
alias_probability: 0.5 # Probability of using aliases
# Floors are not part of the global address formats (and are not always standard) # Floors are not part of the global address formats (and are not always standard)
# This is a list of places in the address where the floor number might go # This is a list of places in the address where the floor number might go
@@ -331,12 +458,7 @@ levels:
intersections: intersections:
# 26th & 6th Avenue # 26th & 6th Avenue
and: and: *and
canonical: and
abbreviated: "&"
canonical_probability: 0.4
abbreviated_probability: 0.6
sample: true
# 26th @ Broadway # 26th @ Broadway
at: at:
canonical: at canonical: at
@@ -395,18 +517,17 @@ po_boxes:
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 # PO Box #1234 add_number_phrase_probability: 0.4 # PO Box #1234
numeric_probability: 0.9 # numeric_probability: 0.9 #
alpha_probability: 0.01 # PO Box A alpha_probability: 0.01 # PO Box A
numeric_plus_alpha_probability: 0.04 # PO Box 123G numeric_plus_alpha_probability: 0.04 # PO Box 123G
digits: digits:
- length: 1 - length: 1
probability: 0.1 probability: 0.05
- length: 2 - length: 2
probability: 0.1 probability: 0.1
- length: 3 - length: 3
probability: 0.1 probability: 0.2
- length: 4 - length: 4
probability: 0.5 probability: 0.5
- length: 5 - length: 5
@@ -414,15 +535,16 @@ po_boxes:
- length: 6 - length: 6
probability: 0.05 probability: 0.05
# Overrides for commercial/office areas (landuse=commercial in OSM) zones:
commercial: # Overrides for commercial/office areas (landuse=commercial in OSM)
default: *po_box commercial:
probability: 0.7 default: *po_box
alternatives: probability: 0.7
- alternative: *private_mail_box alternatives:
probability: 0.2 - alternative: *private_mail_box
- alternative: *box probability: 0.2
probability: 0.1 - alternative: *box
probability: 0.1
# Categories # Categories
# ========== # ==========
@@ -724,6 +846,8 @@ units:
# Room #1 and Room No. 1 # Room #1 and Room No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
hall: &hall
canonical: hall
apartment: &apartment apartment: &apartment
canonical: apartment canonical: apartment
plural: apartments plural: apartments
@@ -820,6 +944,10 @@ units:
alternatives: 0.05 alternatives: 0.05
university: university:
default: *room default: *room
probability: 0.9
alternatives:
- alternative: *hall
probability: 0.1
allotments: allotments:
lot: lot:
default: *lot default: *lot
@@ -885,19 +1013,20 @@ countries:
direction: left direction: left
ordinal: ordinal:
direction: right direction: right
numeric_probability: 0.1 # e.g. Story 2, less common numeric_probability: 0.025 # e.g. Story 2, less common
ordinal_probability: 0.9 # e.g. 2nd Story, more common ordinal_probability: 0.975 # e.g. 2nd Story, more common
alphanumeric: alphanumeric:
default: *floor default: *floor
probability: 0.6 probability: 0.8
alternatives: alternatives:
- alternative: *level - alternative: *level
probability: 0.3 probability: 0.15
- alternative: *platform - alternative: *platform
probability: 0.05 probability: 0.025
- alternative: *story - alternative: *story
alternative: 0.05 probability: 0.025
aliases: numbering_starts_at: 1
aliases: &us_floor_aliases
"1": "1":
default: *floor default: *floor
probability: 0.6 probability: 0.6
@@ -907,9 +1036,9 @@ countries:
- alternative: *upper_ground_floor - alternative: *upper_ground_floor
probability: 0.1 probability: 0.1
"2": "2":
# Most of the time just say 1st Floor # Most of the time just say 2nd Floor
default: *floor default: *floor
probability: 0.8 probability: 0.9
alternatives: alternatives:
- alternative: *upstairs - alternative: *upstairs
probability: 0.1 probability: 0.1
@@ -961,25 +1090,14 @@ countries:
default: *room default: *room
# Canada # Canada
# Note: this is Canadian English only. If the address is in French it will use the French config # Specifically Canadian English. If the address is in French it will use fr.yaml
ca: ca:
levels: levels:
aliases: # Note: Canadian English uses "storey", in keeping with the British convention, so no need to change that
"1":
default: *floor # In Canada first floor is the ground floor, as in the US
probability: 0.6 numbering_starts_at: 1
alternatives: aliases: *us_floor_aliases
- alternative: *ground_floor
probability: 0.3
- alternative: *upper_ground_floor
probability: 0.1
"2":
# Most of the time just say 1st Floor
default: *floor
probability: 0.8
alternatives:
- alternative: *upstairs
probability: 0.1
# For (English-speaking) Canada, use the same unit types as in the US # For (English-speaking) Canada, use the same unit types as in the US
units: *us_unit_types units: *us_unit_types
# Australia # Australia