[addresses] English sub-building component probabilities and some fixes

This commit is contained in:
Al
2016-05-18 02:46:11 -04:00
parent 8652f74fef
commit fb286b48db

View File

@@ -10,6 +10,112 @@
# country overrides section. Each country can create its own copy of the entire top-level # country overrides section. Each country can create its own copy of the entire top-level
# structure and it will be recursively merged with the defaults. # structure and it will be recursively merged with the defaults.
# Components
# ==========
# How likely we are to generate a component at random
# Probability of generating each sub-building component at random.
# Within every mapping below, the *_probability values sum to 1.0.
# NOTE(review): null_probability presumably means "generate nothing for this
# component" — confirm against the code that consumes this file.
components:
  po_box:
    null_probability: 0.9
    alphanumeric_probability: 0.1
    # Presumably replaces the probabilities above when the named component
    # is already present in the address — TODO confirm
    conditional:
      - component: level
        probabilities:
          null_probability: 0.995
          alphanumeric_probability: 0.005
      - component: unit
        probabilities:
          null_probability: 0.99
          alphanumeric_probability: 0.01
      - component: staircase
        probabilities:
          null_probability: 0.999
          alphanumeric_probability: 0.001
      - component: entrance
        probabilities:
          null_probability: 0.999
          alphanumeric_probability: 0.001
  level:
    # If no floor number is specified
    null_probability: 0.85
    alphanumeric_probability: 0.15
    # Conditional probabilities
    conditional:
      # e.g. given that we have unit already (natural or generated)
      - component: unit
        probabilities:
          null_probability: 0.95
          alphanumeric_probability: 0.05
      - component: staircase
        probabilities:
          null_probability: 0.6
          alphanumeric_probability: 0.4
  entrance:
    null_probability: 0.9994
    alphanumeric_probability: 0.0005
    directional_probability: 0.0001
    conditional:
      - component: staircase
        probabilities:
          null_probability: 0.99995
          alphanumeric_probability: 0.00005
      - component: level
        probabilities:
          null_probability: 0.9995
          alphanumeric_probability: 0.0005
  staircase:
    # No conditional overrides are defined for staircase
    null_probability: 0.9989
    alphanumeric_probability: 0.001
    directional_probability: 0.0001
  unit:
    # If no unit number is specified
    null_probability: 0.4
    alphanumeric_probability: 0.55
    standalone_probability: 0.05
    conditional:
      - component: level
        probabilities:
          null_probability: 0.95
          alphanumeric_probability: 0.05
      - component: staircase
        probabilities:
          null_probability: 0.7
          alphanumeric_probability: 0.3
# Ways of joining two components into a single token with a separator.
# Within each entry the separator probabilities sum to 1.0.
# NOTE(review): `label` presumably names the component the joined token is
# tagged as, and the entry-level `probability` the chance the combination is
# used at all — confirm against the code that consumes this file.
combinations:
  # For unit types like 2/34 (more common in Canada and Australia)
  house_number_unit:
    components:
      - house_number
      - unit
    label: house_number
    separators:
      - separator: "/"
        probability: 0.8
      - separator: "-"
        probability: 0.1
      - separator: " - "
        probability: 0.1
    probability: 0.005
  # Level joined with unit; here "-" is the most likely separator
  level_unit:
    components:
      - level
      - unit
    label: unit
    separators:
      - separator: "/"
        probability: 0.1
      - separator: "-"
        probability: 0.8
      - separator: " - "
        probability: 0.1
    probability: 0.001
# Number # Number
# ====== # ======
# Number, No., #, etc. can be used in both floor and apartment numbers, # Number, No., #, etc. can be used in both floor and apartment numbers,
@@ -432,20 +538,10 @@ levels:
alpha_probability: 0.0098 # With this probability, pick a letter e.g. Floor A alpha_probability: 0.0098 # With this probability, pick a letter e.g. Floor A
numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A numeric_plus_alpha_probability: 0.0001 # e.g. Floor 2A
alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2 alpha_plus_numeric_probability: 0.0001 # e.g. Floor A2
numeric_plus_alpha:
whitespace_probability: 0.1
# Floors are not part of the global address formats (and are not always standard) alpha_plus_numeric:
# This is a list of places in the address where the floor number might go whitespace_probability: 0.1
order:
# e.g. 123 East 45th St, 6th Floor, NYC
- after: road
probability: 0.5
# e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
- before: house
probability: 0.25
# e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
- before: road
probability: 0.25
# Intersections # Intersections
@@ -514,8 +610,6 @@ po_boxes:
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 # PO Box #1234 add_number_phrase_probability: 0.4 # PO Box #1234
numeric_probability: 1.0
box: &box box: &box
canonical: box canonical: box
sample: true sample: true
@@ -526,8 +620,6 @@ po_boxes:
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 # Box #1234 add_number_phrase_probability: 0.4 # Box #1234
numeric_probability: 1.0
private_mail_box: &private_mail_box private_mail_box: &private_mail_box
canonical: private mail box canonical: private mail box
abbreviated: pmb abbreviated: pmb
@@ -542,8 +634,6 @@ po_boxes:
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 # PMB #1234 add_number_phrase_probability: 0.4 # PMB #1234
numeric_probability: 1.0
alphanumeric: alphanumeric:
# Don't sample all the forms in post_office.txt as many of the PO box # Don't sample all the forms in post_office.txt as many of the PO box
# phrases appear only in Australia # phrases appear only in Australia
@@ -558,8 +648,11 @@ po_boxes:
alpha_probability: 0.05 # PO Box A alpha_probability: 0.05 # PO Box A
numeric_plus_alpha_probability: 0.04 # PO Box 123G numeric_plus_alpha_probability: 0.04 # PO Box 123G
alpha_plus_numeric_probability: 0.01 # PO Box A123 alpha_plus_numeric_probability: 0.01 # PO Box A123
alpha_plus_numeric_whitespace_probability: 0.1
numeric_plus_alpha_whitespace_probability: 0.1 alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits: digits:
- length: 1 - length: 1
@@ -575,6 +668,8 @@ po_boxes:
- length: 6 - length: 6
probability: 0.05 probability: 0.05
zones: zones:
# Overrides for commercial/office areas (landuse=commercial in OSM) # Overrides for commercial/office areas (landuse=commercial in OSM)
commercial: commercial:
@@ -586,12 +681,6 @@ po_boxes:
- alternative: *box - alternative: *box
probability: 0.1 probability: 0.1
order:
- after: house
probability: 0.8
- before: house
probability: 0.2
# Categories # Categories
# ========== # ==========
# Use the operators "in" and "near" for building category queries # Use the operators "in" and "near" for building category queries
@@ -703,9 +792,9 @@ cardinal_directions:
canonical: east canonical: east
abbreviated: e abbreviated: e
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.7
abbreviated_probability: 0.3 abbreviated_probability: 0.2
sample_probability: 0.2 sample_probability: 0.1
numeric: numeric:
direction: right direction: right
numeric_affix: numeric_affix:
@@ -717,9 +806,9 @@ cardinal_directions:
canonical: west canonical: west
abbreviated: w abbreviated: w
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.7
abbreviated_probability: 0.3 abbreviated_probability: 0.2
sample_probability: 0.2 sample_probability: 0.1
numeric: numeric:
direction: right direction: right
numeric_affix: numeric_affix:
@@ -731,9 +820,9 @@ cardinal_directions:
canonical: north canonical: north
abbreviated: n abbreviated: n
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.7
abbreviated_probability: 0.3 abbreviated_probability: 0.2
sample_probability: 0.2 sample_probability: 0.1
numeric: numeric:
direction: right direction: right
numeric_affix: numeric_affix:
@@ -745,9 +834,9 @@ cardinal_directions:
canonical: south canonical: south
abbreviated: s abbreviated: s
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.7
abbreviated_probability: 0.3 abbreviated_probability: 0.2
sample_probability: 0.2 sample_probability: 0.1
numeric: numeric:
direction: right direction: right
numeric_affix: numeric_affix:
@@ -777,27 +866,38 @@ entrances:
sample: true sample: true
canonical_probability: 0.8 canonical_probability: 0.8
abbreviated_probability: 0.2 abbreviated_probability: 0.2
numeric:
direction: left
# Entrance 1, Entrance A, etc. # Entrance 1, Entrance A, etc.
alphanumeric: &entrance_alphanumeric alphanumeric: &entrance_alphanumeric
default: *entrance default: *entrance
numeric_probability: 0.1 # e.g. Entrance 1
alpha_probability: 0.85 # e.g. Entrance A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
directional: alpha_plus_numeric:
base: *entrance_alphanumeric whitespace_probability: 0.1
modifier:
direction: left # e.g. North Entrance numeric_plus_alpha:
direction_probability: 0.9 whitespace_probability: 0.1
alternatives:
- alternative: *north directional:
- alternative: *south modifier:
- alternative: *east direction: left # e.g. North Entrance
- alternative: *west direction_probability: 0.9
- alternative: *right alternatives:
- alternative: *left - alternative: *north
- alternative: *rear - alternative: *south
- alternative: *front - alternative: *east
- alternative: - alternative: *west
canonical: freight - alternative: *right
- alternative: *left
- alternative: *rear
- alternative: *front
- alternative:
canonical: freight
# Staircase # Staircase
# ========= # =========
@@ -807,18 +907,34 @@ staircases:
stair: &stair stair: &stair
canonical: stair canonical: stair
sample: true sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
staircase: &staircase staircase: &staircase
canonical: staircase canonical: staircase
sample: true sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
stairway: &stairway stairway: &stairway
canonical: stairway canonical: stairway
sample: true sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
stairwell: &stairwell stairwell: &stairwell
canonical: stairwell canonical: stairwell
sample: true sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
alphanumeric: &staircase_alphanumeric alphanumeric: &staircase_alphanumeric
# For alphanumerics, Stair A, Stair 1, etc. # For alphanumerics, Stair A, Stair 1, etc.
@@ -831,22 +947,30 @@ staircases:
probability: 0.2 probability: 0.2
- alternative: *stairwell - alternative: *stairwell
probability: 0.2 probability: 0.2
numeric_probability: 0.1 # e.g. Staircase 1
alpha_probability: 0.85 # e.g. Staircase A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
directional: alpha_plus_numeric:
base: *staircase_alphanumeric whitespace_probability: 0.1
modifier:
direction: left # e.g. Left Staircase numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left # e.g. Left Staircase, North Tower
direction_probability: 0.7 direction_probability: 0.7
alternatives: modifier:
- alternative: *north alternatives:
- alternative: *south - alternative: *north
- alternative: *east - alternative: *south
- alternative: *west - alternative: *east
- alternative: *right - alternative: *west
- alternative: *left - alternative: *right
- alternative: *rear - alternative: *left
- alternative: *front - alternative: *rear
- alternative: *front
# Unit types # Unit types
# ========== # ==========
@@ -855,25 +979,6 @@ staircases:
# refer to the # refer to the
units: units:
# Units are not part of the global address formats (and are not always standard)
# This is a list of places in the address where the unit line might go
order:
# e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
- before: house
probability: 0.2
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
- before: road
probability: 0.6
# e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
- after: road
probability: 0.1
# e.g. Floor 5, Apt 6
- after: level
probability: 0.09
# e.g. Apt. 6, 5/F (less common)
- before: level
probability: 0.01
# Special terms # Special terms
suite: &suite suite: &suite
canonical: suite canonical: suite
@@ -889,9 +994,9 @@ units:
abbreviated_probability: 0.4 abbreviated_probability: 0.4
numeric: numeric:
direction: left direction: left
# Suite #101 and Suite No. 101 as opposed to Suite 101 # Suite #101 and Suite No. 101 as opposed to Suite 101
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.5 add_number_phrase_probability: 0.5
penthouse: &penthouse penthouse: &penthouse
canonical: penthouse canonical: penthouse
abbreviated: ph abbreviated: ph
@@ -901,13 +1006,16 @@ units:
sample_probability: 0.2 sample_probability: 0.2
plural: plural:
canonical: penthouses canonical: penthouses
standalone_probability: 1.0
penthouse_numeric: &penthouse_numeric
<<: *penthouse
numeric: numeric:
direction: left direction: left
numeric_probability: 0.2 # Penthouse #1 and Penthouse No. 1
standalone_probability: 0.8 add_number_phrase: true
# Penthouse #1 and Penthouse No. 1 add_number_phrase_probability: 0.2
add_number_phrase: true numeric_probability: 1.0
add_number_phrase_probability: 0.2 standalone_probability: 0.0
top_left: &top_left top_left: &top_left
canonical: top left canonical: top left
abbreviated: t/l abbreviated: t/l
@@ -950,9 +1058,9 @@ units:
abbreviated_probability: 0.6 abbreviated_probability: 0.6
numeric: numeric:
direction: left direction: left
# Office #1 and Office No. 1 # Office #1 and Office No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.7 add_number_phrase_probability: 0.7
door: &door door: &door
canonical: door canonical: door
sample: true sample: true
@@ -962,9 +1070,9 @@ units:
canonical: doors canonical: doors
numeric: numeric:
direction: left direction: left
# Door #1 and Door No. 1 # Door #1 and Door No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.2 add_number_phrase_probability: 0.2
room: &room room: &room
canonical: room canonical: room
abbreviated: rm abbreviated: rm
@@ -978,18 +1086,18 @@ units:
abbreviated_probability: 0.4 abbreviated_probability: 0.4
numeric: numeric:
direction: left direction: left
# Room #1 and Room No. 1 # Room #1 and Room No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
hall: &hall hall: &hall
canonical: hall canonical: hall
plural: plural:
canonical: halls canonical: halls
numeric: numeric:
direction: left direction: left
# Hall #1 and Hall No. 1 # Hall #1 and Hall No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
apartment: &apartment apartment: &apartment
canonical: apartment canonical: apartment
abbreviated: apt abbreviated: apt
@@ -1005,9 +1113,9 @@ units:
abbreviated: 0.8 abbreviated: 0.8
numeric: numeric:
direction: left direction: left
# Apt #1 and Apt No. 1 # Apt #1 and Apt No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 add_number_phrase_probability: 0.4
flat: &flat flat: &flat
canonical: flat canonical: flat
abbreviated: flt abbreviated: flt
@@ -1022,9 +1130,9 @@ units:
abbreviated_probability: 0.2 abbreviated_probability: 0.2
numeric: numeric:
direction: left direction: left
# Flat #1 and Flat No. 1 # Flat #1 and Flat No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 add_number_phrase_probability: 0.4
lot: &lot lot: &lot
canonical: lot canonical: lot
sample: true sample: true
@@ -1034,9 +1142,9 @@ units:
canonical: lots canonical: lots
numeric: numeric:
direction: left direction: left
# Lot #1 and Lot No. 1 # Lot #1 and Lot No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
parcel: &parcel parcel: &parcel
canonical: parcel canonical: parcel
sample: true sample: true
@@ -1046,8 +1154,8 @@ units:
canonical: parcels canonical: parcels
numeric: numeric:
direction: left direction: left
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
unit: &unit unit: &unit
canonical: unit canonical: unit
abbreviated: u abbreviated: u
@@ -1059,9 +1167,9 @@ units:
canonical: units canonical: units
numeric: numeric:
direction: left direction: left
# Unit #1 and Unit No. 1 # Unit #1 and Unit No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 add_number_phrase_probability: 0.4
alphanumeric: &unit_alphanumeric alphanumeric: &unit_alphanumeric
# Many unit types that apply only in Australia # Many unit types that apply only in Australia
# For most English-speaking countries, only use the terms defined above # For most English-speaking countries, only use the terms defined above
@@ -1076,7 +1184,7 @@ units:
probability: 0.2 probability: 0.2
- alternative: *door - alternative: *door
probability: 0.04 probability: 0.04
- alternative: *penthouse - alternative: *penthouse_numeric
probability: 0.01 probability: 0.01
- alternative: *apartment - alternative: *apartment
probability: 0.1 probability: 0.1
@@ -1084,8 +1192,11 @@ units:
numeric_plus_alpha_probability: 0.03 # e.g. 1A numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1 alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. Flat A alpha_probability: 0.04 # e.g. Flat A
alpha_plus_numeric_whitespace_probability: 0.1
numeric_plus_alpha_whitespace_probability: 0.1 alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2L, 2R, etc. # Separate random probability for adding directions like 2L, 2R, etc.
add_direction: true add_direction: true
@@ -1160,23 +1271,6 @@ units:
- alternative: *top_floor_right - alternative: *top_floor_right
probability: 0.15 probability: 0.15
# For unit types like 2/34 (more common in Canada and Australia)
combined:
component: house_number
direction: right
separators:
- separator: /
probability: 0.8
- separator: "-"
probability: 0.1
- separator: " - "
probability: 0.1
# If no unit number is specified
alphanumeric_probability: 0.75
standalone_probability: 0.2495
combined_probability: 0.005
# Country-specific overrides # Country-specific overrides
# ========================== # ==========================
# For each country, we allow a copy of the structures listed above # For each country, we allow a copy of the structures listed above
@@ -1245,10 +1339,12 @@ countries:
probability: 0.6 probability: 0.6
alternatives: alternatives:
- alternative: *unit - alternative: *unit
probability: 0.3 probability: 0.1
- alternative: *number
probability: 0.2
- alternative: *door - alternative: *door
probability: 0.02 probability: 0.02
- alternative: *penthouse - alternative: *penthouse_numeric
probability: 0.07 probability: 0.07
- alternative: *flat - alternative: *flat
probability: 0.01 # See this e.g. in Milwaukee with Polish flats probability: 0.01 # See this e.g. in Milwaukee with Polish flats
@@ -1270,6 +1366,9 @@ countries:
# Canada # Canada
# Specifically Canadian English. If the address is in French it will use fr.yaml # Specifically Canadian English. If the address is in French it will use fr.yaml
ca: ca:
combinations:
house_number_unit:
probability: 0.3
levels: levels:
# Note: Canadian English uses "storey" keeping with the British convention, so no need to change that # Note: Canadian English uses "storey" keeping with the British convention, so no need to change that