[addresses] changing plurals to use the standard probability structure

This commit is contained in:
Al
2016-04-18 15:12:59 -04:00
parent d0fb0d413d
commit 848b7ac167

View File

@@ -16,7 +16,7 @@
# so we'll define it separately # so we'll define it separately
numbers: numbers:
default: default: &number
canonical: number # canonical word in libpostal dictionary canonical: number # canonical word in libpostal dictionary
abbreviated: "no" # most common abbreviated form ("no" is a boolean in YAML, needs to be quoted) abbreviated: "no" # most common abbreviated form ("no" is a boolean in YAML, needs to be quoted)
sample: true # Randomly sample other variations (e.g. num, nr) sample: true # Randomly sample other variations (e.g. num, nr)
@@ -466,7 +466,7 @@ cross_streets:
canonical_probability: 0.7 canonical_probability: 0.7
abbreviated_probability: 0.3 abbreviated_probability: 0.3
sample: true sample: true
corner_of: corner_of: &corner_of
canonical: corner of canonical: corner of
intersection: intersection:
@@ -658,6 +658,8 @@ directions:
numeric_affix: numeric_affix:
affix: f affix: f
direction: right direction: right
cardinal_directions:
east: &east east: &east
canonical: east canonical: east
abbreviated: e abbreviated: e
@@ -807,12 +809,16 @@ units:
# Special terms # Special terms
suite: &suite suite: &suite
canonical: suite canonical: suite
plural: suites
abbreviated: ste abbreviated: ste
sample: true sample: true
canonical_probability: 0.4 canonical_probability: 0.4
abbreviated_probability: 0.4 abbreviated_probability: 0.4
sample_probability: 0.2 sample_probability: 0.2
plural:
canonical: suites
abbreviated: stes
canonical_probability: 0.6
abbreviated_probability: 0.4
# Suite #101 and Suite No. 101 as opposed to Suite 101 # Suite #101 and Suite No. 101 as opposed to Suite 101
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.5 add_number_phrase_probability: 0.5
@@ -823,6 +829,8 @@ units:
canonical_probability: 0.5 canonical_probability: 0.5
abbreviated_probability: 0.3 abbreviated_probability: 0.3
sample_probability: 0.2 sample_probability: 0.2
plural:
canonical: penthouses
# Penthouse #1 and Penthouse No. 1 # Penthouse #1 and Penthouse No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.2 add_number_phrase_probability: 0.2
@@ -856,65 +864,85 @@ units:
sample_probability: 0.3 sample_probability: 0.3
office: &office office: &office
canonical: office canonical: office
plural: offices
abbreviated: ofc abbreviated: ofc
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.5
abbreviated_probability: 0.3 abbreviated_probability: 0.3
sample_probability: 0.2 sample_probability: 0.2
plural:
canonical: offices
abbreviated: ofcs
canonical_probability: 0.4
abbreviated_probability: 0.6
# Office #1 and Office No. 1 # Office #1 and Office No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.7 add_number_phrase_probability: 0.7
door: &door door: &door
canonical: door canonical: door
plural: doors
sample: true sample: true
canonical_probability: 0.8 canonical_probability: 0.8
sample_probability: 0.2 sample_probability: 0.2
plural:
canonical: doors
# Door #1 and Door No. 1 # Door #1 and Door No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.2 add_number_phrase_probability: 0.2
room: &room room: &room
canonical: room canonical: room
plural: rooms
abbreviated: rm abbreviated: rm
sample: true sample: true
canonical_probability: 0.5 canonical_probability: 0.5
abbreviated_probability: 0.5 abbreviated_probability: 0.5
plural:
canonical: rooms
abbreviated: rms
canonical_probability: 0.6
abbreviated_probability: 0.4
# Room #1 and Room No. 1 # Room #1 and Room No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
hall: &hall hall: &hall
canonical: hall canonical: hall
plural:
canonical: halls
apartment: &apartment apartment: &apartment
canonical: apartment canonical: apartment
plural: apartments
abbreviated: apt abbreviated: apt
prefer_abbreviated: true prefer_abbreviated: true
sample: true sample: true
canonical_probability: 0.15 canonical_probability: 0.15
abbreviated_probability: 0.6 abbreviated_probability: 0.6
sample_probability: 0.25 sample_probability: 0.25
# Plural forms of "apartment": canonical vs. abbreviated, chosen by the
# two probabilities below (same structure as the other unit plurals).
plural:
canonical: apartments
abbreviated: apts
canonical_probability: 0.2
abbreviated_probability: 0.8
# Apt #1 and Apt No. 1 # Apt #1 and Apt No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 add_number_phrase_probability: 0.4
flat: &flat flat: &flat
canonical: flat canonical: flat
plural: flats abbreviated: flt
abbreviated: fl
sample: true sample: true
canonical_probability: 0.8 canonical_probability: 0.8
abbreviated_probability: 0.1 abbreviated_probability: 0.1
sample_probability: 0.1 sample_probability: 0.1
plural:
canonical: flats
abbreviated: flts
canonical_probability: 0.6
abbreviated_probability: 0.4
# Flat #1 and Flat No. 1 # Flat #1 and Flat No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.4 add_number_phrase_probability: 0.4
lot: &lot lot: &lot
canonical: lot canonical: lot
plural: lots
sample: true sample: true
canonical_probability: 0.9 canonical_probability: 0.9
sample_probability: 0.1 sample_probability: 0.1
plural:
canonical: lots
# Lot #1 and Lot No. 1 # Lot #1 and Lot No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
@@ -923,29 +951,34 @@ units:
canonical_probability: 0.9 canonical_probability: 0.9
sample: true sample: true
sample_probability: 0.1 sample_probability: 0.1
plural:
canonical: parcels
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.6
unit: &unit unit: &unit
canonical: unit canonical: unit
abbreviated: u abbreviated: u
add_number_phrase: true
add_number_phrase_probability: 0.3
sample: true sample: true
canonical_probability: 0.8 canonical_probability: 0.8
abbreviated_probability: 0.1 abbreviated_probability: 0.1
sample_probability: 0.1 sample_probability: 0.1
plural:
canonical: units
# Unit #1 and Unit No. 1 # Unit #1 and Unit No. 1
add_number_phrase: true add_number_phrase: true
add_number_phrase_probability: 0.6 add_number_phrase_probability: 0.4
alphanumeric: &unit_alphanumeric alphanumeric: &unit_alphanumeric
# Many unit types that apply only in Australia # Many unit types that apply only in Australia
# For most English-speaking countries, only use the terms defined above # For most English-speaking countries, only use the terms defined above
sample: false sample: false
default: *flat default: *flat
probability: 0.6 probability: 0.4
alternatives: alternatives:
- alternative: *unit - alternative: *unit
probability: 0.25 probability: 0.25
# e.g. just plain #3 or No. 4
- alternative: *number
probability: 0.2
- alternative: *door - alternative: *door
probability: 0.04 probability: 0.04
- alternative: *penthouse - alternative: *penthouse
@@ -960,7 +993,9 @@ units:
# Separate random probability for adding directions like 2L, 2R, etc. # Separate random probability for adding directions like 2L, 2R, etc.
add_direction: true add_direction: true
add_direction_probability: 0.1 add_direction_probability: 0.1
add_direction_numeric_only: true # Only for numbers # Add directions only for plain numbers
add_direction_numeric: true
add_direction_standalone: true
zone: zone:
residential: *unit_alphanumeric residential: *unit_alphanumeric
@@ -1005,7 +1040,7 @@ units:
# For unit types like 2/34 # For unit types like 2/34
combined: combined:
component: house_number component: house_number
direction: left direction: left # Apartment number goes to the left of the house number (Canada)
directional: directional:
modifier: modifier:
@@ -1034,8 +1069,8 @@ units:
# If no unit number is specified # If no unit number is specified
alphanumeric_probability: 0.75 alphanumeric_probability: 0.75
standalone_probability: 0.15 standalone_probability: 0.2
combined_probability: 0.1 combined_probability: 0.05
# Country-specific overrides # Country-specific overrides
# ========================== # ==========================
@@ -1142,17 +1177,25 @@ countries:
au: au:
po_boxes: &australia_po_boxes po_boxes: &australia_po_boxes
alphanumeric: alphanumeric:
default: *po_box
probability: 0.94
# Australia has many strings for this e.g. Roadside Mail Bag # Australia has many strings for this e.g. Roadside Mail Bag
sample: true sample: true
sample_probability: 0.05 sample_probability: 0.01
units: &australia_unit_types units: &australia_unit_types
alphanumeric: alphanumeric:
# Australia has all kinds of unit types (e.g. Marine Berth) not used elsewhere # Australia has all kinds of unit types (e.g. Marine Berth) not used elsewhere
sample: true sample: true
sample_probability: 0.2 default: *flat
# Reduce the default's probability to make room for sampling
probability: 0.59
sample_probability: 0.01
standalone: standalone:
default: *penthouse
sample: true sample: true
sample_probability: 0.2 # Reduce the default's probability to make room for sampling
probability: 0.39
sample_probability: 0.01
# New Zealand - same rules as Australia # New Zealand - same rules as Australia
nz: nz:
po_boxes: *australia_po_boxes po_boxes: *australia_po_boxes