Files
libpostal/resources/formatting/global.yaml

736 lines
23 KiB
YAML

# Global settings. The per-language and per-country entries elsewhere in this
# file are described as overrides of these.
global:
  categories:
    place_direction: right
    direction_probability: 0.9
  # NOTE(review): presumably romanized/pinyin language codes are handled as
  # English - confirm against the consumer.
  language_code_replacements:
    ja_rm: en
    ko_rm: en
    zh_pinyin: en
  insertions:
    # For each component, insertions are mutually exclusive.
    # Their probabilities don't have to sum to 1 (especially for
    # components likely to be found in most addresses).
    postcode:
      postcode_before_city:
        before: city
        probability: 0.001
      postcode_after_city:
        after: city
        probability: 0.0001
      postcode_before_city_district:
        before: city_district
        probability: 0.0001
      postcode_before_suburb:
        before: suburb
        probability: 0.0001
      # Anchored on state_district, as the key name says; plain `state` is
      # already covered by postcode_before_state just below.
      postcode_before_state_district:
        before: state_district
        probability: 0.0001
      postcode_before_state:
        before: state
        probability: 0.0001
      postcode_before_country:
        before: country
        probability: 0.05
      postcode_after_country:
        after: country
        probability: 0.01
      postcode_first:
        first: true
        probability: 0.001
      postcode_last:
        last: true
        probability: 0.01
    # PO Box should be the same in most countries
    po_box:
      po_box_before_city:
        before: city
        probability: 0.7
      po_box_after_house:
        after: house
        probability: 0.2
      po_box_first:
        first: true
        probability: 0.1
    care_of:
      care_of_after_attention:
        after: attention
        probability: 0.9
      care_of_after_house:
        after: house
        probability: 0.1
    subdivision:
      subdivision_before_suburb:
        before: suburb
        probability: 1.0
# Overrides for languages (better for e.g. covering all French-speaking countries)
languages:
# Shared template: the individual language entries alias this mapping via
# *continental_european_template_insertions.
continental_european_languages:
  insertions: &continental_european_template_insertions
    house_number:
      house_number_before_road:
        before: road
        probability: 0.01
    building:
      building_after_house_number:
        after: house_number
        probability: 0.8
      building_after_house:
        after: house
        probability: 0.05
      building_before_suburb:
        before: suburb
        probability: 0.15
      conditional:
        # Used when a subdivision component is present
        - component: subdivision
          probabilities:
            building_after_house_number:
              after: house_number
              probability: 0.8
            building_before_subdivision:
              before: subdivision
              probability: 0.2
    entrance:
      # e.g. Calle Foo 3, entrada 1
      entrance_after_house_number:
        after: house_number
        probability: 0.8
      entrance_after_house:
        after: house
        probability: 0.2
      conditional:
        - component: building
          probabilities:
            # default: 0.1
            entrance_after_building:
              after: building
              probability: 0.9
    staircase:
      # e.g. Calle Foo 3, escalera C
      staircase_after_house_number:
        after: house_number
        probability: 0.9
      staircase_after_house:
        after: house
        probability: 0.1
      conditional:
        - component: entrance
          probabilities:
            # default: 0.1
            staircase_after_entrance:
              after: entrance
              probability: 0.9
        - component: building
          probabilities:
            # default: 0.1
            staircase_after_building:
              after: building
              probability: 0.9
    level:
      # e.g. Calle Ruiz de Alarcón 23 piso 3
      level_after_house_number:
        after: house_number
        probability: 0.95
      # e.g. Piso 3, Museo del Prado, Calle Ruiz de Alarcón 23
      level_before_house:
        before: house
        probability: 0.03
      # e.g. Museo del Prado, Bajos, Calle Ruiz de Alarcón 23
      level_before_road:
        before: road
        probability: 0.02
      conditional:
        - component: staircase
          probabilities:
            # default: 0.01
            level_after_staircase:
              after: staircase
              probability: 0.99
        - component: entrance
          probabilities:
            # default: 0.01
            level_after_entrance:
              after: entrance
              probability: 0.99
        - component: building
          probabilities:
            # default: 0.01
            level_after_building:
              after: building
              probability: 0.99
    unit:
      unit_after_house_number:
        after: house_number
        probability: 0.9
      unit_before_road:
        before: road
        probability: 0.1
      conditional:
        - component: level
          probabilities:
            # default: 0.02
            # e.g. Piso 3 Dpto 12 (most common)
            unit_after_level:
              after: level
              probability: 0.93
            # e.g. Apto 6, 2o piso (less common)
            unit_before_level:
              before: level
              probability: 0.05
        - component: staircase
          probabilities:
            # default: 0.1
            unit_after_staircase:
              after: staircase
              probability: 0.9
        - component: entrance
          probabilities:
            # default: 0.1
            unit_after_entrance:
              after: entrance
              probability: 0.9
        - component: building
          probabilities:
            # default: 0.1
            unit_after_building:
              after: building
              probability: 0.9
# English. The insertions are anchored as &english_template_insertions so
# that country entries can alias or merge them.
en:
  insertions: &english_template_insertions
    building:
      building_after_house:
        after: house
        probability: 0.6
      building_after_road:
        after: road
        probability: 0.3
      building_before_suburb:
        before: suburb
        probability: 0.1
      conditional:
        # Used when a subdivision component is present
        - component: subdivision
          probabilities:
            building_after_house:
              after: house
              probability: 0.6
            building_after_road:
              after: road
              probability: 0.2
            building_before_subdivision:
              before: subdivision
              probability: 0.2
    entrance:
      # e.g. 123 East 45th St, Entrance B, NYC
      entrance_after_road:
        after: road
        probability: 0.75
      # e.g. Entrance B, Da Vinci House, 44 Saffron Hill, London
      entrance_before_house:
        before: house
        probability: 0.1
      # e.g. Da Vinci House, Entrance B, 44 Saffron Hill, London
      entrance_before_house_number:
        before: house_number
        probability: 0.15
      conditional:
        - component: building
          probabilities:
            # default: 0.8
            entrance_after_building:
              after: building
              probability: 0.2
    staircase:
      # e.g. 123 East 45th St, Staircase C, NYC
      staircase_after_road:
        after: road
        probability: 0.5
      # e.g. Staircase C, Da Vinci House, 44 Saffron Hill, London
      staircase_before_house:
        before: house
        probability: 0.1
      # e.g. Da Vinci House, Staircase C, 44 Saffron Hill, London
      staircase_before_house_number:
        before: house_number
        probability: 0.4
      conditional:
        - component: entrance
          probabilities:
            # default: 0.1
            staircase_after_entrance:
              after: entrance
              probability: 0.9
    level:
      # e.g. 123 East 45th St, 6th Floor, NYC
      level_after_road:
        after: road
        probability: 0.5
      # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
      level_before_house:
        before: house
        probability: 0.25
      # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
      level_before_house_number:
        before: house_number
        probability: 0.25
      conditional:
        - component: staircase
          probabilities:
            # default: 0.4
            level_after_staircase:
              after: staircase
              probability: 0.6
        - component: entrance
          probabilities:
            # default: 0.4
            level_after_entrance:
              after: entrance
              probability: 0.6
    unit:
      # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
      unit_before_house:
        before: house
        probability: 0.2
      # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
      unit_before_house_number:
        before: house_number
        probability: 0.6
      # e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
      unit_after_road:
        after: road
        probability: 0.2
      conditional:
        - component: level
          probabilities:
            # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
            unit_before_house:
              before: house
              probability: 0.1
            # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
            unit_before_house_number:
              before: house_number
              probability: 0.1
            # e.g. Floor 5, Apt 6
            unit_after_level:
              after: level
              probability: 0.79
            # e.g. Apt. 6, 5/F (less common)
            unit_before_level:
              before: level
              probability: 0.01
# Every language below reuses the continental European template unchanged,
# except Hungarian, which adds a postcode override.

# Spanish
es:
  insertions: *continental_european_template_insertions
# French - exceptions for countries (including France itself) are in the
# country entries
fr:
  insertions: *continental_european_template_insertions
# Portuguese
pt:
  insertions: *continental_european_template_insertions
# Italian
it:
  insertions: *continental_european_template_insertions
# German
de:
  insertions: *continental_european_template_insertions
# Dutch
nl:
  insertions: *continental_european_template_insertions
# Danish
da:
  insertions: *continental_european_template_insertions
# Swedish
sv:
  insertions: *continental_european_template_insertions
# Norwegian (Bokmål)
nb:
  insertions: *continental_european_template_insertions
# Polish
pl:
  insertions: *continental_european_template_insertions
# Russian
ru:
  insertions: *continental_european_template_insertions
# Czech
cs:
  insertions: *continental_european_template_insertions
# Slovak
sk:
  insertions: *continental_european_template_insertions
# Estonian
et:
  insertions: *continental_european_template_insertions
# Finnish
fi:
  insertions: *continental_european_template_insertions
# Romanian
ro:
  insertions: *continental_european_template_insertions
# Hungarian - merges the template, then adds its own postcode insertion
hu:
  insertions:
    <<: *continental_european_template_insertions
    # e.g. 1075, Budapest Kazinczy utca 14
    postcode:
      postcode_before_city:
        before: city
        probability: 0.5
# Ukrainian
uk:
  insertions: *continental_european_template_insertions
# Lithuanian
lt:
  insertions: *continental_european_template_insertions
# Latvian
lv:
  insertions: *continental_european_template_insertions
# Serbian
sr:
  insertions: *continental_european_template_insertions
# Croatian
hr:
  insertions: *continental_european_template_insertions
# Slovenian
sl:
  insertions: *continental_european_template_insertions
# Bosnian
bs:
  insertions: *continental_european_template_insertions
# Hebrew - Israel basically uses the same format as continental Europe
he:
  insertions: *continental_european_template_insertions
# Basque
eu:
  insertions: *continental_european_template_insertions
# Catalan
ca:
  insertions: *continental_european_template_insertions
# Bulgarian
bg:
  insertions: *continental_european_template_insertions
# Greek
el:
  insertions: *continental_european_template_insertions
# Icelandic
is:
  insertions: *continental_european_template_insertions
countries:
# Malaysia (islands are bigger than states): island is listed after every
# component from road through state, and before country.
my:
  admin_components:
    island:
      after:
        - road
        - suburb
        - city_district
        - city
        - state_district
        - state
      before:
        - country
# United Kingdom: reuses the English template as-is, so it applies even when
# the language is unknown
gb:
  insertions: *english_template_insertions
# United States. Starts from the English template; the keys written out here
# (entrance, staircase, level, unit) replace the merged-in ones wholesale,
# because a YAML merge is shallow. Anchored as &us_template_insertions for
# reuse by other country entries.
us:
  insertions: &us_template_insertions
    <<: *english_template_insertions
    entrance:
      # e.g. 123 East 45th St, Entrance B, NYC
      entrance_after_road:
        after: road
        probability: 0.8
      # entrance ahead of the house number
      entrance_before_house_number:
        before: house_number
        probability: 0.2
      conditional:
        - component: building
          probabilities:
            # default: 0.8
            entrance_after_building:
              after: building
              probability: 0.2
    staircase:
      # e.g. 123 East 45th St, Staircase C, NYC
      staircase_after_road:
        after: road
        probability: 0.7
      # staircase ahead of the house number
      staircase_before_house_number:
        before: house_number
        probability: 0.3
      conditional:
        - component: entrance
          probabilities:
            # default: 0.1
            staircase_after_entrance:
              after: entrance
              probability: 0.9
    level:
      # e.g. 123 East 45th St, 6th Floor, NYC
      level_after_road:
        after: road
        probability: 0.9
      # level ahead of the house number
      level_before_house_number:
        before: house_number
        probability: 0.1
      conditional:
        - component: staircase
          probabilities:
            # default: 0.4
            level_after_staircase:
              after: staircase
              probability: 0.6
        - component: entrance
          probabilities:
            # default: 0.4
            level_after_entrance:
              after: entrance
              probability: 0.6
    unit:
      # unit ahead of the house number
      unit_before_house_number:
        before: house_number
        probability: 0.1
      # e.g. 123 East 45th St, Apt 6, NYC
      unit_after_road:
        after: road
        probability: 0.9
      conditional:
        - component: level
          probabilities:
            # default: 0.1
            # e.g. Floor 5, Apt 6
            unit_after_level:
              after: level
              probability: 0.8
            # e.g. Apt. 6, 5/F (less common)
            unit_before_level:
              before: level
              probability: 0.1
# The entries below all reuse the US template unchanged.

# American Samoa
as:
  insertions: *us_template_insertions
# Federated States of Micronesia
fm:
  insertions: *us_template_insertions
# Guam
gu:
  insertions: *us_template_insertions
# Marshall Islands
mh:
  insertions: *us_template_insertions
# Northern Mariana Islands
mp:
  insertions: *us_template_insertions
# Puerto Rico
pr:
  insertions: *us_template_insertions
# U.S. Minor Outlying Islands
um:
  insertions: *us_template_insertions
# U.S. Virgin Islands
vi:
  insertions: *us_template_insertions
# France (country entry). Starts from the English template; house_number,
# level, unit and city written out here take precedence over any merged-in
# key of the same name, because a YAML merge is shallow. Anchored as
# &france_template_insertions for reuse by other country entries.
fr:
  insertions: &france_template_insertions
    <<: *english_template_insertions
    house_number:
      house_number_after_road:
        after: road
        probability: 0.01
    level:
      # level right after the road (by far the most likely placement)
      level_after_road:
        after: road
        probability: 0.95
      # level ahead of the house (venue/building name)
      level_before_house:
        before: house
        probability: 0.01
      # level ahead of the house number
      level_before_house_number:
        before: house_number
        probability: 0.04
      conditional:
        - component: staircase
          probabilities:
            # default: 0.05
            level_after_staircase:
              after: staircase
              probability: 0.95
        - component: entrance
          probabilities:
            # default: 0.05
            level_after_entrance:
              after: entrance
              probability: 0.95
    unit:
      # unit ahead of the house (venue/building name)
      unit_before_house:
        before: house
        probability: 0.02
      # unit ahead of the house number
      unit_before_house_number:
        before: house_number
        probability: 0.03
      # unit right after the road (by far the most likely placement)
      unit_after_road:
        after: road
        probability: 0.95
      conditional:
        - component: level
          probabilities:
            unit_before_house:
              before: house
              probability: 0.005
            unit_before_house_number:
              before: house_number
              probability: 0.01
            # unit directly after the level (most common)
            unit_after_level:
              after: level
              probability: 0.98
            # unit directly before the level (less common)
            unit_before_level:
              before: level
              probability: 0.005
    # libpostal issue #27
    city:
      city_before_road:
        before: road
        probability: 0.001
# The entries below all reuse the France template unchanged. Keys with a
# suffix follow the <country>_<language> pattern (e.g. dz_fr = Algeria, French).

# Andorra (Catalan), uses same template as France
ad_ca:
  insertions: *france_template_insertions
# Algeria
dz_fr:
  insertions: *france_template_insertions
# Luxembourg
lu_fr:
  insertions: *france_template_insertions
# Monaco
mc:
  insertions: *france_template_insertions
# Senegal
sn:
  insertions: *france_template_insertions
# Tunisia
tn_fr:
  insertions: *france_template_insertions
# East Asian countries: the plain country key carries no insertions, while the
# English-language variant reuses an English-style template. Variant keys are
# written <country>_<language>, matching cn_en / hk_en and the *_fr keys used
# for the France-template countries.

# China
cn:
  insertions: {}
cn_en:
  insertions: *us_template_insertions
# Hong Kong
hk:
  insertions: {}
hk_en:
  insertions: *english_template_insertions
# Taiwan
tw:
  insertions: {}
tw_en:
  insertions: *us_template_insertions
# Japan
jp:
  insertions: {}
jp_en:
  insertions: *us_template_insertions
# South Korea
kr:
  insertions: {}
kr_en:
  insertions: *us_template_insertions