# 745 lines, 23 KiB, YAML
global:
|
|
categories:
|
|
place_direction: right
|
|
direction_probability: 0.9
|
|
|
|
language_code_replacements:
|
|
ja_rm: en
|
|
ko_rm: en
|
|
zh_pinyin: en
|
|
|
|
insertions:
|
|
# For each component, insertions are mutually exclusive
|
|
# They don't have to sum to 1 (especially for components
|
|
# likely to be found in most addresses)
|
|
postcode:
|
|
postcode_before_city:
|
|
before: city
|
|
probability: 0.001
|
|
|
|
postcode_after_city:
|
|
after: city
|
|
probability: 0.0001
|
|
|
|
postcode_before_city_district:
|
|
before: city_district
|
|
probability: 0.0001
|
|
|
|
postcode_before_suburb:
|
|
before: suburb
|
|
probability: 0.0001
|
|
|
|
postcode_before_state_district:
|
|
# Anchor on state_district (not state) so this entry differs from postcode_before_state
before: state_district
|
|
probability: 0.0001
|
|
|
|
postcode_before_state:
|
|
before: state
|
|
probability: 0.0001
|
|
|
|
postcode_before_country:
|
|
before: country
|
|
probability: 0.05
|
|
|
|
postcode_after_country:
|
|
after: country
|
|
probability: 0.01
|
|
|
|
postcode_first:
|
|
first: true
|
|
probability: 0.001
|
|
|
|
postcode_last:
|
|
last: true
|
|
probability: 0.01
|
|
|
|
# PO Box should be the same in most countries
|
|
po_box:
|
|
po_box_before_city:
|
|
before: city
|
|
probability: 0.7
|
|
|
|
po_box_after_house:
|
|
after: house
|
|
probability: 0.2
|
|
|
|
po_box_first:
|
|
first: true
|
|
probability: 0.1
|
|
|
|
care_of:
|
|
care_of_after_attention:
|
|
after: attention
|
|
probability: 0.9
|
|
care_of_after_house:
|
|
after: house
|
|
probability: 0.1
|
|
|
|
subdivision:
|
|
subdivision_before_suburb:
|
|
before: suburb
|
|
probability: 1.0
|
|
|
|
# Overrides for languages (better for e.g. covering all French-speaking countries)
|
|
languages:
|
|
continental_european_languages:
|
|
insertions: &continental_european_template_insertions
|
|
house_number:
|
|
house_number_before_road:
|
|
before: road
|
|
probability: 0.01
|
|
|
|
building:
|
|
building_after_house_number:
|
|
after: house_number
|
|
probability: 0.8
|
|
building_after_house:
|
|
after: house
|
|
probability: 0.05
|
|
building_before_suburb:
|
|
before: suburb
|
|
probability: 0.15
|
|
conditional:
|
|
- component: subdivision
|
|
probabilities:
|
|
building_after_house_number:
|
|
after: house_number
|
|
probability: 0.8
|
|
building_before_subdivision:
|
|
before: subdivision
|
|
probability: 0.2
|
|
|
|
entrance:
|
|
# e.g. Calle Foo 3, entrada 1
|
|
entrance_after_house_number:
|
|
after: house_number
|
|
probability: 0.8
|
|
entrance_after_house:
|
|
after: house
|
|
probability: 0.2
|
|
conditional:
|
|
- component: building
|
|
probabilities:
|
|
entrance_after_building:
|
|
after: building
|
|
probability: 0.9
|
|
|
|
staircase:
|
|
# e.g. 123 East 45th St, Staircase C, NYC
|
|
staircase_after_house_number:
|
|
after: house_number
|
|
probability: 0.9
|
|
staircase_after_house:
|
|
after: house
|
|
probability: 0.1
|
|
conditional:
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.1
|
|
staircase_after_entrance:
|
|
after: entrance
|
|
probability: 0.9
|
|
- component: building
|
|
probabilities:
|
|
# default: 0.1
|
|
staircase_after_building:
|
|
after: building
|
|
probability: 0.9
|
|
|
|
level:
|
|
# e.g. Calle Ruiz de Alarcón 23 piso 3
|
|
level_after_house_number:
|
|
after: house_number
|
|
probability: 0.95
|
|
# e.g. Piso 3, Museo del Prado, Calle Ruiz de Alarcón 23
|
|
level_before_house:
|
|
before: house
|
|
probability: 0.03
|
|
# e.g. Museo del Prado, Bajos, Calle Ruiz de Alarcón 23
|
|
level_before_road:
|
|
before: road
|
|
probability: 0.02
|
|
|
|
conditional:
|
|
- component: staircase
|
|
probabilities:
|
|
level_after_staircase:
|
|
after: staircase
|
|
probability: 0.99
|
|
- component: entrance
|
|
probabilities:
|
|
level_after_entrance:
|
|
after: entrance
|
|
probability: 0.99
|
|
- component: building
|
|
probabilities:
|
|
level_after_building:
|
|
after: building
|
|
probability: 0.99
|
|
|
|
unit:
|
|
unit_after_house_number:
|
|
after: house_number
|
|
probability: 0.9
|
|
unit_before_road:
|
|
before: road
|
|
probability: 0.1
|
|
conditional:
|
|
- component: level
|
|
probabilities:
|
|
# default: 0.02
|
|
# e.g. Piso 3 Dpto 12 (most common)
|
|
unit_after_level:
|
|
after: level
|
|
probability: 0.93
|
|
# e.g. Apto 6, 2o piso (less common)
|
|
unit_before_level:
|
|
before: level
|
|
probability: 0.05
|
|
- component: staircase
|
|
probabilities:
|
|
# default: 0.1
|
|
unit_after_staircase:
|
|
after: staircase
|
|
probability: 0.9
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.1
|
|
unit_after_entrance:
|
|
after: entrance
|
|
probability: 0.9
|
|
- component: building
|
|
probabilities:
|
|
# default: 0.1
|
|
unit_after_building:
|
|
after: building
|
|
probability: 0.9
|
|
|
|
en:
|
|
insertions: &english_template_insertions
|
|
building:
|
|
building_after_house:
|
|
after: house
|
|
probability: 0.6
|
|
building_after_road:
|
|
after: road
|
|
probability: 0.3
|
|
building_before_suburb:
|
|
before: suburb
|
|
probability: 0.1
|
|
conditional:
|
|
- component: subdivision
|
|
probabilities:
|
|
building_after_house:
|
|
after: house
|
|
probability: 0.6
|
|
building_after_road:
|
|
after: road
|
|
probability: 0.2
|
|
building_before_subdivision:
|
|
before: subdivision
|
|
probability: 0.2
|
|
entrance:
|
|
# e.g. 123 East 45th St, Entrance B, NYC
|
|
entrance_after_road:
|
|
after: road
|
|
probability: 0.75
|
|
entrance_before_house:
|
|
before: house
|
|
probability: 0.1
|
|
# e.g. Da Vinci House, Entrance B, 44 Saffron Hill, London
|
|
entrance_before_house_number:
|
|
before: house_number
|
|
probability: 0.15
|
|
conditional:
|
|
- component: building
|
|
probabilities:
|
|
# default: 0.8
|
|
entrance_after_building:
|
|
after: building
|
|
probability: 0.2
|
|
|
|
staircase:
|
|
# e.g. 123 East 45th St, Staircase C, NYC
|
|
staircase_after_road:
|
|
after: road
|
|
probability: 0.5
|
|
# e.g. Staircase C, Da Vinci House, 44 Saffron Hill, London
|
|
staircase_before_house:
|
|
before: house
|
|
probability: 0.1
|
|
# e.g. Da Vinci House, Staircase C, 44 Saffron Hill, London
|
|
staircase_before_house_number:
|
|
before: house_number
|
|
probability: 0.4
|
|
conditional:
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.1
|
|
staircase_after_entrance:
|
|
after: entrance
|
|
probability: 0.9
|
|
|
|
level:
|
|
# e.g. 123 East 45th St, 6th Floor, NYC
|
|
level_after_road:
|
|
after: road
|
|
probability: 0.5
|
|
# e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
|
|
level_before_house:
|
|
before: house
|
|
probability: 0.25
|
|
# e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
|
|
level_before_house_number:
|
|
before: house_number
|
|
probability: 0.25
|
|
|
|
conditional:
|
|
- component: staircase
|
|
probabilities:
|
|
# default: 0.4
|
|
level_after_staircase:
|
|
after: staircase
|
|
probability: 0.6
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.4
|
|
level_after_entrance:
|
|
after: entrance
|
|
probability: 0.6
|
|
|
|
unit:
|
|
# e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
|
|
unit_before_house:
|
|
before: house
|
|
probability: 0.2
|
|
|
|
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
|
unit_before_house_number:
|
|
before: house_number
|
|
probability: 0.6
|
|
|
|
# e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
|
|
unit_after_road:
|
|
after: road
|
|
probability: 0.2
|
|
|
|
conditional:
|
|
- component: level
|
|
probabilities:
|
|
unit_before_house:
|
|
before: house
|
|
probability: 0.1
|
|
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
|
unit_before_house_number:
|
|
before: house_number
|
|
probability: 0.1
|
|
# e.g. Floor 5, Apt 6
|
|
unit_after_level:
|
|
after: level
|
|
probability: 0.79
|
|
# e.g. Apt. 6, 5/F (less common)
|
|
unit_before_level:
|
|
before: level
|
|
probability: 0.01
|
|
|
|
# Spanish
|
|
es:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# French - exceptions for countries (including France itself) below
|
|
fr:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Portuguese
|
|
pt:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Italian
|
|
it:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# German
|
|
de:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Dutch
|
|
nl:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Danish
|
|
da:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Swedish
|
|
sv:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Norwegian (Bokmål)
|
|
nb:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Polish
|
|
pl:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Russian
|
|
ru:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Czech
|
|
cs:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Slovak
|
|
sk:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Estonian
|
|
et:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Finnish
|
|
fi:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Romanian
|
|
ro:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Hungarian
|
|
hu:
|
|
insertions:
|
|
<<: *continental_european_template_insertions
|
|
# e.g. 1075, Budapest Kazinczy utca 14
|
|
postcode:
|
|
postcode_before_city:
|
|
before: city
|
|
probability: 0.5
|
|
|
|
# Ukrainian
|
|
uk:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Lithuanian
|
|
lt:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Latvian
|
|
lv:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Serbian
|
|
sr:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Croatian
|
|
hr:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Slovenian
|
|
sl:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Bosnian
|
|
bs:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Hebrew - Israel basically uses the same format as continental Europe
|
|
he:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Basque
|
|
eu:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Catalan
|
|
ca:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Bulgarian
|
|
bg:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Greek
|
|
el:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
# Icelandic
|
|
is:
|
|
insertions: *continental_european_template_insertions
|
|
|
|
countries:
|
|
|
|
# Malaysia (islands are bigger than states)
|
|
my:
|
|
admin_components:
|
|
island:
|
|
after:
|
|
- road
|
|
- suburb
|
|
- city_district
|
|
- city
|
|
- state_district
|
|
- state
|
|
before:
|
|
- country
|
|
|
|
# United Kingdom, in case language is unknown
|
|
gb:
|
|
insertions: *english_template_insertions
|
|
|
|
us:
|
|
insertions: &us_template_insertions
|
|
<<: *english_template_insertions
|
|
entrance:
|
|
entrance_after_road:
|
|
after: road
|
|
probability: 0.8
|
|
entrance_before_house_number:
|
|
before: house_number
|
|
probability: 0.2
|
|
conditional:
|
|
- component: building
|
|
probabilities:
|
|
# default: 0.8
|
|
entrance_after_building:
|
|
after: building
|
|
probability: 0.2
|
|
|
|
staircase:
|
|
# e.g. 123 East 45th St, Staircase C, NYC
|
|
staircase_after_road:
|
|
after: road
|
|
probability: 0.7
|
|
# e.g. Da Vinci House, Staircase C, 44 Saffron Hill, London
|
|
staircase_before_house_number:
|
|
before: house_number
|
|
probability: 0.3
|
|
conditional:
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.1
|
|
staircase_after_entrance:
|
|
after: entrance
|
|
probability: 0.9
|
|
|
|
|
|
level:
|
|
# e.g. 123 East 45th St, 6th Floor, NYC
|
|
level_after_road:
|
|
after: road
|
|
probability: 0.9
|
|
# e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
|
|
level_before_house_number:
|
|
before: house_number
|
|
probability: 0.1
|
|
|
|
conditional:
|
|
- component: staircase
|
|
probabilities:
|
|
# default: 0.4
|
|
level_after_staircase:
|
|
after: staircase
|
|
probability: 0.6
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.4
|
|
level_after_entrance:
|
|
after: entrance
|
|
probability: 0.6
|
|
|
|
unit:
|
|
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
|
unit_before_house_number:
|
|
before: house_number
|
|
probability: 0.1
|
|
|
|
# e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
|
|
unit_after_road:
|
|
after: road
|
|
probability: 0.9
|
|
|
|
conditional:
|
|
- component: level
|
|
# default: 0.1
|
|
probabilities:
|
|
# e.g. Floor 5, Apt 6
|
|
unit_after_level:
|
|
after: level
|
|
probability: 0.8
|
|
# e.g. Apt. 6, 5/F (less common)
|
|
unit_before_level:
|
|
before: level
|
|
probability: 0.1
|
|
# American Samoa
|
|
as:
|
|
insertions: *us_template_insertions
|
|
|
|
# Federated States of Micronesia
|
|
fm:
|
|
insertions: *us_template_insertions
|
|
|
|
# Guam
|
|
gu:
|
|
insertions: *us_template_insertions
|
|
|
|
# Marshall Islands
|
|
mh:
|
|
insertions: *us_template_insertions
|
|
|
|
# Northern Mariana Islands
|
|
mp:
|
|
insertions: *us_template_insertions
|
|
|
|
# Puerto Rico
|
|
pr:
|
|
insertions:
|
|
<<: *us_template_insertions
|
|
postcode:
|
|
postcode_after_country:
|
|
# NOTE(review): position stated explicitly to mirror the global
# postcode_after_country entry; confirm whether the loader already inherits it
after: country
probability: 0.85
|
|
|
|
# U.S. Minor Outlying Islands
|
|
um:
|
|
insertions: *us_template_insertions
|
|
|
|
# U.S. Virgin Islands
|
|
vi:
|
|
insertions: *us_template_insertions
|
|
|
|
# Canada
|
|
ca:
|
|
insertions: *us_template_insertions
|
|
|
|
fr:
|
|
insertions: &france_template_insertions
|
|
<<: *english_template_insertions
|
|
house_number:
|
|
house_number_after_road:
|
|
after: road
|
|
probability: 0.01
|
|
|
|
level:
|
|
level_after_road:
|
|
after: road
|
|
probability: 0.95
|
|
level_before_house:
|
|
before: house
|
|
probability: 0.01
|
|
level_before_house_number:
|
|
before: house_number
|
|
probability: 0.04
|
|
|
|
conditional:
|
|
- component: staircase
|
|
probabilities:
|
|
level_after_staircase:
|
|
after: staircase
|
|
probability: 0.95
|
|
- component: entrance
|
|
probabilities:
|
|
# default: 0.05
|
|
level_after_entrance:
|
|
after: entrance
|
|
probability: 0.95
|
|
|
|
unit:
|
|
unit_before_house:
|
|
before: house
|
|
probability: 0.02
|
|
|
|
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
|
unit_before_house_number:
|
|
before: house_number
|
|
probability: 0.03
|
|
|
|
# e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (the dominant order in this template)
|
|
unit_after_road:
|
|
after: road
|
|
probability: 0.95
|
|
|
|
conditional:
|
|
- component: level
|
|
probabilities:
|
|
unit_before_house:
|
|
before: house
|
|
probability: 0.005
|
|
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
|
unit_before_house_number:
|
|
before: house_number
|
|
probability: 0.01
|
|
# e.g. Floor 5, Apt 6
|
|
unit_after_level:
|
|
after: level
|
|
probability: 0.98
|
|
# e.g. Apt. 6, 5/F (less common)
|
|
unit_before_level:
|
|
before: level
|
|
probability: 0.005
|
|
# libpostal issue #27
|
|
city:
|
|
city_before_road:
|
|
before: road
|
|
probability: 0.001
|
|
allow_between_house_number_and_road: true
|
|
|
|
# Andorra, uses same template as France
|
|
ad_ca:
|
|
insertions: *france_template_insertions
|
|
|
|
# Algeria
|
|
dz_fr:
|
|
insertions: *france_template_insertions
|
|
|
|
# Luxembourg
|
|
lu_fr:
|
|
insertions: *france_template_insertions
|
|
|
|
# Monaco
|
|
mc:
|
|
insertions: *france_template_insertions
|
|
|
|
# Senegal
|
|
sn:
|
|
insertions: *france_template_insertions
|
|
|
|
# Tunisia
|
|
tn_fr:
|
|
insertions: *france_template_insertions
|
|
|
|
# China
|
|
cn:
|
|
insertions: {}
|
|
|
|
cn_en:
|
|
insertions: *us_template_insertions
|
|
|
|
# Hong Kong
|
|
hk:
|
|
insertions: {}
|
|
|
|
hk_en:
|
|
insertions: *english_template_insertions
|
|
|
|
# Taiwan
|
|
tw:
|
|
insertions: {}
|
|
|
|
tw_en:
|
|
insertions: *us_template_insertions
|
|
|
|
# Japan
|
|
jp:
|
|
insertions: {}
|
|
|
|
jp_en:
|
|
insertions: *us_template_insertions
|
|
|
|
# South Korea
|
|
kr:
|
|
insertions: {}
|
|
|
|
kr_en:
|
|
insertions: *us_template_insertions
|