---
# Address component insertion rules: global defaults, then per-language
# and per-country overrides.
# Settings under "global" are the baseline; the "languages" and "countries"
# sections later in this file carry overrides.
global:
  categories:
    place_direction: right
    direction_probability: 0.9

  language_code_replacements:
    ja_rm: en
    ko_rm: en
    zh_pinyin: en

  insertions:
    # For each component, the insertion options are mutually exclusive.
    # Their probabilities don't have to sum to 1 (especially for
    # components likely to be found in most addresses).
    #
    # Each option names its position with exactly one of:
    #   before: <component> / after: <component> / first: true / last: true
    postcode:
      postcode_before_city:
        before: city
        probability: 0.001

      postcode_after_city:
        after: city
        probability: 0.0001

      postcode_before_city_district:
        before: city_district
        probability: 0.0001

      postcode_before_suburb:
        before: suburb
        probability: 0.0001

      postcode_before_state_district:
        before: state_district
        probability: 0.0001

      postcode_before_state:
        before: state
        probability: 0.0001

      postcode_before_country:
        before: country
        probability: 0.05

      postcode_after_country:
        after: country
        probability: 0.01

      postcode_first:
        first: true
        probability: 0.001

      postcode_last:
        last: true
        probability: 0.01

    # PO Box should be the same in most countries
    po_box:
      po_box_before_city:
        before: city
        probability: 0.7

      po_box_after_house:
        after: house
        probability: 0.2

      po_box_first:
        first: true
        probability: 0.1

    care_of:
      care_of_after_attention:
        after: attention
        probability: 0.9
      care_of_after_house:
        after: house
        probability: 0.1

    subdivision:
      subdivision_before_suburb:
        before: suburb
        probability: 1.0
# Overrides for languages (better for e.g. covering all French-speaking countries)
#
# Layout of each component entry:
#   <component>:
#     <option_name>: {before|after: <component>, probability: <p>}
#     conditional:              # alternative options used when another
#       - component: <other>    # component is also present
#         probabilities: {...}
# A "# default: p" comment inside a conditional records the probability mass
# left over after the listed conditional options (1 - sum of the listed ones).
languages:
  # Shared template; aliased below by es, de, nl and da.
  continental_european_languages:
    insertions: &continental_european_template_insertions
      building:
        building_after_house_number:
          after: house_number
          probability: 0.8
        building_after_house:
          after: house
          probability: 0.05
        building_before_suburb:
          before: suburb
          probability: 0.15
        conditional:
          - component: subdivision
            probabilities:
              building_after_house_number:
                after: house_number
                probability: 0.8
              building_before_subdivision:
                before: subdivision
                probability: 0.2

      entrance:
        # e.g. Calle Foo 3, entrada 1
        entrance_after_house_number:
          after: house_number
          probability: 0.8
        entrance_before_house:
          before: house
          probability: 0.2
        conditional:
          - component: building
            probabilities:
              entrance_after_building:
                after: building
                probability: 0.9

      staircase:
        # e.g. 123 East 45th St, Staircase C, NYC
        staircase_after_house_number:
          after: house_number
          probability: 0.8
        # e.g. Staircase C, Da Vinci House, 44 Saffron Hill, London
        staircase_before_house:
          before: house
          probability: 0.2
        conditional:
          - component: entrance
            probabilities:
              # default: 0.1
              staircase_after_entrance:
                after: entrance
                probability: 0.9
          - component: building
            probabilities:
              # default: 0.1
              staircase_after_building:
                after: building
                probability: 0.9

      level:
        # e.g. Calle Ruiz de Alarcón 23 piso 3
        level_after_house_number:
          after: house_number
          probability: 0.95
        # e.g. Piso 3, Museo del Prado, Calle Ruiz de Alarcón 23
        level_before_house:
          before: house
          probability: 0.03
        # e.g. Museo del Prado, Bajos, Calle Ruiz de Alarcón 23
        level_before_road:
          before: road
          probability: 0.02

        conditional:
          - component: staircase
            probabilities:
              level_after_staircase:
                after: staircase
                probability: 0.99
          - component: entrance
            probabilities:
              level_after_entrance:
                after: entrance
                probability: 0.99
          - component: building
            probabilities:
              level_after_building:
                after: building
                probability: 0.99

      unit:
        unit_before_house:
          before: house
          probability: 0.1
        unit_after_house_number:
          after: house_number
          probability: 0.8
        unit_before_road:
          before: road
          probability: 0.1
        conditional:
          - component: level
            probabilities:
              # default: 0.02
              # e.g. Piso 3 Dpto 12 (most common)
              unit_after_level:
                after: level
                probability: 0.93
              # e.g. Apto 6, 2o piso (less common)
              unit_before_level:
                before: level
                probability: 0.05
          - component: staircase
            probabilities:
              # default: 0.1
              unit_after_staircase:
                after: staircase
                probability: 0.9
          - component: entrance
            probabilities:
              # default: 0.1
              unit_after_entrance:
                after: entrance
                probability: 0.9
          - component: building
            probabilities:
              # default: 0.1
              unit_after_building:
                after: building
                probability: 0.9

  en:
    # This anchor is also merged (<<) into the fr entry under "countries".
    insertions: &english_template_insertions
      building:
        building_after_house:
          after: house
          probability: 0.6
        building_after_road:
          after: road
          probability: 0.3
        building_before_suburb:
          before: suburb
          probability: 0.1
        conditional:
          - component: subdivision
            probabilities:
              building_after_house:
                after: house
                probability: 0.6
              building_after_road:
                after: road
                probability: 0.2
              building_before_subdivision:
                before: subdivision
                probability: 0.2
      entrance:
        # e.g. 123 East 45th St, Entrance B, NYC
        entrance_after_road:
          after: road
          probability: 0.75
        # e.g. Entrance B, Da Vinci House, 44 Saffron Hill, London
        entrance_before_house:
          before: house
          probability: 0.1
        # e.g. Da Vinci House, Entrance B, 44 Saffron Hill, London
        entrance_before_house_number:
          before: house_number
          probability: 0.15
        conditional:
          - component: building
            probabilities:
              # default: 0.8
              entrance_after_building:
                after: building
                probability: 0.2

      staircase:
        # e.g. 123 East 45th St, Staircase C, NYC
        staircase_after_road:
          after: road
          probability: 0.5
        # e.g. Staircase C, Da Vinci House, 44 Saffron Hill, London
        staircase_before_house:
          before: house
          probability: 0.1
        # e.g. Da Vinci House, Staircase C, 44 Saffron Hill, London
        staircase_before_house_number:
          before: house_number
          probability: 0.4
        conditional:
          - component: entrance
            probabilities:
              # default: 0.1
              staircase_after_entrance:
                after: entrance
                probability: 0.9

      level:
        # e.g. 123 East 45th St, 6th Floor, NYC
        level_after_road:
          after: road
          probability: 0.5
        # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
        level_before_house:
          before: house
          probability: 0.25
        # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
        level_before_house_number:
          before: house_number
          probability: 0.25

        conditional:
          - component: staircase
            probabilities:
              # default: 0.4
              level_after_staircase:
                after: staircase
                probability: 0.6
          - component: entrance
            probabilities:
              # default: 0.4
              level_after_entrance:
                after: entrance
                probability: 0.6

      unit:
        # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
        unit_before_house:
          before: house
          probability: 0.2

        # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
        unit_before_house_number:
          before: house_number
          probability: 0.6

        # e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
        unit_after_road:
          after: road
          probability: 0.2

        conditional:
          - component: level
            probabilities:
              # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
              unit_before_house:
                before: house
                probability: 0.1
              # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
              unit_before_house_number:
                before: house_number
                probability: 0.1
              # e.g. Floor 5, Apt 6
              unit_after_level:
                after: level
                probability: 0.79
              # e.g. Apt. 6, 5/F (less common)
              unit_before_level:
                before: level
                probability: 0.01

  es:
    insertions: *continental_european_template_insertions

  de:
    insertions: *continental_european_template_insertions

  nl:
    insertions: *continental_european_template_insertions

  da:
    insertions: *continental_european_template_insertions
# Per-country overrides, keyed by country code (optionally suffixed with a
# language code, e.g. dz_fr).
countries:
  # Hungary, e.g. 1075, Budapest Kazinczy utca 14
  hu:
    insertions:
      postcode:
        # NOTE(review): no before/after given here - presumably this only
        # overrides the probability of the global postcode_before_city
        # option; confirm against the consumer's merge logic.
        postcode_before_city:
          probability: 0.5

  # Malaysia (islands are bigger than states)
  my:
    admin_components:
      island:
        after:
          - road
          - suburb
          - city_district
          - city
          - state_district
          - state
        before:
          - country

  us:
    insertions:
      level:
        # e.g. 123 East 45th St, 6th Floor, NYC
        level_after_road:
          after: road
          probability: 0.875
        # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
        level_before_house:
          before: house
          probability: 0.005
        # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
        level_before_house_number:
          before: house_number
          probability: 0.12

      unit:
        # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
        unit_before_house:
          before: house
          probability: 0.05

        # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
        unit_before_house_number:
          before: house_number
          probability: 0.05

        # e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
        unit_after_road:
          after: road
          probability: 0.8

        # NOTE(review): the two options below reference "level" without
        # being inside a conditional on level (the en template makes them
        # conditional) - confirm how the consumer treats them when no
        # level is present.
        # e.g. Floor 5, Apt 6
        unit_after_level:
          after: level
          probability: 0.09

        # e.g. Apt. 6, 5/F (less common)
        unit_before_level:
          before: level
          probability: 0.01

  fr:
    insertions: &france_template_insertions
      # Merge keys are shallow: the level and unit mappings below replace
      # the English ones wholesale, so their conditionals are restated in
      # full. building, entrance and staircase come from the English
      # template unchanged.
      <<: *english_template_insertions
      level:
        level_after_road:
          after: road
          probability: 0.95
        level_before_house:
          before: house
          probability: 0.01
        level_before_house_number:
          before: house_number
          probability: 0.04

        conditional:
          - component: staircase
            probabilities:
              # default: 0.05
              level_after_staircase:
                after: staircase
                probability: 0.95
          - component: entrance
            probabilities:
              # default: 0.05
              level_after_entrance:
                after: entrance
                probability: 0.95

      unit:
        unit_before_house:
          before: house
          probability: 0.02

        # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
        unit_before_house_number:
          before: house_number
          probability: 0.03

        # e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
        unit_after_road:
          after: road
          probability: 0.95

        conditional:
          - component: level
            probabilities:
              unit_before_house:
                before: house
                probability: 0.005
              # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
              unit_before_house_number:
                before: house_number
                probability: 0.01
              # e.g. Floor 5, Apt 6
              unit_after_level:
                after: level
                probability: 0.98
              # e.g. Apt. 6, 5/F (less common)
              unit_before_level:
                before: level
                probability: 0.005
      # libpostal issue #27
      city:
        city_before_road:
          before: road
          probability: 0.001

  # Algeria
  dz_fr:
    insertions: *france_template_insertions

  # Luxembourg
  lu_fr:
    insertions: *france_template_insertions

  # Monaco
  mc:
    insertions: *france_template_insertions

  # Senegal
  sn:
    insertions: *france_template_insertions

  # Tunisia
  tn_fr:
    insertions: *france_template_insertions