# Files
# libpostal/resources/formatting/global.yaml
#
# 418 lines
# 14 KiB
# YAML

# Baseline formatting settings.
# NOTE(review): presumably the per-language and per-country sections of this
# file are layered on top of these values - confirm against the loader.
global:
  categories:
    place_direction: right
    direction_probability: 0.9
  insertions:
    # For each component, insertions are mutually exclusive.
    # They don't have to sum to 1 (especially for components
    # likely to be found in most addresses).
    #
    # Every entry names one anchor - `before: <component>`,
    # `after: <component>`, `first: true` or `last: true` - together with
    # the probability of that placement.
    postcode:
      postcode_before_city:
        before: city
        probability: 0.001
      postcode_after_city:
        after: city
        probability: 0.0001
      postcode_before_city_district:
        before: city_district
        probability: 0.0001
      postcode_before_suburb:
        before: suburb
        probability: 0.0001
      # Anchors on state_district, matching the entry name; it previously
      # repeated `state`, duplicating postcode_before_state below.
      postcode_before_state_district:
        before: state_district
        probability: 0.0001
      postcode_before_state:
        before: state
        probability: 0.0001
      postcode_before_country:
        before: country
        probability: 0.05
      postcode_after_country:
        after: country
        probability: 0.01
      postcode_first:
        first: true
        probability: 0.001
      postcode_last:
        last: true
        probability: 0.01
    # PO Box should be the same in most countries
    po_box:
      po_box_before_city:
        before: city
        probability: 0.7
      po_box_after_house:
        after: house
        probability: 0.2
      po_box_first:
        first: true
        probability: 0.1
    care_of:
      care_of_after_attention:
        after: attention
        probability: 0.9
      care_of_after_house:
        after: house
        probability: 0.1
    subdivision:
      subdivision_before_suburb:
        before: suburb
        probability: 1.0
# Overrides for languages (better for e.g. covering all French-speaking
# countries).
#
# Each component lists its placements (an anchor plus a probability) and may
# carry a `conditional` list; every item of that list names another
# `component` and the `probabilities` to use with it.
# NOTE(review): presumably a conditional item applies when that other
# component is present in the address, and the "default: x" remarks give the
# probability mass left to the unconditional placements - confirm against
# the consumer.
languages:
  en:
    insertions:
      building:
        building_after_house:
          after: house
          probability: 0.6
        building_after_road:
          after: road
          probability: 0.3
        building_before_suburb:
          before: suburb
          probability: 0.1
        conditional:
          - component: subdivision
            probabilities:
              building_after_house:
                after: house
                probability: 0.6
              building_after_road:
                after: road
                probability: 0.2
              building_before_subdivision:
                before: subdivision
                probability: 0.2
      entrance:
        # e.g. 123 East 45th St, Entrance B, NYC
        entrance_after_road:
          after: road
          probability: 0.75
        # e.g. Entrance B, Da Vinci House, 44 Saffron Hill, London
        entrance_before_house:
          before: house
          probability: 0.1
        # e.g. Da Vinci House, Entrance B, 44 Saffron Hill, London
        entrance_before_house_number:
          before: house_number
          probability: 0.15
        conditional:
          - component: building
            probabilities:
              # default: 0.8
              entrance_after_building:
                after: building
                probability: 0.2
      staircase:
        # e.g. 123 East 45th St, Staircase C, NYC
        staircase_after_road:
          after: road
          probability: 0.5
        # e.g. Staircase C, Da Vinci House, 44 Saffron Hill, London
        staircase_before_house:
          before: house
          probability: 0.1
        # e.g. Da Vinci House, Staircase C, 44 Saffron Hill, London
        staircase_before_house_number:
          before: house_number
          probability: 0.4
        conditional:
          - component: entrance
            probabilities:
              # default: 0.1
              staircase_after_entrance:
                after: entrance
                probability: 0.9
      level:
        # e.g. 123 East 45th St, 6th Floor, NYC
        level_after_road:
          after: road
          probability: 0.5
        # e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
        level_before_house:
          before: house
          probability: 0.25
        # e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
        level_before_house_number:
          before: house_number
          probability: 0.25
        conditional:
          - component: staircase
            probabilities:
              # default: 0.4
              level_after_staircase:
                after: staircase
                probability: 0.6
          - component: entrance
            probabilities:
              # default: 0.4
              level_after_entrance:
                after: entrance
                probability: 0.6
      unit:
        # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
        unit_before_house:
          before: house
          probability: 0.2
        # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
        unit_before_house_number:
          before: house_number
          probability: 0.6
        # e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London
        # (not as common in UK)
        unit_after_road:
          after: road
          probability: 0.2
        conditional:
          - component: level
            probabilities:
              # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
              unit_before_house:
                before: house
                probability: 0.1
              # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
              unit_before_house_number:
                before: house_number
                probability: 0.1
              # e.g. Floor 5, Apt 6
              unit_after_level:
                after: level
                probability: 0.79
              # e.g. Apt. 6, 5/F (less common)
              unit_before_level:
                before: level
                probability: 0.01
  es:
    insertions:
      building:
        building_after_house_number:
          after: house_number
          probability: 0.8
        building_after_house:
          after: house
          probability: 0.05
        building_before_suburb:
          before: suburb
          probability: 0.15
        conditional:
          - component: subdivision
            probabilities:
              building_after_house_number:
                after: house_number
                probability: 0.8
              building_before_subdivision:
                before: subdivision
                probability: 0.2
      entrance:
        # e.g. Calle Ruiz de Alarcón 23, Entrada B
        entrance_after_house_number:
          after: house_number
          probability: 0.8
        # e.g. Entrada B, Museo del Prado, Calle Ruiz de Alarcón 23
        entrance_before_house:
          before: house
          probability: 0.2
        conditional:
          - component: building
            probabilities:
              entrance_after_building:
                after: building
                probability: 0.9
      staircase:
        # e.g. Calle Ruiz de Alarcón 23, Escalera C
        staircase_after_house_number:
          after: house_number
          probability: 0.8
        # e.g. Escalera C, Museo del Prado, Calle Ruiz de Alarcón 23
        staircase_before_house:
          before: house
          probability: 0.2
        conditional:
          - component: entrance
            probabilities:
              # default: 0.1
              staircase_after_entrance:
                after: entrance
                probability: 0.9
          - component: building
            probabilities:
              # default: 0.1
              staircase_after_building:
                after: building
                probability: 0.9
      level:
        # e.g. Calle Ruiz de Alarcón 23 piso 3
        level_after_house_number:
          after: house_number
          probability: 0.8
        # e.g. Piso 3, Museo del Prado, Calle Ruiz de Alarcón 23
        level_before_house:
          before: house
          probability: 0.1
        # e.g. Museo del Prado, Bajos, Calle Ruiz de Alarcón 23
        level_before_road:
          before: road
          probability: 0.1
        conditional:
          - component: staircase
            probabilities:
              # default: 0.1
              level_after_staircase:
                after: staircase
                probability: 0.9
          - component: entrance
            probabilities:
              # default: 0.1
              level_after_entrance:
                after: entrance
                probability: 0.9
          - component: building
            probabilities:
              # default: 0.1
              level_after_building:
                after: building
                probability: 0.9
      unit:
        unit_before_house:
          before: house
          probability: 0.2
        unit_after_house_number:
          after: house_number
          probability: 0.7
        unit_before_road:
          before: road
          probability: 0.1
        conditional:
          - component: level
            probabilities:
              # default: 0.1
              # e.g. Piso 3 Dpto 12 (most common)
              unit_after_level:
                after: level
                probability: 0.85
              # e.g. Apto 6, 2o piso (less common)
              unit_before_level:
                before: level
                probability: 0.05
          - component: staircase
            probabilities:
              # default: 0.1
              unit_after_staircase:
                after: staircase
                probability: 0.9
          - component: entrance
            probabilities:
              # default: 0.1
              unit_after_entrance:
                after: entrance
                probability: 0.9
          - component: building
            probabilities:
              # default: 0.1
              unit_after_building:
                after: building
                probability: 0.9
  fr:
    # libpostal issue #27
    insertions:
      city:
        city_before_road:
          before: road
          probability: 0.001
# Per-country settings, keyed by lowercase two-letter country code.
countries:
  # Hungary, e.g. 1075, Budapest Kazinczy utca 14
  hu:
    insertions:
      postcode:
        # Only the probability is given here.
        # NOTE(review): presumably the `before: city` anchor comes from the
        # global entry of the same name - confirm against the loader.
        postcode_before_city:
          probability: 0.5
  # Malaysia (islands are bigger than states)
  my:
    admin_components:
      island:
        after:
          - road
          - suburb
          - city_district
          - city
          - state_district
          - state
        before:
          - country
  us:
    insertions:
      level:
        # e.g. 123 East 45th St, 6th Floor, NYC
        level_after_road:
          after: road
          probability: 0.875
        # Level ahead of the house name, e.g. Floor 1, <house>, <street>
        level_before_house:
          before: house
          probability: 0.005
        # Level between house name and number, e.g. <house>, 1st Floor, 123 ...
        level_before_house_number:
          before: house_number
          probability: 0.12
      unit:
        # Unit ahead of the house name, e.g. Apt 6, <house>, <street>
        unit_before_house:
          before: house
          probability: 0.05
        # Unit between house name and number, e.g. <house>, Apt 6, 123 ...
        unit_before_house_number:
          before: house_number
          probability: 0.05
        # e.g. 123 East 45th St, Apt 6, NYC (the most likely placement here)
        unit_after_road:
          after: road
          probability: 0.8
        # NOTE(review): the two level-relative placements below are listed
        # unconditionally, whereas the `en` language settings put the same
        # entries under a `conditional` on level - confirm this is intended.
        # e.g. Floor 5, Apt 6
        unit_after_level:
          after: level
          probability: 0.09
        # e.g. Apt. 6, 5/F (less common)
        unit_before_level:
          before: level
          probability: 0.01