[formatting] New formatter config including random component order changes and default/per-country admin component ordering
This commit is contained in:
285
resources/formatting/global.yaml
Normal file
285
resources/formatting/global.yaml
Normal file
@@ -0,0 +1,285 @@
|
||||
global:
|
||||
# Add these components to templates that don't have them
|
||||
admin_components:
|
||||
subdivision:
|
||||
after:
|
||||
- road
|
||||
before:
|
||||
- suburb
|
||||
- city_district
|
||||
- city
|
||||
- island
|
||||
- state_district
|
||||
- state
|
||||
- postcode
|
||||
- country
|
||||
suburb:
|
||||
after:
|
||||
- road
|
||||
- subdivision
|
||||
before:
|
||||
- city_district
|
||||
- city
|
||||
- island
|
||||
- state_district
|
||||
- state
|
||||
- postcode
|
||||
- country
|
||||
city_district:
|
||||
after:
|
||||
- road
|
||||
- suburb
|
||||
before:
|
||||
- city
|
||||
- island
|
||||
- state_district
|
||||
- state
|
||||
- postcode
|
||||
- country
|
||||
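# This is added to all the templates but only makes it into the output when the address has an island component (NOTE(review): original comment was cut off after "makes it in" - confirm intent)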
|
||||
island:
|
||||
after:
|
||||
- road
|
||||
- suburb
|
||||
- city_district
|
||||
- city
|
||||
before:
|
||||
- state_district
|
||||
- state
|
||||
- country
|
||||
state_district:
|
||||
after:
|
||||
- suburb
|
||||
- city_district
|
||||
- city
|
||||
- island
|
||||
before:
|
||||
- state
|
||||
- country
|
||||
state:
|
||||
after:
|
||||
- suburb
|
||||
- city_district
|
||||
- city
|
||||
- island
|
||||
- state_district
|
||||
before:
|
||||
- country
|
||||
country:
|
||||
after:
|
||||
- suburb
|
||||
- city_district
|
||||
- city
|
||||
- island
|
||||
- state_district
|
||||
- state
|
||||
- postcode
|
||||
|
||||
insertions:
|
||||
# For each component, insertions are mutually exclusive
|
||||
# They don't have to sum to 1 (especially for components
|
||||
# likely to be found in most addresses)
|
||||
postcode:
|
||||
postcode_before_city:
|
||||
before: city
|
||||
probability: 0.0001
|
||||
|
||||
postcode_after_city:
|
||||
after: city
|
||||
probability: 0.0001
|
||||
|
||||
postcode_before_city_district:
|
||||
before: city_district
|
||||
probability: 0.0001
|
||||
|
||||
postcode_before_suburb:
|
||||
before: suburb
|
||||
probability: 0.0001
|
||||
|
||||
postcode_before_state_district:
|
||||
before: state_district  # target matches the entry name; "state" is already covered by postcode_before_state
|
||||
probability: 0.0001
|
||||
|
||||
postcode_before_state:
|
||||
before: state
|
||||
probability: 0.0001
|
||||
|
||||
postcode_before_country:
|
||||
before: country
|
||||
probability: 0.05
|
||||
|
||||
postcode_after_country:
|
||||
after: country
|
||||
probability: 0.01
|
||||
|
||||
postcode_first:
|
||||
first: true
|
||||
probability: 0.001
|
||||
|
||||
postcode_last:
|
||||
last: true
|
||||
probability: 0.01
|
||||
|
||||
# PO Box should be the same in most countries
|
||||
po_box:
|
||||
po_box_before_city:
|
||||
before: city
|
||||
probability: 0.7
|
||||
|
||||
po_box_after_house:
|
||||
after: house
|
||||
probability: 0.2
|
||||
|
||||
po_box_first:
|
||||
first: true
|
||||
probability: 0.1
|
||||
|
||||
# Overrides for languages (better for e.g. covering all French-speaking countries)
|
||||
languages:
|
||||
en:
|
||||
insertions:
|
||||
level:
|
||||
# e.g. 123 East 45th St, 6th Floor, NYC
|
||||
level_after_road:
|
||||
after: road
|
||||
probability: 0.5
|
||||
# e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
|
||||
level_before_house:
|
||||
before: house
|
||||
probability: 0.25
|
||||
# e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
|
||||
level_before_road:
|
||||
before: road
|
||||
probability: 0.25
|
||||
|
||||
unit:
|
||||
# e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
|
||||
unit_before_house:
|
||||
before: house
|
||||
probability: 0.2
|
||||
|
||||
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
||||
unit_before_house_number:
|
||||
before: house_number
|
||||
probability: 0.6
|
||||
|
||||
# e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK)
|
||||
unit_after_road:
|
||||
after: road
|
||||
probability: 0.1
|
||||
|
||||
# e.g. Floor 5, Apt 6
|
||||
unit_after_level:
|
||||
after: level
|
||||
probability: 0.09
|
||||
|
||||
# e.g. Apt. 6, 5/F (less common)
|
||||
unit_before_level:
|
||||
before: level
|
||||
probability: 0.01
|
||||
|
||||
es:
|
||||
insertions:
|
||||
level:
|
||||
# e.g. Calle Ruiz de Alarcón 23 piso 3
|
||||
level_after_house_number:
|
||||
after: house_number
|
||||
probability: 0.8
|
||||
# e.g. Piso 3, Museo del Prado, Calle Ruiz de Alarcón 23
|
||||
level_before_house:
|
||||
before: house
|
||||
probability: 0.1
|
||||
# e.g. Museo del Prado, Bajos, Calle Ruiz de Alarcón 23
|
||||
level_before_road:
|
||||
before: road
|
||||
probability: 0.1
|
||||
|
||||
unit:
|
||||
unit_before_house:
|
||||
before: house
|
||||
probability: 0.05
|
||||
unit_before_house_number:
|
||||
before: house_number
|
||||
probability: 0.05
|
||||
# e.g. Piso 3 Dpto 12 (most common)
|
||||
unit_after_level:
|
||||
after: level
|
||||
probability: 0.8
|
||||
# e.g. Apto 6, 2o piso (less common)
|
||||
unit_before_level:
|
||||
before: level
|
||||
probability: 0.1
|
||||
|
||||
fr:
|
||||
# libpostal issue #27
|
||||
insertions:
|
||||
city:
|
||||
city_before_road:
|
||||
before: road
|
||||
probability: 0.001
|
||||
|
||||
|
||||
countries:
|
||||
# Hungary, e.g. 1075, Budapest Kazinczy utca 14
|
||||
hu:
|
||||
insertions:
|
||||
postcode:
|
||||
postcode_before_city:
|
||||
probability: 0.5
|
||||
|
||||
# Malaysia (islands are bigger than states)
|
||||
my:
|
||||
admin_components:
|
||||
island:
|
||||
after:
|
||||
- road
|
||||
- suburb
|
||||
- city_district
|
||||
- city
|
||||
- state_district
|
||||
- state
|
||||
before:
|
||||
- country
|
||||
|
||||
us:
|
||||
insertions:
|
||||
level:
|
||||
# e.g. 123 East 45th St, 6th Floor, NYC
|
||||
level_after_road:
|
||||
after: road
|
||||
probability: 0.75
|
||||
# e.g. Floor 1, Da Vinci House, 44 Saffron Hill, London
|
||||
level_before_house:
|
||||
before: house
|
||||
probability: 0.125
|
||||
# e.g. Da Vinci House, 1st Floor, 44 Saffron Hill, London
|
||||
level_before_road:
|
||||
before: road
|
||||
probability: 0.125
|
||||
|
||||
unit:
|
||||
# e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
|
||||
unit_before_house:
|
||||
before: house
|
||||
probability: 0.05
|
||||
|
||||
# e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
|
||||
unit_before_house_number:
|
||||
before: house_number
|
||||
probability: 0.05
|
||||
|
||||
# e.g. Da Vinci House, 44 Saffron Hill, Flat 18, London (not as common in UK, but the most likely unit placement in this US override)
|
||||
unit_after_road:
|
||||
after: road
|
||||
probability: 0.8
|
||||
|
||||
# e.g. Floor 5, Apt 6
|
||||
unit_after_level:
|
||||
after: level
|
||||
probability: 0.09
|
||||
|
||||
# e.g. Apt. 6, 5/F (less common)
|
||||
unit_before_level:
|
||||
before: level
|
||||
probability: 0.01
|
||||
|
||||
Reference in New Issue
Block a user