Initial fork commit
This commit is contained in:
1001
resources/addresses/bg.yaml
Normal file
1001
resources/addresses/bg.yaml
Normal file
File diff suppressed because it is too large
Load Diff
585
resources/addresses/bs.yaml
Normal file
585
resources/addresses/bs.yaml
Normal file
@@ -0,0 +1,585 @@
|
||||
# bs.yaml
|
||||
# -------
|
||||
# Bosnian language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.7
|
||||
alphanumeric_probability: 0.3
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- staircase
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.1
|
||||
# For unit types like 2/34
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
|
||||
|
||||
numbers:
|
||||
no_number:
|
||||
default:
|
||||
canonical: bez broja
|
||||
abbreviated: bb
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
|
||||
default: &broj
|
||||
canonical: broj
|
||||
abbreviated: br
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "br."
|
||||
whitespace_probability: 0.6
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
alphanumeric_phrase_probability: 0.05
|
||||
no_number_probability: 0.05
|
||||
|
||||
|
||||
and:
|
||||
default: &i
|
||||
canonical: i
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
cross_streets:
|
||||
i: *i
|
||||
at: &na
|
||||
canonical: na
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner: &ugao
|
||||
canonical: ugao
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner_of: &uglu
|
||||
canonical: uglu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_uglu: &na_uglu
|
||||
canonical: na uglu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *i
|
||||
probability: 0.65
|
||||
alternatives:
|
||||
- alternative: *na
|
||||
probability: 0.1
|
||||
- alternative: *uglu
|
||||
probability: 0.1
|
||||
- alternative: *na_uglu
|
||||
probability: 0.1
|
||||
- alternative: *ugao
|
||||
probability: 0.05
|
||||
|
||||
izmedu: &izmedu
|
||||
canonical: između
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
between:
|
||||
default: *izmedu
|
||||
|
||||
levels:
|
||||
sprat: &sprat
|
||||
canonical: sprat
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
kat: &kat
|
||||
canonical: kat
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
prizemlje: &prizemlje
|
||||
canonical: prizemlje
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
parter: &parter
|
||||
canonical: parter
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
mezanino: &mezanin
|
||||
canonical: mezanin
|
||||
half_floors: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
sample: true
|
||||
# e.g. mezanin 2
|
||||
numeric:
|
||||
direction: left
|
||||
# e.g. 2. mezanin
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.1
|
||||
ordinal_probability: 0.2
|
||||
standalone_probability: 0.6
|
||||
podrum: &podrum
|
||||
canonical: podrum
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
# e.g. podrum 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. 1. podrum
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *podrum
|
||||
"-1":
|
||||
default: *podrum
|
||||
# Special token for half-floors
|
||||
half_floors:
|
||||
default: *mezanin
|
||||
"0":
|
||||
default: *prizemlje
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *parter
|
||||
probability: 0.4
|
||||
- alternative: *kat
|
||||
probability: 0.05
|
||||
- alternative: *sprat
|
||||
probability: 0.05
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *kat
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *sprat
|
||||
probability: 0.5
|
||||
numeric_probability: 0.69 # With this probability, pick an integer
|
||||
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: u blizini
|
||||
nearby:
|
||||
default:
|
||||
canonical: u blizini
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: u blizini ovdje
|
||||
probability: 0.3
|
||||
- alternative:
|
||||
canonical: ovde
|
||||
probability: 0.1
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: u blizini mene
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: u
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
right: &desno
|
||||
canonical: desno
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &lijevo
|
||||
canonical: lijevo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *desno
|
||||
probability: 0.5
|
||||
- alternative: *lijevo
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &istok
|
||||
canonical: istok
|
||||
abbreviated: i
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: i
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &zapad
|
||||
canonical: zapad
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &sjever
|
||||
canonical: sjever
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &jug
|
||||
canonical: jug
|
||||
abbreviated: j
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: j
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *sjever
|
||||
probability: 0.25
|
||||
- alternative: *istok
|
||||
probability: 0.23
|
||||
- alternative: *jug
|
||||
probability: 0.23
|
||||
- alternative: *zapad
|
||||
probability: 0.23
|
||||
|
||||
entrances:
|
||||
ulaz: &ulaz
|
||||
canonical: ulaz
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Ulaz 1, Ulaz A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *ulaz
|
||||
numeric_probability: 0.1 # e.g. Ulaz 1
|
||||
alpha_probability: 0.85 # e.g. Ulaz A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
|
||||
staircases:
|
||||
stubiste: &stubiste
|
||||
canonical: stubište
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *stubiste
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *desno
|
||||
probability: 0.2
|
||||
- alternative: *lijevo
|
||||
probability: 0.2
|
||||
- alternative: *sjever
|
||||
probability: 0.15
|
||||
- alternative: *jug
|
||||
probability: 0.15
|
||||
- alternative: *istok
|
||||
probability: 0.15
|
||||
- alternative: *zapad
|
||||
probability: 0.15
|
||||
|
||||
po_boxes:
|
||||
postanski_pretinac: &postanski_pretinac
|
||||
canonical: poštanski pretinac
|
||||
abbreviated: p.p
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *postanski_pretinac
|
||||
numeric_probability: 0.9 # pp 123
|
||||
alpha_probability: 0.05 # p.p A
|
||||
numeric_plus_alpha_probability: 0.04 # pp 123G
|
||||
alpha_plus_numeric_probability: 0.01 # pp A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
stan: &stan
|
||||
canonical: stan
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
apartman: &apartman
|
||||
canonical: apartman
|
||||
abbreviated: ap
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
soba: &soba
|
||||
canonical: soba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ured: &ured
|
||||
canonical: ured
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *stan
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *apartman
|
||||
probability: 0.3
|
||||
- alternative: *soba
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. stan. 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. stan A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.05
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *soba
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *ured
|
||||
probability: 0.4
|
||||
numeric_probability: 0.95 # e.g. soba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
|
||||
alpha_probability: 0.03 # e.g. soba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *soba
|
||||
numeric_probability: 0.95 # e.g. soba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
|
||||
alpha_probability: 0.03 # e.g. soba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
856
resources/addresses/ca.yaml
Normal file
856
resources/addresses/ca.yaml
Normal file
@@ -0,0 +1,856 @@
|
||||
# ca.yaml
|
||||
# -------
|
||||
# Catalan language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
# If no floor number is specified
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.35
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.3
|
||||
alphanumeric_probability: 0.65
|
||||
standalone_probability: 0.05
|
||||
|
||||
numbers:
|
||||
default: &numero
|
||||
canonical: número
|
||||
abbreviated: "nº"
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#" # e.g. #3, #2F, etc.
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
direction: left # affix goes on the number's left
|
||||
|
||||
# Probabilities for numbers
|
||||
numeric_probability: 0.7
|
||||
numeric_affix_probability: 0.3
|
||||
|
||||
and:
|
||||
default: &i
|
||||
canonical: i
|
||||
abbreviated: "&"
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.1
|
||||
|
||||
house_numbers:
|
||||
# sense número (s/n) addresses
|
||||
no_number:
|
||||
default:
|
||||
canonical: sense número
|
||||
abbreviated: s/n
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.2
|
||||
alphanumeric:
|
||||
default: *numero
|
||||
|
||||
alphanumeric_phrase_probability: 0.01
|
||||
no_number_probability: 0.1 # With this probability, use sense número if no house_number is specified
|
||||
|
||||
|
||||
|
||||
levels:
|
||||
# Everywhere except Spain
|
||||
floor: &pis
|
||||
canonical: pis
|
||||
abbreviated: p
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true # Occasionally add variation of "number", e.g. Pis No 2
|
||||
add_number_phrase_probability: 0.05
|
||||
numeric_affix:
|
||||
affix: p
|
||||
direction: left # P2
|
||||
# e.g. 2n pis
|
||||
ordinal:
|
||||
direction: right
|
||||
direction_probability: 0.95 # Let it vary occasionally e.g. Pis 2n
|
||||
standalone_probability: 0.2 # Let e.g. 5è be the entire floor string
|
||||
# If ordinal is selected, chance of e.g. just using 2n without Pis
|
||||
null_phrase_probability: 0.6
|
||||
numeric_probability: 0.2
|
||||
numeric_affix_probability: 0.05
|
||||
ordinal_probability: 0.75
|
||||
# Ground floor
|
||||
baixos: &baixos
|
||||
canonical: baixos
|
||||
abbreviated: bxs
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
pis_baix: &pis_baix
|
||||
canonical: pis baix
|
||||
abbreviated: pb
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
sota: &sota
|
||||
canonical: sota
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# Used when floor number is < 0 (starts at -1 in all countries)
|
||||
soterrani: &soterrani
|
||||
canonical: soterrani
|
||||
abbreviated: so
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
# e.g. soterrani 1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: so
|
||||
direction: left
|
||||
# e.g. segon soterrani
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
sub_soterrani: &sub_soterrani
|
||||
canonical: sub soterrani
|
||||
abbreviated: ss
|
||||
sample: true
|
||||
# e.g. sub soterrani 1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: ss
|
||||
direction: left
|
||||
# e.g. segon sub soterrani
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 2
|
||||
# Soterrani 2 == Sub-soterrani 1
|
||||
number_subtract_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
entresol: &entresol
|
||||
canonical: entresòl
|
||||
abbreviated: entl
|
||||
half_floors: true
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.1
|
||||
# e.g. entresòl 2
|
||||
numeric:
|
||||
direction: left
|
||||
# e.g. ent2
|
||||
numeric_affix:
|
||||
affix: ent
|
||||
direction: left
|
||||
# e.g. segon entresòl
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.1
|
||||
numeric_affix_probability: 0.1
|
||||
ordinal_probability: 0.2
|
||||
standalone_probability: 0.6
|
||||
pis_principal: &pis_principal
|
||||
canonical: pis principal
|
||||
abbreviated: pis pral
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.5
|
||||
principal: &principal
|
||||
canonical: principal
|
||||
abbreviated: pral
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
atic: &atic
|
||||
canonical: àtic
|
||||
abbreviated: àt
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.2
|
||||
sobreatic: &sobreatic
|
||||
canonical: sobreàtic
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *soterrani
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *sub_soterrani
|
||||
probability: 0.3995
|
||||
- alternative: *pis
|
||||
probability: 0.0005
|
||||
"-1":
|
||||
default: *soterrani
|
||||
probability: 0.9995
|
||||
alternatives:
|
||||
- alternative: *pis
|
||||
probability: 0.0005
|
||||
# Special token for half-floors
|
||||
half_floors:
|
||||
default: *entresol
|
||||
"0":
|
||||
default: *baixos
|
||||
probability: 0.495
|
||||
alternatives:
|
||||
- alternative: *pis_baix
|
||||
probability: 0.395
|
||||
- alternative: *sota
|
||||
probability: 0.1
|
||||
- alternative: *pis
|
||||
# Pis 0 is uncommon
|
||||
probability: 0.01
|
||||
top:
|
||||
default: *pis
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *atic
|
||||
probability: 0.1
|
||||
- alternative: *sobreatic
|
||||
probability: 0.05
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *pis
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
numeric_probability: 0.99
|
||||
alpha_probability: 0.01
|
||||
|
||||
blocks:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: bloc
|
||||
abbreviated: bl
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: a prop de
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: prop de
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: prop
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: a prop
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: proper
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: proper a
|
||||
probability: 0.05
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: proper
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: a prop
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: a prop d'aquí
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: a prop d'aqui
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: aquí
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: aqui
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: a prop meu
|
||||
in:
|
||||
default:
|
||||
canonical: a
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: dins
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: en
|
||||
probability: 0.2
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
cross_streets:
|
||||
and: *i
|
||||
amb: &amb
|
||||
canonical: amb
|
||||
a: &a
|
||||
canonical: a
|
||||
corner_of: &cantonada_de
|
||||
canonical: cantonada de
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
at_the_corner_of: &a_la_cantonada_de
|
||||
canonical: a la cantonada de
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
corner: &cantonada
|
||||
canonical: cantonada
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
|
||||
intersection:
|
||||
default: *i
|
||||
probability: 0.55
|
||||
alternatives:
|
||||
- alternative: *amb
|
||||
probability: 0.2
|
||||
- alternative: *a
|
||||
probability: 0.1
|
||||
- alternative: *cantonada_de
|
||||
probability: 0.09
|
||||
- alternative: *a_la_cantonada_de
|
||||
probability: 0.05
|
||||
- alternative: *cantonada
|
||||
probability: 0.01
|
||||
|
||||
between:
|
||||
canonical: entre
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
po_boxes:
|
||||
apartat: &apartat
|
||||
canonical: apartat
|
||||
abbreviated: apt
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.4 # Apt No 1234
|
||||
numeric_probability: 1.0
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *apartat
|
||||
numeric_probability: 0.9 # Apt 123
|
||||
alpha_probability: 0.05 # Apt A
|
||||
numeric_plus_alpha_probability: 0.04 # Apt 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Apt A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: codi postal
|
||||
abbreviated: cp
|
||||
sample: true
|
||||
canonical_probability: 0.01
|
||||
abbreviated_probability: 0.95
|
||||
sample_probability: 0.04
|
||||
|
||||
numeric:
|
||||
# Postcodes in Spain and Latin America are sometimes prefixed by CP
|
||||
direction: left
|
||||
|
||||
numeric_affix:
|
||||
affix: cp
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.7
|
||||
numeric_probability: 0.18
|
||||
numeric_affix_probability: 0.12
|
||||
strict_numeric: true
|
||||
|
||||
directions:
|
||||
right: &dreta
|
||||
canonical: dreta
|
||||
abbreviated: dta
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: d
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
left: &esquerra
|
||||
canonical: esquerra
|
||||
abbreviated: esq
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: e
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
rear: &posterior
|
||||
canonical: posterior
|
||||
abbreviated: pos
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
front: &front
|
||||
canonical: front
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *dreta
|
||||
probability: 0.45
|
||||
- alternative: *esquerra
|
||||
probability: 0.45
|
||||
- alternative: *posterior
|
||||
probability: 0.05
|
||||
- alternative: *front
|
||||
probability: 0.05
|
||||
|
||||
anteroposterior:
|
||||
alternatives:
|
||||
- alternative: *front
|
||||
probability: 0.5
|
||||
- alternative: *posterior
|
||||
probability: 0.5
|
||||
|
||||
lateral:
|
||||
alternatives:
|
||||
- alternative: *dreta
|
||||
probability: 0.5
|
||||
- alternative: *esquerra
|
||||
probability: 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
east: &est
|
||||
canonical: est
|
||||
abbreviated: e
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: e
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &oest
|
||||
canonical: oest
|
||||
abbreviated: w
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: w
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &sud
|
||||
canonical: sud
|
||||
abbreviated: s
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *est
|
||||
probability: 0.25
|
||||
- alternative: *sud
|
||||
probability: 0.25
|
||||
- alternative: *oest
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
entrada: &entrada
|
||||
canonical: entrada
|
||||
abbreviated: entr
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Entrance 1, Entrance A, etc.
|
||||
alphanumeric:
|
||||
default: *entrada
|
||||
numeric_probability: 0.1 # e.g. Entrance 1
|
||||
alpha_probability: 0.85 # e.g. Entrance A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *oest
|
||||
- alternative: *dreta
|
||||
- alternative: *esquerra
|
||||
- alternative: *posterior
|
||||
- alternative: *front
|
||||
|
||||
staircases:
|
||||
escala: &escala
|
||||
canonical: escala
|
||||
abbreviated: esc
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
# For alphanumerics, Stair A, Stair 1, etc.
|
||||
default: *escala
|
||||
numeric_probability: 0.6 # e.g. Escala 1
|
||||
alpha_probability: 0.35 # e.g. Escala A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right # e.g. Escala Esq
|
||||
direction_probability: 0.8
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *oest
|
||||
- alternative: *dreta
|
||||
- alternative: *esquerra
|
||||
- alternative: *posterior
|
||||
- alternative: *front
|
||||
|
||||
units:
|
||||
flat: &apartament
|
||||
canonical: apartament
|
||||
abbreviated: apmt
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
door: &porta
|
||||
canonical: porta
|
||||
abbreviated: pta
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
# If it's just porta B, it is often written as just e.g. 3r B for "tercer pis, porta B"
|
||||
null_phrase_probability: 0.15
|
||||
ordinal:
|
||||
direction: right
|
||||
gender: f
|
||||
direction_probability: 0.95 # Let it vary occasionally e.g. Porta 2a
|
||||
null_phrase_probability: 0.8 # Let e.g. 5a be the entire unit string
|
||||
numeric_probability: 0.25
|
||||
ordinal_probability: 0.75
|
||||
lletra: &lletra
|
||||
canonical: lletra
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
office: &oficina
|
||||
canonical: oficina
|
||||
abbreviated: of
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
# Another word for unit (the "used more in Colombia" note appears inherited from the Spanish spec - TODO confirm Catalan usage)
|
||||
unitat: &unitat
|
||||
canonical: unitat
|
||||
abbreviated: un
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
lot: &lot
|
||||
canonical: lot
|
||||
abbreviated: lt
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
parcel: &parcella
|
||||
canonical: parcel·la
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
habitacio: &habitacio
|
||||
canonical: habitació
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
casa: &casa
|
||||
canonical: casa
|
||||
numeric:
|
||||
direction: left
|
||||
room: &sala
|
||||
canonical: sala
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *porta
|
||||
probability: 0.8
|
||||
sample: true
|
||||
alternatives:
|
||||
- alternative: *apartament
|
||||
probability: 0.1
|
||||
- alternative: *casa
|
||||
probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2n Esquerra, 2 Dreta, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
add_direction_numeric: true # Only for numbers
|
||||
add_direction_standalone: true # A unit can be as simple as "D"
|
||||
|
||||
numeric_probability: 0.7 # e.g. Porta 1a
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Porta 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Porta A1
|
||||
alpha_probability: 0.28 # e.g. Porta A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
alpha:
|
||||
default: *porta
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *lletra
|
||||
probability: 0.12
|
||||
- alternative: *apartament
|
||||
probability: 0.05
|
||||
- alternative: *casa
|
||||
probability: 0.01
|
||||
- alternative: *unitat
|
||||
probability: 0.01
|
||||
- alternative: *habitacio
|
||||
probability: 0.01
|
||||
|
||||
zones:
|
||||
residential: *unit_alphanumeric
|
||||
commercial:
|
||||
default: *oficina
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *sala
|
||||
probability: 0.2
|
||||
|
||||
numeric_probability: 0.9 # e.g. Oficina 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Oficina 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Oficina A1
|
||||
alpha_probability: 0.08 # e.g. Oficina A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
alpha:
|
||||
default: *oficina
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *sala
|
||||
probability: 0.15
|
||||
- alternative: *lletra
|
||||
probability: 0.05
|
||||
|
||||
industrial:
|
||||
default: *lot
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *oficina
|
||||
probability: 0.3
|
||||
- alternative: *unitat
|
||||
probability: 0.19
|
||||
- alternative: *parcella
|
||||
probability: 0.01
|
||||
|
||||
numeric_probability: 0.9 # e.g. Lot 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Lot 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Lot A1
|
||||
alpha_probability: 0.08 # e.g. Lot A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
university:
|
||||
default: *sala
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *porta
|
||||
probability: 0.1
|
||||
|
||||
numeric_probability: 0.9 # e.g. Sala 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
|
||||
alpha_probability: 0.08 # e.g. Sala A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
alpha:
|
||||
default: *sala
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *porta
|
||||
probability: 0.08
|
||||
- alternative: *lletra
|
||||
probability: 0.02
|
||||
|
||||
allotments:
|
||||
lot:
|
||||
default: *lot
|
||||
numeric_probability: 0.8
|
||||
alphanumeric_probability: 0.1
|
||||
alpha_probability: 0.1
|
||||
parcel:
|
||||
default: *parcella
|
||||
numeric_probability: 0.3
|
||||
alphanumeric_probability: 0.3
|
||||
alpha_probability: 0.4
|
||||
lot_probability: 0.9
|
||||
parcel_probability: 0.06
|
||||
lot_plus_parcel_probability: 0.02
|
||||
parcel_plus_lot_probability: 0.02
|
||||
570
resources/addresses/cs.yaml
Normal file
570
resources/addresses/cs.yaml
Normal file
@@ -0,0 +1,570 @@
|
||||
# cs.yaml
|
||||
# -------
|
||||
# Czech language specification
|
||||
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.04
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
# Note: no combinations because of the house numbering scheme
|
||||
|
||||
numbers:
|
||||
default: &cislo
|
||||
canonical: číslo
|
||||
abbreviated: č
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "č."
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
and:
|
||||
default: &a
|
||||
canonical: a
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
conscription_numbers:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: číslo popisné
|
||||
abbreviated: "č.p."
|
||||
canonical_probability: 0.05
|
||||
abbreviated_probability: 0.85
|
||||
sample: true
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
cross_streets:
|
||||
and: *a
|
||||
at: &na
|
||||
canonical: na
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner_of: &rohu
|
||||
canonical: rohu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner: &roh
|
||||
canonical: roh
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &na_rohu
|
||||
canonical: na rohu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *a
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *na
|
||||
probability: 0.1
|
||||
- alternative: *rohu
|
||||
probability: 0.1
|
||||
- alternative: *roh
|
||||
probability: 0.1
|
||||
- alternative: *na_rohu
|
||||
probability: 0.1
|
||||
|
||||
between:
|
||||
canonical: mezi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &patro
|
||||
canonical: patro
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
nadzemni_podlazi: &nadzemni_podlazi
|
||||
canonical: nadzemní podlaží
|
||||
abbreviated: np
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.8
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
etaz: &etaz
|
||||
canonical: etáž
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
prizemi: &prizemi
|
||||
canonical: přízemí
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
podzemni_podlazi: &podzemni_podlazi
|
||||
canonical: podzemní podlaží
|
||||
abbreviated: pp
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.3
|
||||
# e.g. podzemní podlaží 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. pp1
|
||||
numeric_affix:
|
||||
affix: pp
|
||||
direction: left
|
||||
# e.g. 1. podzemní podlaží
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *podzemni_podlazi
|
||||
"-1":
|
||||
default: *podzemni_podlazi
|
||||
"0":
|
||||
default: *prizemi
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *patro
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *patro
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *nadzemni_podlazi
|
||||
probability: 0.19
|
||||
- alternative: *etaz
|
||||
probability: 0.01
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: poblíž
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: v blízkém okolí
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: u
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: kolem
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
probability: 0.05
|
||||
nearby:
|
||||
default:
|
||||
canonical: poblíž
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.45
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: blízko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: v blízkosti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tady poblíž
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tady
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: okolo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: v okolí
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
near_me:
|
||||
default:
|
||||
canonical: v blízkosti mně
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: v
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: ve
|
||||
probability: 0.3
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
right: &prava
|
||||
canonical: pravá
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &leva
|
||||
canonical: levá
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *prava
|
||||
probability: 0.5
|
||||
- alternative: *leva
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &vychod
|
||||
canonical: východ
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &zapad
|
||||
canonical: západ
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &sever
|
||||
canonical: sever
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &jih
|
||||
canonical: jih
|
||||
abbreviated: j
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: j
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *sever
|
||||
probability: 0.25
|
||||
- alternative: *vychod
|
||||
probability: 0.25
|
||||
- alternative: *jih
|
||||
probability: 0.25
|
||||
- alternative: *zapad
|
||||
probability: 0.25
|
||||
entrances:
|
||||
vchod: &vchod
|
||||
canonical: vchod
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Vchod 1, Vchod A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *vchod
|
||||
numeric_probability: 0.1 # e.g. Vchod 1
|
||||
alpha_probability: 0.85 # e.g. Vchod A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
schodiste: &schodiste
|
||||
canonical: schodiště
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *schodiste
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *sever
|
||||
- alternative: *jih
|
||||
- alternative: *vychod
|
||||
- alternative: *zapad
|
||||
|
||||
po_boxes:
|
||||
postovni_prihradka: &postovni_prihradka
|
||||
canonical: poštovní přihrádka
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # e.g. poštovní přihrádka č. 1234
|
||||
alphanumeric:
|
||||
default: *postovni_prihradka
|
||||
numeric_probability: 0.9 # poštovní přihrádka 123
|
||||
alpha_probability: 0.05 # poštovní přihrádka A
|
||||
numeric_plus_alpha_probability: 0.04 # poštovní přihrádka 123G
|
||||
alpha_plus_numeric_probability: 0.01 # poštovní přihrádka A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
apartaman: &apartaman
|
||||
canonical: apartmán
|
||||
abbreviated: apt
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
pokoj: &pokoj
|
||||
canonical: pokoj
|
||||
abbreviated: pok
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
kancelar: &kancelar
|
||||
canonical: kancelář
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *apartaman
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *pokoj
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. apt. 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. apt. A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.01
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *pokoj
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *kancelar
|
||||
probability: 0.4
|
||||
numeric_probability: 0.95 # e.g. pokoj 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. pokoj 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. pokoj A1
|
||||
alpha_probability: 0.03 # e.g. pokoj A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *pokoj
|
||||
numeric_probability: 0.95 # e.g. pokoj 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. pok 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. pokoj A1
|
||||
alpha_probability: 0.03 # e.g. pokoj A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
599
resources/addresses/da.yaml
Normal file
599
resources/addresses/da.yaml
Normal file
@@ -0,0 +1,599 @@
|
||||
# da.yaml
|
||||
# -------
|
||||
# Danish language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85
|
||||
alphanumeric_probability: 0.1
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- level
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.9
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- entrance
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.9
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.001
|
||||
|
||||
|
||||
numbers:
|
||||
default: &nummer
|
||||
canonical: nummer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *nummer
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
|
||||
and:
|
||||
default: &og
|
||||
canonical: og
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *og
|
||||
corner_of: &hjorne_af
|
||||
canonical: hjørne af
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &pa_hjornet_af
|
||||
canonical: på hjørnet af
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *og
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *hjorne_af
|
||||
probability: 0.15
|
||||
- alternative: *pa_hjornet_af
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: mellem
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &sal
|
||||
canonical: sal
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
etage: &etage
|
||||
canonical: etage
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
stuen: &stuen
|
||||
canonical: stuen
|
||||
abbreviated: st
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
stueetage: &stueetage
|
||||
canonical: stueetage
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
sample_probability: 0.7
|
||||
kaelderen: &kaelderen
|
||||
canonical: kælderen
|
||||
abbreviated: kl
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
# e.g. 1 kælderen
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.8
|
||||
# e.g. k1
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: left
|
||||
# e.g. 1. kl
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *kaelderen
|
||||
"-1":
|
||||
default: *kaelderen
|
||||
"0":
|
||||
default: *stuen
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *stueetage
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *sal
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *etage
|
||||
probability: 0.3
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: i nærheden af
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: tæt på
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: tæt ved
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
nearby:
|
||||
default:
|
||||
canonical: i nærheden
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.4
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: rundt her
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: nær her
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: nær
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: omkring her
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tæt på her
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: nær mig
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: i nærheden af mig
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tæt på mig
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: i
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: om
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: på
|
||||
probability: 0.1
|
||||
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
directions:
|
||||
right: &til_hojre
|
||||
canonical: til højre
|
||||
abbreviated: t.h
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: t.h
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &til_venstre
|
||||
canonical: til venstre
|
||||
abbreviated: t.v
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: t.v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
middle: &midt_for
|
||||
canonical: midt for
|
||||
abbreviated: m.f
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: m.f
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
alternatives:
|
||||
- alternative: *til_hojre
|
||||
probability: 0.45
|
||||
- alternative: *til_venstre
|
||||
probability: 0.45
|
||||
- alternative: *midt_for
|
||||
probability: 0.1
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
east: &ost
|
||||
canonical: øst
|
||||
abbreviated: ø
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: ø
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &vest
|
||||
canonical: vest
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &syd
|
||||
canonical: syd
|
||||
abbreviated: s
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *ost
|
||||
probability: 0.25
|
||||
- alternative: *syd
|
||||
probability: 0.25
|
||||
- alternative: *vest
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
indgang: &indgang
|
||||
canonical: indgang
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Indgang 1, Indgang A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *indgang
|
||||
numeric_probability: 0.1 # e.g. Indgang 1
|
||||
alpha_probability: 0.85 # e.g. Indgang A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
stiege: &stiege
|
||||
canonical: stiege
|
||||
abbreviated: stg
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
trappe: &trappe
|
||||
canonical: trappe
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *trappe
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *stiege
|
||||
probability: 0.2
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *syd
|
||||
- alternative: *ost
|
||||
- alternative: *vest
|
||||
|
||||
po_boxes:
|
||||
postboks: &postboks
|
||||
canonical: postboks
|
||||
abbreviated: pb
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Pb No 1234
|
||||
boks: &boks
|
||||
canonical: boks
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Boks No 1234
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *postboks
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *boks
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # Pb 123
|
||||
alpha_probability: 0.05 # Pb A
|
||||
numeric_plus_alpha_probability: 0.04 # Pb 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Pb A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
lejlighed: &lejlighed
|
||||
canonical: lejlighed
|
||||
abbreviated: ljd
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
null_phrase_probability: 0.5
|
||||
# Lejlighed nummer 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
hus: &hus
|
||||
canonical: hus
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
vaerelse: &vaerelse
|
||||
canonical: værelse
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *lejlighed
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *hus
|
||||
probability: 0.1
|
||||
- alternative: *vaerelse
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. Lejlighed 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. Lejl A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2R, 2L, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.5
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Lejlighed til højre
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
728
resources/addresses/de.yaml
Normal file
728
resources/addresses/de.yaml
Normal file
@@ -0,0 +1,728 @@
|
||||
# de.yaml
|
||||
# -------
|
||||
# Note: this will only apply to the German language code, which encompasses Germany,
|
||||
# Austria, Switzerland (but not Swiss-German, which has its own language code),
|
||||
# Liechtenstein, Luxembourg (Luxembourgish has its own language code), and part of Belgium.
|
||||
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85
|
||||
alphanumeric_probability: 0.1
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
combinations:
|
||||
# e.g. 2/34, more common way to specify a unit number in German
|
||||
# if unit exists in the first place
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.8
|
||||
- separator: "-"
|
||||
probability: 0.1
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.05
|
||||
|
||||
|
||||
numbers:
|
||||
default: &nummer
|
||||
canonical: nummer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
gebaude: &gebaude
|
||||
canonical: gebäude
|
||||
abbreviated: geb
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.05
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric:
|
||||
default: *nummer
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *gebaude
|
||||
probability: 0.05
|
||||
|
||||
alphanumeric_phrase_probability: 0.05
|
||||
|
||||
conscription_numbers:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: konskriptionsnummer
|
||||
abbreviated: konskr. nr
|
||||
canonical_probability: 0.15
|
||||
abbreviated_probability: 0.65
|
||||
sample: true
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
and:
|
||||
default: &und
|
||||
canonical: und
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *und
|
||||
corner_of: &ecke_von
|
||||
canonical: ecke von
|
||||
at_the_corner_of: &an_der_ecke_von
|
||||
canonical: an der ecke von
|
||||
intersection:
|
||||
default: *und
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *ecke_von
|
||||
probability: 0.15
|
||||
- alternative: *an_der_ecke_von
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: zwischen
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &obergeschoss
|
||||
canonical: obergeschoss
|
||||
abbreviated: og
|
||||
sample: true
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: og
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.3
|
||||
numeric_affix_probability: 0.5
|
||||
ordinal_probability: 0.2
|
||||
etage: &etage
|
||||
canonical: etage
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
stock: &stock
|
||||
canonical: stock
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.1
|
||||
ordinal_probability: 0.9
|
||||
erdgeschoss: &erdgeschoss
|
||||
canonical: erdgeschoss
|
||||
abbreviated: eg
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
untergeschoss: &untergeschoss
|
||||
canonical: untergeschoss
|
||||
abbreviated: ug
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
# e.g. Untergeschoss 1
|
||||
numeric:
|
||||
direction: left
|
||||
# e.g. ug1 (direction: left places the affix before the number)
|
||||
numeric_affix:
|
||||
affix: ug
|
||||
direction: left
|
||||
# e.g. 1. UG
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
unterste_etage: &unterste_etage
|
||||
canonical: unterste etage
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
oberste_etage: &oberste_etage
|
||||
canonical: oberste etage
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *untergeschoss
|
||||
"-1":
|
||||
default: *untergeschoss
|
||||
"0":
|
||||
default: *erdgeschoss
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *unterste_etage
|
||||
probability: 0.1
|
||||
"top":
|
||||
default: *obergeschoss
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *stock
|
||||
probability: 0.1
|
||||
- alternative: *etage
|
||||
probability: 0.05
|
||||
- alternative: *oberste_etage
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *obergeschoss
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *stock
|
||||
probability: 0.1
|
||||
- alternative: *etage
|
||||
probability: 0.05
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: nähe
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: bei
|
||||
probability: 0.3
|
||||
- alternative:
|
||||
canonical: nah
|
||||
probability: 0.15
|
||||
- alternative:
|
||||
canonical: nahe an
|
||||
probability: 0.05
|
||||
nearby:
|
||||
default:
|
||||
canonical: hier in der nähe
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.4
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: in der nähe
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.3
|
||||
- alternative:
|
||||
canonical: in der nähe hier
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: in der nähe von
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: nahe gelegen
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: hier in der gegend
|
||||
probability: 0.05
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: in meiner nähe
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: in der nähe zu mir
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: in
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: im
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: um
|
||||
probability: 0.2
|
||||
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
directions:
|
||||
right: &rechts
|
||||
canonical: rechts
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: r
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &links
|
||||
canonical: links
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: l
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *rechts
|
||||
probability: 0.5
|
||||
- alternative: *links
|
||||
probability: 0.5
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
east: &ost
|
||||
canonical: ost
|
||||
abbreviated: o
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: o
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &west
|
||||
canonical: west
|
||||
abbreviated: w
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: w
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &sud
|
||||
canonical: süd
|
||||
abbreviated: s
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *ost
|
||||
probability: 0.25
|
||||
- alternative: *sud
|
||||
probability: 0.25
|
||||
- alternative: *west
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
eingang: &eingang
|
||||
canonical: eingang
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Eingang 1, Eingang A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *eingang
|
||||
numeric_probability: 0.1 # e.g. Eingang 1
|
||||
alpha_probability: 0.85 # e.g. Eingang A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
stiege: &stiege
|
||||
canonical: stiege
|
||||
abbreviated: stg
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
treppe: &treppe
|
||||
canonical: treppe
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *stiege
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *treppe
|
||||
probability: 0.4
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *ost
|
||||
- alternative: *west
|
||||
|
||||
po_boxes:
|
||||
postfach: &postfach
|
||||
canonical: postfach
|
||||
abbreviated: pf
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # PF No 1234
|
||||
numeric_probability: 1.0
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *postfach
|
||||
numeric_probability: 0.9 # PF 123
|
||||
alpha_probability: 0.05 # PF A
|
||||
numeric_plus_alpha_probability: 0.04 # PF 123G
|
||||
alpha_plus_numeric_probability: 0.01 # PF A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
halle: &halle
|
||||
canonical: halle
|
||||
numeric:
|
||||
direction: left
|
||||
wohnung: &wohnung
|
||||
canonical: wohnung
|
||||
abbreviated: whg
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.3
|
||||
plural:
|
||||
canonical: wohnungen
|
||||
numeric:
|
||||
direction: left
|
||||
# Wohnung nummer 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
haus: &haus
|
||||
canonical: haus
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
wohnungsnummer: &wohnungsnummer
|
||||
canonical: wohnungsnummer
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
appartement: &appartement
|
||||
canonical: appartement
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
buro: &buro
|
||||
canonical: büro
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
zimmer: &zimmer
|
||||
canonical: zimmer
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *wohnung
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *wohnungsnummer
|
||||
probability: 0.1
|
||||
- alternative: *appartement
|
||||
probability: 0.05
|
||||
- alternative: *haus
|
||||
probability: 0.05
|
||||
|
||||
numeric_probability: 0.9 # e.g. Wohnung 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. Wohnung A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2R, 2L, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Wohnung Rechts
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
zone:
|
||||
residential: *unit_alphanumeric
|
||||
commercial:
|
||||
default: *buro
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *zimmer
|
||||
probability: 0.1
|
||||
university:
|
||||
default: *halle
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *zimmer
|
||||
probability: 0.1
|
||||
|
||||
|
||||
countries:
|
||||
# Austria
|
||||
at:
|
||||
# Staircase and entrance numbers more common
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.3
|
||||
standalone_probability: 0.1
|
||||
staircase:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
entrance:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
unit:
|
||||
null_probability: 0.4
|
||||
alphanumeric_probability: 0.6
|
||||
|
||||
# Combined apartment numbers are very common
|
||||
combinations:
|
||||
# e.g. Neubaugasse 55/A/1/5
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- entrance
|
||||
- staircase
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.98
|
||||
- separator: "-"
|
||||
probability: 0.02
|
||||
probability: 0.9
|
||||
# e.g. Neubaugasse 55/1/5
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- staircase
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.98
|
||||
- separator: "-"
|
||||
probability: 0.02
|
||||
probability: 0.8
|
||||
# e.g. Neubaugasse 55/5
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
probability: 0.7
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.98
|
||||
- separator: "-"
|
||||
probability: 0.02
|
||||
|
||||
units:
|
||||
top: &top
|
||||
canonical: top
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &austria_units_alphanumeric
|
||||
default: *top
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *haus
|
||||
probability: 0.15
|
||||
- alternative: *wohnung
|
||||
probability: 0.05
|
||||
- alternative: *wohnungsnummer
|
||||
probability: 0.025
|
||||
- alternative: *appartement
|
||||
probability: 0.025
|
||||
368
resources/addresses/el.yaml
Normal file
368
resources/addresses/el.yaml
Normal file
@@ -0,0 +1,368 @@
|
||||
# el.yaml
|
||||
# -------
|
||||
# Greek language specification
|
||||
|
||||
|
||||
alphabet: αβγδεζηθικλμνξοπρστυφχψω
|
||||
alphabet_probability: 0.8
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.05
|
||||
|
||||
entrance:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
unit:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.1
|
||||
|
||||
levels:
|
||||
orofos: &orofos
|
||||
canonical: όροφος
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
orofos_latin: &orofos_latin
|
||||
canonical: órofos
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
isogelo: &isogelo
|
||||
canonical: ισόγειο
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
isogelo_latin: &isogelo_latin
|
||||
canonical: isógeio
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
imiorofos: &imiorofos
|
||||
canonical: ημιώροφος
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
imiorofos_latin: &imiorofos_latin
|
||||
canonical: imiórofos
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
|
||||
ypogeio: &ypogeio
|
||||
canonical: υπόγειο
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.01
|
||||
ordinal_probability: 0.005
|
||||
ypogeio_latin: &ypogeio_latin
|
||||
canonical: ypógeio
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.01
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *ypogeio
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *ypogeio_latin
|
||||
probability: 0.1
|
||||
"-1":
|
||||
default: *ypogeio
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *ypogeio_latin
|
||||
probability: 0.1
|
||||
|
||||
half_floors:
|
||||
default: *imiorofos
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *imiorofos_latin
|
||||
probability: 0.1
|
||||
|
||||
"0":
|
||||
default: *isogelo
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *isogelo_latin
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *orofos
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *orofos_latin
|
||||
probability: 0.1
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
entrances:
|
||||
eisodos: &eisodos
|
||||
canonical: είσοδος
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
eisodos_latin: &eisodos_latin
|
||||
canonical: eísodos
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# είσοδος 1, etc.
|
||||
alphanumeric:
|
||||
default: *eisodos
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *eisodos_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.1
|
||||
alpha_probability: 0.9
|
||||
|
||||
staircases:
|
||||
skala: &skala
|
||||
canonical: σκάλα
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
skala_latin: &skala_latin
|
||||
canonical: skála
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
# For alphanumerics, skála A, skála 1, etc.
|
||||
default: *skala
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *skala_latin
|
||||
probability: 0.1
|
||||
numeric_probability: 0.6 # e.g. skála 1
|
||||
alpha_probability: 0.35 # e.g. skála A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
po_boxes:
|
||||
tachydromiki_thyrida: &tachydromiki_thyrida
|
||||
canonical: ταχυδρομική θυρίδα
|
||||
abbreviated: τ.θ
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
tachydromiki_thyrida_latin: &tachydromiki_thyrida_latin
|
||||
canonical: tachydromikí thyrída
|
||||
abbreviated: t.th
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
default: *tachydromiki_thyrida
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *tachydromiki_thyrida_latin
|
||||
probability: 0.2
|
||||
numeric_probability: 0.9 # t.th 123
|
||||
alpha_probability: 0.05 # t.th А
|
||||
numeric_plus_alpha_probability: 0.04 # t.th 123А
|
||||
alpha_plus_numeric_probability: 0.01 # t.th А123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
diamerisma: &diamerisma
|
||||
canonical: διαμέρισμα
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
diamerisma_latin: &diamerisma_latin
|
||||
canonical: diamérisma
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
|
||||
domatio: &domatio
|
||||
canonical: δωμάτιο
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
domatio_latin: &domatio_latin
|
||||
canonical: domátio
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
|
||||
grafeiou: &grafeiou
|
||||
canonical: γραφείου
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
grafeiou_latin: &grafeiou_latin
|
||||
canonical: grafeíou
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *diamerisma
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *diamerisma_latin
|
||||
probability: 0.1
|
||||
- alternative: *domatio
|
||||
probability: 0.09
|
||||
- alternative: *domatio_latin
|
||||
probability: 0.01
|
||||
|
||||
numeric_probability: 0.9 # e.g. diamérisma 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. διαμέρισμα А
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
zone:
|
||||
residential: *unit_alphanumeric
|
||||
commercial:
|
||||
default: *grafeiou
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *grafeiou_latin
|
||||
probability: 0.1
|
||||
university:
|
||||
default: *domatio
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *domatio_latin
|
||||
probability: 0.1
|
||||
1468
resources/addresses/en.yaml
Normal file
1468
resources/addresses/en.yaml
Normal file
File diff suppressed because it is too large
Load Diff
1189
resources/addresses/es.yaml
Normal file
1189
resources/addresses/es.yaml
Normal file
File diff suppressed because it is too large
Load Diff
470
resources/addresses/et.yaml
Normal file
470
resources/addresses/et.yaml
Normal file
@@ -0,0 +1,470 @@
|
||||
# et.yaml
|
||||
# -------
|
||||
# Estonian language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.97
|
||||
alphanumeric_probability: 0.02
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.95
|
||||
- separator: " - "
|
||||
probability: 0.05
|
||||
probability: 0.7
|
||||
|
||||
|
||||
numbers:
|
||||
default: &number
|
||||
canonical: number
|
||||
abbreviated: nbr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *number
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
|
||||
and:
|
||||
default: &ja
|
||||
canonical: ja
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *ja
|
||||
corner_of: &nurgas
|
||||
canonical: nurgas
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &nurgal
|
||||
canonical: nurgal
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *ja
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *nurgas
|
||||
probability: 0.15
|
||||
- alternative: *nurgal
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: vahel
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &korrusel
|
||||
canonical: korrusel
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
parter: &parter
|
||||
canonical: parter
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
kelder: &kelder
|
||||
canonical: kelder
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
standalone_probability: 1.0
|
||||
keldris: &keldris
|
||||
canonical: keldris
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# e.g. 1 keldris
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.8
|
||||
# e.g. k1
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: left
|
||||
# e.g. 1. keldris
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.05
|
||||
numeric_affix_probability: 0.9
|
||||
ordinal_probability: 0.05
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *kelder
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *keldris
|
||||
probability: 0.15
|
||||
"-1":
|
||||
default: *kelder
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *keldris
|
||||
probability: 0.1
|
||||
- alternative: *korrusel
|
||||
probability: 0.05
|
||||
"1":
|
||||
default: *parter
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *korrusel
|
||||
probability: 0.5
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *korrusel
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: lähedal
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
nearby:
|
||||
default:
|
||||
canonical: lähedal
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: siin lähedal
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: siinkandis
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: lähedal mulle
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.7
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
|
||||
directions:
|
||||
right: &paremal
|
||||
canonical: paremal
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: p
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
paramale: &paremale
|
||||
canonical: paremale
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: p
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &vasakul
|
||||
canonical: vasakul
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
vasakule: &vasakule
|
||||
canonical: vasakule
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
alternatives:
|
||||
- alternative: *paremal
|
||||
probability: 0.25
|
||||
- alternative: *paremale
|
||||
probability: 0.25
|
||||
- alternative: *vasakul
|
||||
probability: 0.25
|
||||
- alternative: *vasakule
|
||||
probability: 0.25
|
||||
|
||||
cardinal_directions:
|
||||
east: &ida
|
||||
canonical: ida
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
west: &laas
|
||||
canonical: lääs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
north: &pohi
|
||||
canonical: põhi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
south: &louna
|
||||
canonical: lõuna
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
alternatives:
|
||||
- alternative: *pohi
|
||||
probability: 0.25
|
||||
- alternative: *ida
|
||||
probability: 0.25
|
||||
- alternative: *louna
|
||||
probability: 0.25
|
||||
- alternative: *laas
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
sissepaas: &sissepaas
|
||||
canonical: sissepääs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Sissepääs 1, Sissepääs A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *sissepaas
|
||||
numeric_probability: 0.1 # e.g. Sissepääs 1
|
||||
alpha_probability: 0.85 # e.g. Sissepääs A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
trepikoda: &trepikoda
|
||||
canonical: trepikoda
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *trepikoda
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *pohi
|
||||
- alternative: *louna
|
||||
- alternative: *ida
|
||||
- alternative: *laas
|
||||
|
||||
po_boxes:
|
||||
postboks: &abonementpostkast
|
||||
canonical: abonementpostkast
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # abonementpostkast #1234
|
||||
kast: &kast
|
||||
canonical: kast
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Kast #1234
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *abonementpostkast
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *kast
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # 123
|
||||
alpha_probability: 0.05 # A
|
||||
numeric_plus_alpha_probability: 0.04 # 123G
|
||||
alpha_plus_numeric_probability: 0.01 # A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
korter: &korter
|
||||
canonical: korter
|
||||
abbreviated: k
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
null_phrase_probability: 0.3
|
||||
# e.g. korter number 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
ruumi: &ruumi
|
||||
canonical: ruumi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *korter
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *ruumi
|
||||
probability: 0.1
|
||||
numeric_probability: 1.0 # e.g. korter 1
|
||||
|
||||
# Separate random probability for adding directions like 2P, 2V, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.005
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Korter vasakule
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.05
|
||||
375
resources/addresses/eu.yaml
Normal file
375
resources/addresses/eu.yaml
Normal file
@@ -0,0 +1,375 @@
|
||||
# eu.yaml
|
||||
# -------
|
||||
# Basque language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
# If no floor number is specified
|
||||
null_probability: 0.8
|
||||
alphanumeric_probability: 0.2
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.4
|
||||
alphanumeric_probability: 0.6
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- level
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.85
|
||||
- separator: "/"
|
||||
probability: 0.15
|
||||
probability: 0.7
|
||||
|
||||
|
||||
and:
|
||||
default: &eta
|
||||
canonical: eta
|
||||
abbreviated: "&"
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.1
|
||||
|
||||
house_numbers:
|
||||
# zenbakirik gabe (zk.g) addresses
|
||||
no_number:
|
||||
default:
|
||||
canonical: zenbakirik gabe
|
||||
abbreviated: zk.g
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.3
|
||||
|
||||
no_number_probability: 0.1 # With this probability, use zenbakirik gabe if no house_number is specified
|
||||
|
||||
levels:
|
||||
floor: &solairua
|
||||
canonical: solairua
|
||||
abbreviated: sol
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
# e.g. 2. solairua
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.25
|
||||
ordinal_probability: 0.75
|
||||
# Ground floor
|
||||
beheko_solairua: &beheko_solairua
|
||||
canonical: beheko solairua
|
||||
abbreviated: beheko sol
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
behe_solairua: &behe_solairua
|
||||
canonical: behe-solairua
|
||||
abbreviated: behe-sol
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
aliases:
|
||||
"0":
|
||||
default: *beheko_solairua
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *behe_solairua
|
||||
probability: 0.4
|
||||
- alternative: *solairua
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *solairua
|
||||
numeric_probability: 0.99
|
||||
alpha_probability: 0.01
|
||||
|
||||
blocks:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: blokea
|
||||
abbreviated: bl
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.2
|
||||
ordinal_probability: 0.8
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: gertu
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: gertuko
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: hemen gertu
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: hemen
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: me gertu
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.7
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
|
||||
cross_streets:
|
||||
and: *eta
|
||||
txoko: &txoko
|
||||
canonical: txoko
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
|
||||
intersection:
|
||||
default: *eta
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *txoko
|
||||
probability: 0.2
|
||||
|
||||
between:
|
||||
canonical: arteko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
po_boxes:
|
||||
posta_kutxa: &posta_kutxa
|
||||
canonical: posta-kutxa
|
||||
abbreviated: p.-ku
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_probability: 1.0
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *posta_kutxa
|
||||
numeric_probability: 0.9 # P.-Ku 123
|
||||
alpha_probability: 0.05 # P.-Ku A
|
||||
numeric_plus_alpha_probability: 0.04 # P.-Ku 123G
|
||||
alpha_plus_numeric_probability: 0.01 # P.-Ku A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: posta-kodea
|
||||
abbreviated: p.-k
|
||||
sample: true
|
||||
canonical_probability: 0.01
|
||||
abbreviated_probability: 0.9
|
||||
sample_probability: 0.09
|
||||
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
numeric_affix:
|
||||
affix: p.-k.
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.7
|
||||
numeric_probability: 0.18
|
||||
numeric_affix_probability: 0.12
|
||||
strict_numeric: true
|
||||
|
||||
directions:
|
||||
right: &eskuina
|
||||
canonical: eskuina
|
||||
abbreviated: esk
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: esk.
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.9
|
||||
numeric_affix_probability: 0.1
|
||||
left: &ezkerkada
|
||||
canonical: ezkerkada
|
||||
abbreviated: ezk
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: ezk.
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.9
|
||||
numeric_affix_probability: 0.1
|
||||
ezkerreko: &ezkerreko
|
||||
canonical: ezkerreko
|
||||
abbreviated: ezk.-ko
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alternatives:
|
||||
- alternative: *eskuina
|
||||
probability: 0.5
|
||||
- alternative: *ezkerkada
|
||||
probability: 0.5
|
||||
|
||||
|
||||
entrances:
|
||||
sarrera: &sarrera
|
||||
canonical: sarrera
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Sarrera 1, Sarrera A, etc.
|
||||
alphanumeric:
|
||||
default: *sarrera
|
||||
numeric_probability: 0.1 # e.g. Sarrera 1
|
||||
alpha_probability: 0.85 # e.g. Sarrera A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *eskuina
|
||||
- alternative: *ezkerreko
|
||||
|
||||
staircases:
|
||||
eskailera: &eskailera
|
||||
canonical: eskailera
|
||||
abbreviated: eskra
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
# For alphanumerics, Eskra A, Eskra 1, etc.
|
||||
default: *eskailera
|
||||
numeric_probability: 0.6 # e.g. Eskra 1
|
||||
alpha_probability: 0.35 # e.g. Eskra A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left # e.g. Ezk.-ko Eskra
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *eskuina
|
||||
- alternative: *ezkerreko
|
||||
|
||||
units:
|
||||
flat: &apartamentu
|
||||
canonical: apartamentu
|
||||
abbreviated: aptu
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
# If it's just apartamentu B, many times it's just e.g. 3. B for "3. solairua, apartamentu B"
|
||||
null_phrase_probability: 0.15
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.6
|
||||
ordinal_probability: 0.4
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *apartamentu
|
||||
|
||||
# Separate random probability for adding directions like 2. Ezk, 2 Esk, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
add_direction_numeric: true # Only for numbers
|
||||
add_direction_standalone: true # A unit can be as simple as "D"
|
||||
|
||||
numeric_probability: 0.7 # e.g. 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. A1
|
||||
alpha_probability: 0.28 # e.g. A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
403
resources/addresses/fi.yaml
Normal file
403
resources/addresses/fi.yaml
Normal file
@@ -0,0 +1,403 @@
|
||||
# fi.yaml
|
||||
# -------
|
||||
# Finnish language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.97
|
||||
alphanumeric_probability: 0.02
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- staircase
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: " "
|
||||
probability: 0.8
|
||||
- separator: "-"
|
||||
probability: 0.1
|
||||
- separator: "/"
|
||||
probability: 0.05
|
||||
- separator: " - "
|
||||
probability: 0.05
|
||||
probability: 0.85
|
||||
|
||||
numbers:
|
||||
default: &numero
|
||||
canonical: numero
|
||||
abbreviated: nro
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.4
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.7
|
||||
numeric_affix_probability: 0.3
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *numero
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
|
||||
and:
|
||||
default: &ja
|
||||
canonical: ja
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *ja
|
||||
corner_of: &kulmassa
|
||||
canonical: kulmassa
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *ja
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *kulmassa
|
||||
probability: 0.3
|
||||
|
||||
between:
|
||||
canonical: välillä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &kerros
|
||||
canonical: kerros
|
||||
abbreviated: krs
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *kerros
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: lähellä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
nearby:
|
||||
default:
|
||||
canonical: lähistöllä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: lähellä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tässä lähellä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: täällä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: lähellä minua
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.7
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
|
||||
directions:
|
||||
right: &oikea
|
||||
canonical: oikea
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: o
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
oikealla: &oikealla
|
||||
canonical: oikealla
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: o
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &vasen
|
||||
canonical: vasen
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
vasemmalla: &vasemmalla
|
||||
canonical: vasemmalla
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
alternatives:
|
||||
- alternative: *oikea
|
||||
probability: 0.25
|
||||
- alternative: *oikealla
|
||||
probability: 0.25
|
||||
- alternative: *vasen
|
||||
probability: 0.25
|
||||
- alternative: *vasemmalla
|
||||
probability: 0.25
|
||||
|
||||
cardinal_directions:
|
||||
east: &itaan
|
||||
canonical: itään
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
west: &lansi
|
||||
canonical: länsi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
north: &pohja
|
||||
canonical: pohja
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
south: &etela
|
||||
canonical: etelä
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
alternatives:
|
||||
- alternative: *pohja
|
||||
probability: 0.25
|
||||
- alternative: *itaan
|
||||
probability: 0.25
|
||||
- alternative: *etela
|
||||
probability: 0.25
|
||||
- alternative: *lansi
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
sissepaas: &sisaankaynti
|
||||
canonical: sisäänkäynti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Sisäänkäynti 1, Sisäänkäynti A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *sisaankaynti
|
||||
numeric_probability: 0.1 # e.g. Sisäänkäynti 1
|
||||
alpha_probability: 0.85 # e.g. Sisäänkäynti A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
portaikko: &portaikko
|
||||
canonical: portaikko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *portaikko
|
||||
alpha_probability: 1.0
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *pohja
|
||||
- alternative: *etela
|
||||
- alternative: *itaan
|
||||
- alternative: *lansi
|
||||
|
||||
po_boxes:
|
||||
postilokero: &postilokero
|
||||
canonical: postilokero
|
||||
abbreviated: pl
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # PL #1234
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *postilokero
|
||||
numeric_probability: 0.9 # 123
|
||||
alpha_probability: 0.05 # A
|
||||
numeric_plus_alpha_probability: 0.04 # 123G
|
||||
alpha_plus_numeric_probability: 0.01 # A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
asunto: &asunto
|
||||
canonical: asunto
|
||||
abbreviated: as
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
null_phrase_probability: 0.3
|
||||
# as nro 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
ruumi: &huone
|
||||
canonical: huone
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *asunto
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *huone
|
||||
probability: 0.1
|
||||
numeric_probability: 1.0 # e.g. as 1
|
||||
|
||||
# Separate random probability for adding directions like 2O, 2V, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.005
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. asunto vasemmalla
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.05
|
||||
951
resources/addresses/fr.yaml
Normal file
951
resources/addresses/fr.yaml
Normal file
@@ -0,0 +1,951 @@
|
||||
# Note: default config is for France. Canadian, Swiss, Belgian, and other
|
||||
# conventions go in country overrides
|
||||
|
||||
components:
|
||||
level:
|
||||
# If no floor number is specified
|
||||
null_probability: 0.8
|
||||
alphanumeric_probability: 0.2
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.8
|
||||
alphanumeric_probability: 0.2
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.8
|
||||
- separator: "-"
|
||||
probability: 0.1
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.005
|
||||
|
||||
numbers:
|
||||
default: &numero
|
||||
canonical: numéro
|
||||
abbreviated: "nº"
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
sample_exclude:
|
||||
- "#" # Used in numeric affix. Needs to be quoted, otherwise it's a comment
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
# Probabilities for numbers
|
||||
numeric_probability: 0.7
|
||||
numeric_affix_probability: 0.3
|
||||
|
||||
and:
|
||||
default: &and
|
||||
canonical: et
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.25
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
house_numbers:
|
||||
# sans numéro (s/n) addresses
|
||||
no_number:
|
||||
canonical: sans numéro
|
||||
abbreviated: s/n
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *numero
|
||||
|
||||
alphanumeric_phrase_probability: 0.01
|
||||
no_number_probability: 0.05 # With this probability, use "sans numéro" if no house_number is specified
|
||||
|
||||
levels:
|
||||
floor: &etage
|
||||
canonical: étage
|
||||
abbreviated: ét
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.75
|
||||
ordinal_probability: 0.25
|
||||
niveau: &niveau
|
||||
canonical: niveau
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.75
|
||||
ordinal_probability: 0.25
|
||||
bel_etage: &bel_etage
|
||||
canonical: bel étage
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
etage_noble: &etage_noble
|
||||
canonical: étage noble
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
dernier_etage: &dernier_etage
|
||||
canonical: dernier étage
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
basement: &sous_sol
|
||||
canonical: sous-sol
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
standalone_probability: 0.99
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
sub_basement: &soubassement
|
||||
canonical: soubassement
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 2
|
||||
number_subtract_abs_value: 1
|
||||
standalone_probability: 0.99
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
mezzanine: &entresol
|
||||
canonical: entresol
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# Ground floor
|
||||
rez_de_chaussee: &rez_de_chaussee
|
||||
canonical: rez-de-chaussée
|
||||
abbreviated: rdc
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.3
|
||||
rez_de_chaussee_bas: &rez_de_chaussee_bas
|
||||
canonical: rez-de-chaussée bas
|
||||
abbreviated: rcb
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
rez_de_chaussee_haut: &rez_de_chaussee_haut
|
||||
canonical: rez-de-chaussée haut
|
||||
abbreviated: rch
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
parterre: &parterre
|
||||
canonical: parterre
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
rez_de_jardin: &rez_de_jardin
|
||||
canonical: rez-de-jardin
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *sous_sol
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *soubassement
|
||||
probability: 0.3995
|
||||
- alternative: *etage
|
||||
probability: 0.0005
|
||||
"-1":
|
||||
default: *sous_sol
|
||||
probability: 0.9995
|
||||
alternatives:
|
||||
- alternative: *etage
|
||||
probability: 0.0005
|
||||
half_floors:
|
||||
default: *entresol
|
||||
"0":
|
||||
default: *rez_de_chaussee
|
||||
probability: 0.74
|
||||
alternatives:
|
||||
- alternative: *rez_de_jardin
|
||||
probability: 0.01
|
||||
- alternative: *rez_de_chaussee_bas
|
||||
probability: 0.1
|
||||
- alternative: *rez_de_chaussee_haut
|
||||
probability: 0.1
|
||||
- alternative: *etage
|
||||
probability: 0.05
|
||||
"1":
|
||||
default: *etage
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *bel_etage
|
||||
probability: 0.1
|
||||
- alternative: *etage_noble
|
||||
probability: 0.1
|
||||
top:
|
||||
default: *etage
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *dernier_etage
|
||||
probability: 0.1
|
||||
|
||||
alphanumeric:
|
||||
default: *etage
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *niveau
|
||||
probability: 0.05
|
||||
numeric_probability: 0.99
|
||||
alpha_probability: 0.01
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
|
||||
cross_streets:
|
||||
# 26th & 6th Avenue
|
||||
and: *and
|
||||
# 26th @ Broadway
|
||||
a: &a
|
||||
canonical: à
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
au: &au
|
||||
canonical: au
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
corner_of: &langle_de
|
||||
canonical: l'angle de
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
at_the_corner_of: &a_langle_de
|
||||
canonical: à l'angle de
|
||||
|
||||
intersection:
|
||||
default: *and
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *a
|
||||
probability: 0.025
|
||||
- alternative: *au
|
||||
probability: 0.025
|
||||
- alternative: *langle_de
|
||||
probability: 0.15
|
||||
- alternative: *a_langle_de
|
||||
probability: 0.1
|
||||
|
||||
# 26th betw 5th Ave and 6th Ave
|
||||
between:
|
||||
canonical: entre
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5 # Probability of using parentheses e.g. (between 5th and 6th)
|
||||
|
||||
directions:
|
||||
right: &droit
|
||||
canonical: droit
|
||||
abbreviated: dr
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: d
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.7
|
||||
numeric_affix_probability: 0.3
|
||||
left: &gauche
|
||||
canonical: gauche
|
||||
abbreviated: g
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: g
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
rear: &arriere
|
||||
canonical: arrière
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
front: &avant
|
||||
canonical: avant
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *droit
|
||||
probability: 0.49
|
||||
- alternative: *gauche
|
||||
probability: 0.49
|
||||
- alternative: *arriere
|
||||
probability: 0.01
|
||||
- alternative: *avant
|
||||
probability: 0.01
|
||||
|
||||
anteroposterior:
|
||||
alternatives:
|
||||
- alternative: *avant
|
||||
probability: 0.5
|
||||
- alternative: *arriere
|
||||
probability: 0.5
|
||||
|
||||
lateral:
|
||||
alternatives:
|
||||
- alternative: *droit
|
||||
probability: 0.5
|
||||
- alternative: *gauche
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &est
|
||||
canonical: est
|
||||
abbreviated: e
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: e
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &ouest
|
||||
canonical: ouest
|
||||
abbreviated: o
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: o
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &sud
|
||||
canonical: sud
|
||||
abbreviated: s
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *est
|
||||
probability: 0.25
|
||||
- alternative: *sud
|
||||
probability: 0.25
|
||||
- alternative: *ouest
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
entrance: &entrance
|
||||
canonical: entrance
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Entrance 1, Entrance A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *entrance
|
||||
numeric_probability: 0.1 # e.g. Entrance 1
|
||||
alpha_probability: 0.85 # e.g. Entrance A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
modifier:
|
||||
direction: right # e.g. Entrance Nord
|
||||
direction_probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *ouest
|
||||
- alternative: *droit
|
||||
- alternative: *gauche
|
||||
- alternative: *arriere
|
||||
- alternative: *avant
|
||||
|
||||
staircases:
|
||||
escalier: &escalier
|
||||
canonical: escalier
|
||||
abbreviated: esc
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
# For alphanumerics, Escalier A, Esc 1, etc.
|
||||
default: *escalier
|
||||
numeric_probability: 0.6 # e.g. Escalier 1
|
||||
alpha_probability: 0.35 # e.g. Escalier A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right # e.g. Escalier Gauche
|
||||
direction_probability: 0.9
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *ouest
|
||||
- alternative: *droit
|
||||
- alternative: *gauche
|
||||
- alternative: *arriere
|
||||
- alternative: *avant
|
||||
|
||||
|
||||
po_boxes:
|
||||
boite_postal: &boite_postal
|
||||
canonical: boîte postale
|
||||
abbreviated: bp
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # BP No 1234
|
||||
numeric_probability: 1.0
|
||||
case_postal: &case_postal
|
||||
canonical: case postale
|
||||
abbreviated: cp
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # CP No 1234
|
||||
numeric_probability: 1.0
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *boite_postal
|
||||
numeric_probability: 0.9 # BP 123
|
||||
alpha_probability: 0.05 # BP A
|
||||
numeric_plus_alpha_probability: 0.04 # BP 123G
|
||||
alpha_plus_numeric_probability: 0.01 # BP A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
units:
|
||||
flat: &appartement
|
||||
canonical: appartement
|
||||
abbreviated: app
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
unit: &unite
|
||||
canonical: unité
|
||||
abbreviated: u
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
suite: &suite
|
||||
canonical: suite
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.4
|
||||
office: &bureau
|
||||
canonical: bureau
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.3
|
||||
door: &porte
|
||||
canonical: porte
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
room: &salle
|
||||
canonical: salle
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
chambre: &chambre
|
||||
canonical: chambre
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
boite: &boite
|
||||
canonical: boîte
|
||||
abbreviated: bte
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
lot: &lotissement
|
||||
canonical: lotissement
|
||||
abbreviated: lot
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
parcelle: &parcelle
|
||||
canonical: parcelle
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
allotments:
|
||||
lot:
|
||||
default: *lotissement
|
||||
numeric_probability: 0.8
|
||||
alphanumeric_probability: 0.1
|
||||
alpha_probability: 0.1
|
||||
parcel:
|
||||
default: *parcelle
|
||||
numeric_probability: 0.3
|
||||
alphanumeric_probability: 0.3
|
||||
alpha_probability: 0.4
|
||||
lot_probability: 0.9
|
||||
parcel_probability: 0.06
|
||||
lot_plus_parcel_probability: 0.02
|
||||
parcel_plus_lot_probability: 0.02
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *appartement
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
# e.g. just plain #3 or No. 4
|
||||
- alternative: *numero
|
||||
probability: 0.05
|
||||
- alternative: *porte
|
||||
probability: 0.095
|
||||
- alternative: *chambre
|
||||
probability: 0.005
|
||||
numeric_probability: 0.9 # e.g. Appartement 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. Appartement A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2D, 2G, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Unité Gauche
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
zones:
|
||||
residential: *unit_alphanumeric
|
||||
commercial:
|
||||
default: *bureau
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *suite
|
||||
probability: 0.2
|
||||
|
||||
numeric_probability: 0.9 # e.g. Bureau 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Bureau 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Bureau A1
|
||||
alpha_probability: 0.08 # e.g. Bureau A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
industrial:
|
||||
default: *lotissement
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *bureau
|
||||
probability: 0.3
|
||||
- alternative: *unite
|
||||
probability: 0.19
|
||||
- alternative: *parcelle
|
||||
probability: 0.01
|
||||
|
||||
numeric_probability: 0.9 # e.g. Lot 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Lot 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Lot A1
|
||||
alpha_probability: 0.08 # e.g. Lot A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
university:
|
||||
default: *salle
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *porte
|
||||
probability: 0.1
|
||||
|
||||
numeric_probability: 0.9 # e.g. Salle 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Salle 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Salle A1
|
||||
alpha_probability: 0.08 # e.g. Salle A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: près de
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: à coté de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: proche de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: proches de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: a cote de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: pres de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: aux environs de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: à proximité de
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: a proximite de
|
||||
probability: 0.05
|
||||
nearby:
|
||||
default:
|
||||
canonical: proche
|
||||
probability: 0.4
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: à coté
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: a cote
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: près d'ici
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: près dici
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: pres d'ici
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: pres dici
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: près de là
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: pres de la
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: par ici
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: dans les alentours
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: à proximité de là
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: a proximite de la
|
||||
probability: 0.05
|
||||
near_me:
|
||||
default:
|
||||
canonical: proche de chez moi
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: près de moi
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: pres de moi
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: à proximité de moi
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: a proximite de moi
|
||||
probability: 0.1
|
||||
in:
|
||||
default:
|
||||
canonical: à
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: en
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: a
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: dans
|
||||
probability: 0.1
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
countries:
|
||||
# Belgium
|
||||
be:
|
||||
units:
|
||||
alphanumeric:
|
||||
default: *boite
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *appartement
|
||||
probability: 0.1
|
||||
# e.g. just plain #3 or No. 4
|
||||
- alternative: *numero
|
||||
probability: 0.05
|
||||
- alternative: *porte
|
||||
probability: 0.095
|
||||
- alternative: *chambre
|
||||
probability: 0.005
|
||||
# Canada
|
||||
ca:
|
||||
components:
|
||||
|
||||
unit:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- unit
|
||||
- house_number
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.04
|
||||
- separator: "-"
|
||||
probability: 0.95
|
||||
- separator: " - "
|
||||
probability: 0.01
|
||||
probability: 0.1
|
||||
levels:
|
||||
numbering_starts_at: 1
|
||||
aliases:
|
||||
"1":
|
||||
# Have to do this because etage is numeric
|
||||
# and has keys like "numeric_probability" which
|
||||
# we don't want to infect rez_de_chaussee when doing
|
||||
# a recursive merge
|
||||
default: *etage
|
||||
probability: 0.1
|
||||
alternatives:
|
||||
- alternative: *rez_de_chaussee
|
||||
probability: 0.8
|
||||
- alternative: *bel_etage
|
||||
probability: 0.05
|
||||
- alternative: *etage_noble
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
alphanumeric:
|
||||
# More common in Canada, as in the US
|
||||
use_floor_probability: 0.35
|
||||
|
||||
po_boxes:
|
||||
alphanumeric:
|
||||
default: *case_postal
|
||||
# Switzerland
|
||||
ch:
|
||||
levels:
|
||||
aliases:
|
||||
"0":
|
||||
default: *parterre
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *rez_de_chaussee
|
||||
probability: 0.05
|
||||
- alternative: *etage
|
||||
probability: 0.05
|
||||
po_boxes:
|
||||
alphanumeric:
|
||||
default: *case_postal
|
||||
269
resources/addresses/he.yaml
Normal file
269
resources/addresses/he.yaml
Normal file
@@ -0,0 +1,269 @@
|
||||
# he.yaml
|
||||
# -------
|
||||
# Hebrew language specification
|
||||
|
||||
|
||||
alphabet: אבגדהוזחטיכךלמםנןסעפףצץקרשת
|
||||
alphabet_probability: 0.8
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.05
|
||||
|
||||
entrance:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
unit:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- entrance
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.7
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- entrance
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: " "
|
||||
probability: 0.5
|
||||
- separator: ""
|
||||
probability: 0.2
|
||||
- separator: "/"
|
||||
probability: 0.1
|
||||
- separator: "-"
|
||||
probability: 0.1
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.7
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.1
|
||||
|
||||
levels:
|
||||
koma: &koma
|
||||
canonical: קומה
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
koma_latin: &koma_latin
|
||||
canonical: koma
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
komat_karka: &komat_karka
|
||||
canonical: קומת קרקע
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
komat_karka_latin: &komat_karka_latin
|
||||
canonical: komát karká
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
|
||||
martef: &martef
|
||||
canonical: מרתף
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.01
|
||||
ordinal_probability: 0.005
|
||||
martef_latin: &martef_latin
|
||||
canonical: martef
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: left
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.01
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *martef
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *martef_latin
|
||||
probability: 0.1
|
||||
"-1":
|
||||
default: *martef
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *martef_latin
|
||||
probability: 0.1
|
||||
"0":
|
||||
default: *komat_karka
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *komat_karka_latin
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *koma
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *koma_latin
|
||||
probability: 0.1
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
entrances:
|
||||
knisa: &knisa
|
||||
canonical: כניסה
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
knisa_latin: &knisa_latin
|
||||
canonical: knisa
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Entrance (knisa) 1, entrance A, etc.
|
||||
alphanumeric:
|
||||
default: *knisa
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *knisa_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.1
|
||||
alpha_probability: 0.9
|
||||
|
||||
po_boxes:
|
||||
ta_doar: &ta_doar
|
||||
canonical: תיבת דואר
|
||||
abbreviated: ת.ד.
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ta_doar_latin: &ta_doar_latin
|
||||
canonical: abonementnyy pochtovyy yashchik
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
default: *ta_doar
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *ta_doar_latin
|
||||
probability: 0.2
|
||||
numeric_probability: 0.9 # ta doar 123
|
||||
alpha_probability: 0.05 # ta doar А
|
||||
numeric_plus_alpha_probability: 0.04 # ta doar 123А
|
||||
alpha_plus_numeric_probability: 0.01 # ta doar А123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
dira: &dira
|
||||
canonical: דירה
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
dira_latin: &dira_latin
|
||||
canonical: dira
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *dira
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *dira_latin
|
||||
probability: 0.1
|
||||
|
||||
numeric_probability: 0.9 # e.g. dira 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. dira А
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
586
resources/addresses/hr.yaml
Normal file
586
resources/addresses/hr.yaml
Normal file
@@ -0,0 +1,586 @@
|
||||
# hr.yaml
|
||||
# -------
|
||||
# Croatian language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.7
|
||||
alphanumeric_probability: 0.3
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- staircase
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.1
|
||||
# For unit types like 2/34
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
|
||||
|
||||
numbers:
|
||||
no_number:
|
||||
default:
|
||||
canonical: bez broja
|
||||
abbreviated: bb
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
|
||||
default: &broj
|
||||
canonical: broj
|
||||
abbreviated: br
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "br."
|
||||
whitespace_probability: 0.6
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
alphanumeric_phrase_probability: 0.05
|
||||
no_number_probability: 0.05
|
||||
|
||||
|
||||
and:
|
||||
default: &i
|
||||
canonical: i
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
|
||||
cross_streets:
|
||||
i: *i
|
||||
at: &na
|
||||
canonical: na
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner: &ugao
|
||||
canonical: ugao
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner_of: &uglu
|
||||
canonical: uglu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_uglu: &na_uglu
|
||||
canonical: na uglu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *i
|
||||
probability: 0.65
|
||||
alternatives:
|
||||
- alternative: *na
|
||||
probability: 0.1
|
||||
- alternative: *uglu
|
||||
probability: 0.1
|
||||
- alternative: *na_uglu
|
||||
probability: 0.1
|
||||
- alternative: *ugao
|
||||
probability: 0.05
|
||||
|
||||
izmedu: &izmedu
|
||||
canonical: između
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
between:
|
||||
default: *izmedu
|
||||
|
||||
levels:
|
||||
kat: &kat
|
||||
canonical: kat
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
etaza: &etaza
|
||||
canonical: etaža
|
||||
abbreviated: et
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
prizemlje: &prizemlje
|
||||
canonical: prizemlje
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
parter: &parter
|
||||
canonical: parter
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
mezanino: &polukat
|
||||
canonical: polukat
|
||||
half_floors: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
sample: true
|
||||
# e.g. polukat 2
|
||||
numeric:
|
||||
direction: left
|
||||
# e.g. 2. polukat
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.1
|
||||
ordinal_probability: 0.2
|
||||
standalone_probability: 0.6
|
||||
podrum: &podrum
|
||||
canonical: podrum
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
# e.g. podrum 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. 1. podrum
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *podrum
|
||||
"-1":
|
||||
default: *podrum
|
||||
# Special token for half-floors
|
||||
half_floors:
|
||||
default: *polukat
|
||||
"0":
|
||||
default: *prizemlje
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *parter
|
||||
probability: 0.4
|
||||
- alternative: *kat
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *kat
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *etaza
|
||||
probability: 0.05
|
||||
numeric_probability: 0.69 # With this probability, pick an integer
|
||||
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: u blizini
|
||||
nearby:
|
||||
default:
|
||||
canonical: u blizini
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: u blizini ovdje
|
||||
probability: 0.3
|
||||
- alternative:
|
||||
canonical: oko ovdje
|
||||
probability: 0.1
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: u blizini mene
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: u
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
right: &desno
|
||||
canonical: desno
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &lijevo
|
||||
canonical: lijevo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *desno
|
||||
probability: 0.5
|
||||
- alternative: *lijevo
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &istok
|
||||
canonical: istok
|
||||
abbreviated: i
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: i
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &zapad
|
||||
canonical: zapad
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &sjever
|
||||
canonical: sjever
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &jug
|
||||
canonical: jug
|
||||
abbreviated: j
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: j
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *sjever
|
||||
probability: 0.25
|
||||
- alternative: *istok
|
||||
probability: 0.23
|
||||
- alternative: *jug
|
||||
probability: 0.23
|
||||
- alternative: *zapad
|
||||
probability: 0.23
|
||||
|
||||
entrances:
|
||||
ulaz: &ulaz
|
||||
canonical: ulaz
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Ulaz 1, Ulaz A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *ulaz
|
||||
numeric_probability: 0.1 # e.g. Ulaz 1
|
||||
alpha_probability: 0.85 # e.g. Ulaz A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
|
||||
staircases:
|
||||
stubiste: &stubiste
|
||||
canonical: stubište
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *stubiste
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *desno
|
||||
probability: 0.2
|
||||
- alternative: *lijevo
|
||||
probability: 0.2
|
||||
- alternative: *sjever
|
||||
probability: 0.15
|
||||
- alternative: *jug
|
||||
probability: 0.15
|
||||
- alternative: *istok
|
||||
probability: 0.15
|
||||
- alternative: *zapad
|
||||
probability: 0.15
|
||||
|
||||
po_boxes:
|
||||
postanski_pretinac: &postanski_pretinac
|
||||
canonical: poštanski pretinac
|
||||
abbreviated: p.p
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *postanski_pretinac
|
||||
numeric_probability: 0.9 # pp 123
|
||||
alpha_probability: 0.05 # p.p A
|
||||
numeric_plus_alpha_probability: 0.04 # pp 123G
|
||||
alpha_plus_numeric_probability: 0.01 # pp A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
stan: &stan
|
||||
canonical: stan
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
apartman: &apartman
|
||||
canonical: apartman
|
||||
abbreviated: ap
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
soba: &soba
|
||||
canonical: soba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ured: &ured
|
||||
canonical: ured
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *stan
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *apartman
|
||||
probability: 0.3
|
||||
- alternative: *soba
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. stan 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. stan A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.05
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *soba
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *ured
|
||||
probability: 0.4
|
||||
numeric_probability: 0.95 # e.g. soba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
|
||||
alpha_probability: 0.03 # e.g. soba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *soba
|
||||
numeric_probability: 0.95 # e.g. soba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
|
||||
alpha_probability: 0.03 # e.g. soba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
439
resources/addresses/hu.yaml
Normal file
439
resources/addresses/hu.yaml
Normal file
@@ -0,0 +1,439 @@
|
||||
# hu.yaml
|
||||
# -------
|
||||
# Hungarian language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.2
|
||||
standalone_probability: 0.05
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- level
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.55
|
||||
- separator: " "
|
||||
probability: 0.4
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.8
|
||||
|
||||
|
||||
numbers:
|
||||
default: &szam
|
||||
canonical: szám
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
and:
|
||||
default: &es
|
||||
canonical: és
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: &es_a
|
||||
canonical: és a
|
||||
canonical_probability: 0.9
|
||||
sample: true
|
||||
sample_probability: 0.1
|
||||
probability: 0.2
|
||||
- alternative: &es_az
|
||||
canonical: és az
|
||||
canonical_probability: 0.9
|
||||
sample: true
|
||||
sample_probability: 0.1
|
||||
probability: 0.2
|
||||
|
||||
cross_streets:
|
||||
and: *es
|
||||
corner_of: &sarkan
|
||||
canonical: sarkán
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *es
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *es_a
|
||||
probability: 0.1
|
||||
- alternative: *es_az
|
||||
probability: 0.1
|
||||
- alternative: *sarkan
|
||||
probability: 0.2
|
||||
|
||||
between:
|
||||
canonical: között
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &emelet
|
||||
canonical: emelet
|
||||
abbreviated: em
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.85
|
||||
sample_probability: 0.05
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.2
|
||||
roman_numeral_probability: 0.8
|
||||
numeric_probability: 0.1
|
||||
ordinal_probability: 0.9
|
||||
foldszint: &foldszint
|
||||
canonical: földszint
|
||||
abbreviated: fszt
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
felemelet: &felemelet
|
||||
canonical: félemelet
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
magasfoldszint: &magasfoldszint
|
||||
canonical: magasföldszint
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
pince: &pince
|
||||
canonical: pince
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
alagsor: &alagsor
|
||||
canonical: alagsor
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
felszuteren: &felszuteren
|
||||
canonical: félszuterén
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
szuteren: &szuteren
|
||||
canonical: szuterén
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *alagsor
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *pince
|
||||
probability: 0.3
|
||||
- alternative: *szuteren
|
||||
probability: 0.1
|
||||
"-1":
|
||||
default: *alagsor
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *pince
|
||||
probability: 0.3
|
||||
- alternative: *szuteren
|
||||
probability: 0.1
|
||||
- alternative: *felszuteren
|
||||
probability: 0.1
|
||||
|
||||
"0":
|
||||
default: *foldszint
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *emelet
|
||||
probability: 0.1
|
||||
|
||||
"1":
|
||||
default: *emelet
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *felemelet
|
||||
probability: 0.1
|
||||
|
||||
"2":
|
||||
default: *emelet
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *magasfoldszint
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *emelet
|
||||
numeric_probability: 0.59 # With this probability, pick an integer
|
||||
roman_numeral_probability: 0.4 # Pick a Roman numeral for the actual value
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: közelében
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
nearby:
|
||||
default:
|
||||
canonical: közelben
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: közelemben
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.7
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
|
||||
directions:
|
||||
right: &jobb
|
||||
canonical: jobb
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &bal
|
||||
canonical: bal
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *jobb
|
||||
probability: 0.5
|
||||
- alternative: *bal
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &kelet
|
||||
canonical: kelet
|
||||
abbreviated: k
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &nyugat
|
||||
canonical: nyugat
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &eszak
|
||||
canonical: észak
|
||||
abbreviated: e
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: e
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &del
|
||||
canonical: dél
|
||||
abbreviated: d
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: d
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *eszak
|
||||
probability: 0.25
|
||||
- alternative: *kelet
|
||||
probability: 0.25
|
||||
- alternative: *del
|
||||
probability: 0.25
|
||||
- alternative: *nyugat
|
||||
probability: 0.25
|
||||
|
||||
|
||||
po_boxes:
|
||||
postafiok: &postafiok
|
||||
canonical: postafiók
|
||||
abbreviated: pf
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric:
|
||||
default: *postafiok
|
||||
numeric_probability: 0.9 # Pf 123
|
||||
alpha_probability: 0.05 # Pf A
|
||||
numeric_plus_alpha_probability: 0.04 # Pf 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Pf A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
lakas: &lakas
|
||||
canonical: lakás
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.3
|
||||
ordinal_probability: 0.7
|
||||
iroda: &iroda
|
||||
canonical: iroda
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
szoba: &szoba
|
||||
canonical: szoba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *lakas
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *szoba
|
||||
probability: 0.1
|
||||
numeric_probability: 0.95 # e.g. lakás 1
|
||||
numeric_plus_alpha_probability: 0.005 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.005 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. lakás A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.2
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *iroda
|
||||
numeric_probability: 0.95 # e.g. iroda 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. iroda 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. iroda A1
|
||||
alpha_probability: 0.03 # e.g. iroda A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university: *commercial_unit_types
|
||||
459
resources/addresses/is.yaml
Normal file
459
resources/addresses/is.yaml
Normal file
@@ -0,0 +1,459 @@
|
||||
# is.yaml
|
||||
# -------
|
||||
# Icelandic language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- level
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.9
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- entrance
|
||||
- unit
|
||||
label: unit
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.9
|
||||
- separator: " - "
|
||||
probability: 0.1
|
||||
probability: 0.001
|
||||
|
||||
|
||||
numbers:
|
||||
default: &numer
|
||||
canonical: númer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *numer
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
|
||||
and:
|
||||
default: &og
|
||||
canonical: og
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *og
|
||||
corner_of: &horn_of
|
||||
canonical: horn af
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &a_horinu_a
|
||||
canonical: á horninu á
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *og
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *horn_of
|
||||
probability: 0.15
|
||||
- alternative: *a_horinu_a
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: milli
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &haeo
|
||||
canonical: hæð
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
jarohaeo: &jarohaeo
|
||||
canonical: jarðhæð
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
sample_probability: 0.7
|
||||
kjallara: &kjallara
|
||||
canonical: kjallara
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# e.g. 1 kjallara
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.8
|
||||
# e.g. k1
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: left
|
||||
# e.g. 1. kjallara
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *kjallara
|
||||
"-1":
|
||||
default: *kjallara
|
||||
"0":
|
||||
default: *jarohaeo
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *haeo
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: nálægt
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: nálægt
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: nálægt hér
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: hérna
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: hér
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: nálægt mér
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: í
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
|
||||
directions:
|
||||
right: &til_haegri
|
||||
canonical: til hægri
|
||||
abbreviated: t.h
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: t.h
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &til_vinstri
|
||||
canonical: til vinstri
|
||||
abbreviated: t.v
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: t.v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
alternatives:
|
||||
- alternative: *til_haegri
|
||||
probability: 0.5
|
||||
- alternative: *til_vinstri
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &austur
|
||||
canonical: austur
|
||||
abbreviated: a
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: a
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &vestur
|
||||
canonical: vestur
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &norour
|
||||
canonical: norður
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &suour
|
||||
canonical: suður
|
||||
abbreviated: s
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *norour
|
||||
probability: 0.25
|
||||
- alternative: *austur
|
||||
probability: 0.25
|
||||
- alternative: *suour
|
||||
probability: 0.25
|
||||
- alternative: *vestur
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
inngangur: &inngangur
|
||||
canonical: inngangur
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Inngangur 1, Inngangur A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *inngangur
|
||||
numeric_probability: 0.1 # e.g. Inngangur 1
|
||||
alpha_probability: 0.85 # e.g. Inngangur A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
stiege: &stigi
|
||||
canonical: stigi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *stigi
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *norour
|
||||
- alternative: *suour
|
||||
- alternative: *austur
|
||||
- alternative: *vestur
|
||||
|
||||
po_boxes:
|
||||
postholf: &postholf
|
||||
canonical: pósthólf
|
||||
abbreviated: ph
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Ph Nr 1234
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *postholf
|
||||
numeric_probability: 0.9 # Ph 123
|
||||
alpha_probability: 0.05 # Ph A
|
||||
numeric_plus_alpha_probability: 0.04 # Ph 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Ph A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
ibuo: &ibuo
|
||||
canonical: íbúð
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
null_phrase_probability: 0.5
|
||||
# íbúð númer 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *ibuo
|
||||
numeric_probability: 0.9 # e.g. íbúð 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. íbúð A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2R, 2L, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
673
resources/addresses/it.yaml
Normal file
673
resources/addresses/it.yaml
Normal file
@@ -0,0 +1,673 @@
|
||||
# it.yaml
|
||||
# -------
|
||||
# Italian language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
# If no floor number is specified
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.8
|
||||
alphanumeric_probability: 0.2
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 1.0
|
||||
probability: 0.5
|
||||
|
||||
numbers:
|
||||
default: &numero
|
||||
canonical: numero
|
||||
abbreviated: "nº"
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.5
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "n."
|
||||
direction: left
|
||||
# Probabilities for numbers
|
||||
numeric_probability: 0.7
|
||||
numeric_affix_probability: 0.3
|
||||
|
||||
and:
|
||||
default: &e
|
||||
canonical: e
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.25
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
house_numbers:
|
||||
# senza numero civico (snc) addresses
|
||||
no_number:
|
||||
canonical: senza numero civico
|
||||
abbreviated: snc
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *numero
|
||||
|
||||
alphanumeric_phrase_probability: 0.01
|
||||
no_number_probability: 0.05 # With this probability, use the no_number phrase (senza numero civico / snc) if no house_number is specified
|
||||
|
||||
levels:
|
||||
floor: &piano
|
||||
canonical: piano
|
||||
abbreviated: pº
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.15
|
||||
sample_probability: 0.25
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.95
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
digits:
|
||||
ascii_probability: 0.9
|
||||
roman_numeral_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
roman_numeral_probability: 0.3
|
||||
numeric_probability: 0.55
|
||||
ordinal_probability: 0.45
|
||||
livello: &livello
|
||||
canonical: livello
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
numeric_probability: 0.75
|
||||
ordinal_probability: 0.25
|
||||
piano_nobile: &piano_nobile
|
||||
canonical: piano nobile
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
piano_terra: &piano_terra
|
||||
canonical: piano terra
|
||||
abbreviated: p.t
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.25
|
||||
sample_probability: 0.25
|
||||
basement: &seminterrato
|
||||
canonical: seminterrato
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
standalone_probability: 0.99
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *seminterrato
|
||||
probability: 0.995
|
||||
alternatives:
|
||||
- alternative: *piano
|
||||
probability: 0.005
|
||||
"-1":
|
||||
default: *seminterrato
|
||||
probability: 0.9995
|
||||
alternatives:
|
||||
- alternative: *piano
|
||||
probability: 0.0005
|
||||
"0":
|
||||
default: *piano_terra
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *piano
|
||||
probability: 0.05
|
||||
"1":
|
||||
default: *piano
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *piano_nobile
|
||||
probability: 0.1
|
||||
|
||||
alphanumeric:
|
||||
default: *piano
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *livello
|
||||
probability: 0.05
|
||||
numeric_probability: 0.99
|
||||
alpha_probability: 0.01
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
cross_streets:
|
||||
# 26th & 6th Avenue
|
||||
and: *e
|
||||
# 26th @ Broadway
|
||||
a: &a
|
||||
canonical: a
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
corner_of: &angolo_di
|
||||
canonical: angolo di
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
corner: &angolo
|
||||
canonical: angolo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
at_the_corner_of: &all_angolo_tra
|
||||
canonical: all'angolo tra
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
intersection:
|
||||
default: *e
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *a
|
||||
probability: 0.05
|
||||
- alternative: *angolo_di
|
||||
probability: 0.15
|
||||
- alternative: *all_angolo_tra
|
||||
probability: 0.1
|
||||
|
||||
# 26th betw 5th Ave and 6th Ave
|
||||
between:
|
||||
canonical: tra
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5 # Probability of using parentheses e.g. (between 5th and 6th)
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: vicino a
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: presso a
|
||||
probability: 0.25
|
||||
nearby:
|
||||
default:
|
||||
canonical: vicino
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: qui vicino
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: nelle vicinanze
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: intorno a qui
|
||||
probability: 0.1
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: vicino a me
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: a
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: ad
|
||||
probability: 0.15
|
||||
- alternative:
|
||||
canonical: in
|
||||
probability: 0.15
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
directions:
|
||||
right: &destra
|
||||
canonical: destra
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
left: &sinistra
|
||||
canonical: sinistra
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
rear: &posteriore
|
||||
canonical: posteriore
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
front: &anteriore
|
||||
canonical: anteriore
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *destra
|
||||
probability: 0.49
|
||||
- alternative: *sinistra
|
||||
probability: 0.49
|
||||
- alternative: *posteriore
|
||||
probability: 0.01
|
||||
- alternative: *anteriore
|
||||
probability: 0.01
|
||||
|
||||
anteroposterior:
|
||||
alternatives:
|
||||
- alternative: *anteriore
|
||||
probability: 0.5
|
||||
- alternative: *posteriore
|
||||
probability: 0.5
|
||||
|
||||
lateral:
|
||||
alternatives:
|
||||
- alternative: *destra
|
||||
probability: 0.5
|
||||
- alternative: *sinistra
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &est
|
||||
canonical: est
|
||||
abbreviated: e
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: e
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &ovest
|
||||
canonical: ovest
|
||||
abbreviated: o
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: o
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &sud
|
||||
canonical: sud
|
||||
abbreviated: s
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *est
|
||||
probability: 0.25
|
||||
- alternative: *sud
|
||||
probability: 0.25
|
||||
- alternative: *ovest
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
entrance: &ingresso
|
||||
canonical: ingresso
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Ingresso 1, Ingresso A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *ingresso
|
||||
numeric_probability: 0.1 # e.g. Ingresso 1
|
||||
alpha_probability: 0.85 # e.g. Ingresso A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
modifier:
|
||||
direction: right # e.g. Ingresso Nord
|
||||
direction_probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *ovest
|
||||
- alternative: *destra
|
||||
- alternative: *sinistra
|
||||
- alternative: *posteriore
|
||||
- alternative: *anteriore
|
||||
|
||||
staircases:
|
||||
scala: &scala
|
||||
canonical: scala
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
# For alphanumerics, Scala A, Scala 1, etc.
|
||||
default: *scala
|
||||
numeric_probability: 0.6 # e.g. Scala 1
|
||||
alpha_probability: 0.35 # e.g. Scala A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right # e.g. Scala Destra
|
||||
direction_probability: 0.9
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *ovest
|
||||
- alternative: *destra
|
||||
- alternative: *sinistra
|
||||
- alternative: *posteriore
|
||||
- alternative: *anteriore
|
||||
|
||||
|
||||
po_boxes:
|
||||
casella_postale: &casella_postale
|
||||
canonical: casella postale
|
||||
abbreviated: cp
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # CP No 1234
|
||||
numeric_probability: 1.0
|
||||
alphanumeric:
|
||||
default: *casella_postale
|
||||
numeric_probability: 0.9 # CP 123
|
||||
alpha_probability: 0.05 # CP A
|
||||
numeric_plus_alpha_probability: 0.04 # CP 123G
|
||||
alpha_plus_numeric_probability: 0.01 # CP A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
units:
|
||||
flat: &appartamento
|
||||
canonical: appartamento
|
||||
abbreviated: app
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
casa: &casa
|
||||
canonical: casa
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
unit: &unita
|
||||
canonical: unità
|
||||
abbreviated: u
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
office: &officina
|
||||
canonical: officina
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.3
|
||||
lotto: &lotto
|
||||
canonical: lotto
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
door: &porta
|
||||
canonical: porta
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
interno: &interno
|
||||
canonical: interno
|
||||
abbreviated: int
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
room: &sala
|
||||
canonical: sala
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *appartamento
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *interno
|
||||
probability: 0.1
|
||||
# e.g. just plain #3 or No. 4
|
||||
- alternative: *numero
|
||||
probability: 0.05
|
||||
- alternative: *casa
|
||||
probability: 0.05
|
||||
- alternative: *porta
|
||||
probability: 0.045
|
||||
- alternative: *sala
|
||||
probability: 0.005
|
||||
numeric_probability: 0.9 # e.g. Appartamento 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. Appartamento A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2 Destra, 2 Sinistra, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Unità Sinistra
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
zones:
|
||||
residential: *unit_alphanumeric
|
||||
commercial:
|
||||
default: *officina
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *sala
|
||||
probability: 0.2
|
||||
|
||||
numeric_probability: 0.9 # e.g. Officina 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Officina 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Officina A1
|
||||
alpha_probability: 0.08 # e.g. Officina A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
industrial:
|
||||
default: *lotto
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *officina
|
||||
probability: 0.3
|
||||
- alternative: *unita
|
||||
probability: 0.2
|
||||
|
||||
numeric_probability: 0.9 # e.g. Lotto 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Lotto 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Lotto A1
|
||||
alpha_probability: 0.08 # e.g. Lotto A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
university:
|
||||
default: *sala
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *porta
|
||||
probability: 0.1
|
||||
|
||||
numeric_probability: 0.9 # e.g. Sala 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
|
||||
alpha_probability: 0.08 # e.g. Sala A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
161
resources/addresses/ja.yaml
Normal file
161
resources/addresses/ja.yaml
Normal file
@@ -0,0 +1,161 @@
|
||||
# ja.yaml
|
||||
# -------
|
||||
# Japanese language specification
|
||||
|
||||
whitespace: false
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95 # Probability of doing nothing if no floor number is specified
|
||||
alphanumeric_probability: 0.05
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 1.0
|
||||
conditional:
|
||||
- component: level
|
||||
probabilities:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.05
|
||||
- component: house_number
|
||||
probabilities:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
combinations:
|
||||
# Unit is just appended onto the house number
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 1.0
|
||||
probability: 1.0
|
||||
|
||||
numbers:
|
||||
default: &go
|
||||
canonical: 号
|
||||
numeric_affix:
|
||||
affix: 号
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
|
||||
blocks:
|
||||
alphanumeric:
|
||||
default: &ban
|
||||
canonical: 番
|
||||
numeric_affix:
|
||||
affix: 番
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: &banchi
|
||||
canonical: 番地
|
||||
numeric_affix:
|
||||
affix: 番地
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.1
|
||||
- alternative: &banchi_no
|
||||
canonical: 番地の
|
||||
numeric_affix:
|
||||
affix: 番地の
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.05
|
||||
numeric_probability: 1.0
|
||||
alphanumeric_phrase_probability: 0.4
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *go
|
||||
alphanumeric_phrase_probability: 0.4
|
||||
|
||||
levels:
|
||||
kai: &kai
|
||||
canonical: 階
|
||||
numeric_affix:
|
||||
affix: 階
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *kai
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes:
|
||||
shishobako: &shishobako
|
||||
canonical: 私書箱
|
||||
numeric_affix:
|
||||
affix: 私書箱
|
||||
direction: left
|
||||
digits:
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
alphanumeric:
|
||||
default: *shishobako
|
||||
numeric_probability: 1.0
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
metro_stations:
|
||||
alphanumeric:
|
||||
default: &eki
|
||||
canonical: 駅
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: 駅
|
||||
direction: right
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
alphanumeric_phrase_probability: 1.0
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: 〒
|
||||
numeric_affix:
|
||||
affix: 〒
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.1
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 0.9
|
||||
|
||||
units:
|
||||
alphanumeric:
|
||||
numeric_probability: 1.0
|
||||
use_positive_numbers_probability: 1.0
|
||||
# If we have a floor number (from building:levels), use it
|
||||
use_floor_probability: 0.8
|
||||
180
resources/addresses/ja_rm.yaml
Normal file
180
resources/addresses/ja_rm.yaml
Normal file
@@ -0,0 +1,180 @@
|
||||
# ja_rm.yaml
|
||||
# ----------
|
||||
# Romaji (Romanized Japanese) language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95 # Probability of doing nothing if no floor number is specified
|
||||
alphanumeric_probability: 0.05
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 1.0
|
||||
conditional:
|
||||
- component: level
|
||||
probabilities:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.05
|
||||
- component: house_number
|
||||
probabilities:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
|
||||
combinations:
|
||||
# Unit is just appended onto the house number
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 1.0
|
||||
probability: 1.0
|
||||
|
||||
numbers:
|
||||
default: &go
|
||||
canonical: go
|
||||
numeric_affix:
|
||||
affix: -go
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
blocks:
|
||||
alphanumeric:
|
||||
default: &ban
|
||||
canonical: ban
|
||||
numeric_affix:
|
||||
affix: -ban
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: &banchi
|
||||
canonical: banchi
|
||||
numeric_affix:
|
||||
affix: -banchi # suffix matches the canonical phrase, e.g. 12-banchi (was -ban, identical to the default &ban entry)
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.1
|
||||
- alternative: &banchi_no
|
||||
canonical: banchi-no
|
||||
numeric_affix:
|
||||
affix: -banchi-no
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.05
|
||||
numeric_probability: 1.0
|
||||
alphanumeric_phrase_probability: 0.4
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *go
|
||||
alphanumeric_phrase_probability: 0.4
|
||||
|
||||
levels:
|
||||
kai: &kai
|
||||
canonical: kai
|
||||
numeric_affix:
|
||||
affix: -kai
|
||||
upper_case: false
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
gai: &gai
|
||||
canonical: gai
|
||||
numeric_affix:
|
||||
affix: -gai
|
||||
upper_case: false
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *kai
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *gai
|
||||
probability: 0.4
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes:
|
||||
shishobako: &shishobako
|
||||
canonical: shishobako
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_probability: 1.0
|
||||
|
||||
alphanumeric:
|
||||
default: *shishobako
|
||||
numeric_probability: 1.0
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
metro_stations:
|
||||
alphanumeric:
|
||||
default: &eki
|
||||
canonical: eki
|
||||
numeric:
|
||||
direction: right
|
||||
title_case: false
|
||||
numeric_affix:
|
||||
affix: -eki
|
||||
title_case: false
|
||||
direction: right
|
||||
numeric_affix_probability: 1.0
|
||||
alphanumeric_phrase_probability: 1.0
|
||||
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
# This should still be the default in Romaji
|
||||
default:
|
||||
canonical: 〒
|
||||
numeric_affix:
|
||||
affix: 〒
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.1
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 0.9
|
||||
|
||||
units:
|
||||
alphanumeric:
|
||||
numeric_probability: 1.0
|
||||
use_positive_numbers_probability: 1.0
|
||||
# If we have a floor number (from building:levels), use it
|
||||
use_floor_probability: 0.8
|
||||
122
resources/addresses/ko.yaml
Normal file
122
resources/addresses/ko.yaml
Normal file
@@ -0,0 +1,122 @@
|
||||
# ko.yaml
|
||||
# -------
|
||||
# Korean language specification
|
||||
|
||||
whitespace: false
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85 # Probability of doing nothing if no floor number is specified
|
||||
alphanumeric_probability: 0.15
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
numbers:
|
||||
combinations:
|
||||
# Unit is just appended onto the house number
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 1.0
|
||||
probability: 1.0
|
||||
|
||||
numbers:
|
||||
default: &ho
|
||||
canonical: 호
|
||||
numeric_affix:
|
||||
affix: 호
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: &ho_traditional
|
||||
canonical: 號
|
||||
numeric_affix:
|
||||
affix: 號
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.1
|
||||
|
||||
levels:
|
||||
cheung: &cheung
|
||||
canonical: 층
|
||||
numeric_affix:
|
||||
affix: 층
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *cheung
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes:
|
||||
saseoham: &saseoham
|
||||
canonical: 사서함
|
||||
numeric_affix:
|
||||
affix: 사서함
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
unicode_full_width_probability: 0.1
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
alphanumeric:
|
||||
default: *saseoham
|
||||
numeric_probability: 1.0
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
default: &upyeon_beonho
|
||||
canonical: 우편번호
|
||||
numeric_affix:
|
||||
affix: 우편번호
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.9
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 0.1
|
||||
|
||||
units:
|
||||
alphanumeric:
|
||||
default: *ho
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *ho_traditional
|
||||
probability: 0.1
|
||||
numeric_probability: 1.0
|
||||
use_positive_numbers_probability: 1.0
|
||||
# If we have a floor number (from building:levels), use it
|
||||
use_floor_probability: 0.8
|
||||
90
resources/addresses/ko_rm.yaml
Normal file
90
resources/addresses/ko_rm.yaml
Normal file
@@ -0,0 +1,90 @@
|
||||
# ko_rm.yaml
|
||||
# ----------
|
||||
# Romanized Korean language specification
|
||||
|
||||
whitespace: false
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85 # Probability of doing nothing if no floor number is specified
|
||||
alphanumeric_probability: 0.15
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
numbers:
|
||||
combinations:
|
||||
# Unit is just appended onto the house number
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 1.0
|
||||
probability: 1.0
|
||||
|
||||
numbers:
|
||||
default: &ho
|
||||
canonical: ho
|
||||
numeric_affix:
|
||||
affix: -ho
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
levels:
|
||||
cheung: &cheung
|
||||
canonical: cheung
|
||||
numeric_affix:
|
||||
affix: -cheung
|
||||
upper_case: false
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *cheung
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes:
|
||||
saseoham: &saseoham
|
||||
canonical: saseoham
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
default: *saseoham
|
||||
numeric_probability: 1.0
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
alphanumeric:
|
||||
default: *ho
|
||||
numeric_probability: 1.0
|
||||
use_positive_numbers_probability: 1.0
|
||||
# If we have a floor number (from building:levels), use it
|
||||
use_floor_probability: 0.8
|
||||
391
resources/addresses/lt.yaml
Normal file
391
resources/addresses/lt.yaml
Normal file
@@ -0,0 +1,391 @@
|
||||
# lt.yaml
|
||||
# -------
|
||||
# Lithuanian language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.97
|
||||
alphanumeric_probability: 0.02
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.95
|
||||
- separator: " - "
|
||||
probability: 0.05
|
||||
probability: 0.8
|
||||
|
||||
|
||||
numbers:
|
||||
default: &numeris
|
||||
canonical: numeris
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
and:
|
||||
default: &ir
|
||||
canonical: ir
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
|
||||
cross_streets:
|
||||
and: *ir
|
||||
corner_of: &kampelis
|
||||
canonical: kampelis
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *ir
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *kampelis
|
||||
probability: 0.3
|
||||
|
||||
between:
|
||||
canonical: nuo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
levels:
|
||||
aukstas: &aukstas
|
||||
canonical: aukštas
|
||||
abbreviated: auk
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
numeric_probability: 0.2
|
||||
ordinal_probability: 0.8
|
||||
aukste: &aukste
|
||||
<<: *aukstas
|
||||
canonical: aukšte
|
||||
# Ground floor
|
||||
pirmas_aukstas: &pirmas_aukstas
|
||||
canonical: pirmas aukštas
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
rusys: &rusys
|
||||
canonical: rūsys
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
standalone_probability: 1.0
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
rusyje: &rusyje
|
||||
canonical: rūsyje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# e.g. rūsyje 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. r1
|
||||
numeric_affix:
|
||||
affix: r
|
||||
direction: left
|
||||
# e.g. 1. rūsyje
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.1
|
||||
ordinal_probability: 0.4
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *rusyje
|
||||
"-1":
|
||||
default: *rusys
|
||||
"0": &ground_floor
|
||||
default: *pirmas_aukstas
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *aukste
|
||||
probability: 0.3
|
||||
- alternative: *aukstas
|
||||
probability: 0.1
|
||||
"1": *ground_floor
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *aukstas
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
directions:
|
||||
right: &desineje
|
||||
canonical: dešinėje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &kaireje
|
||||
canonical: kairėje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *desineje
|
||||
probability: 0.5
|
||||
- alternative: *kaireje
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &rytai
|
||||
canonical: rytai
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
west: &vakarai
|
||||
canonical: vakarai
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
north: &siaure
|
||||
canonical: šiaurė
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
south: &pietus
|
||||
canonical: pietūs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
alternatives:
|
||||
- alternative: *siaure
|
||||
probability: 0.25
|
||||
- alternative: *rytai
|
||||
probability: 0.25
|
||||
- alternative: *pietus
|
||||
probability: 0.25
|
||||
- alternative: *vakarai
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
wejscie: &iejimas
|
||||
canonical: įėjimas
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# įėjimas 1, įėjimas A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *iejimas
|
||||
numeric_probability: 0.1 # e.g. įėjimas 1
|
||||
alpha_probability: 0.85 # e.g. įėjimas A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
laiptai: &laiptai
|
||||
canonical: laiptai
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *laiptai
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *siaure
|
||||
- alternative: *rytai
|
||||
- alternative: *pietus
|
||||
- alternative: *vakarai
|
||||
|
||||
|
||||
po_boxes:
|
||||
pasto_dezute: &pasto_dezute
|
||||
canonical: pašto dėžutė
|
||||
abbreviated: p d
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.5
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # pašto dėžutė 1234
|
||||
alphanumeric:
|
||||
default: *pasto_dezute
|
||||
numeric_probability: 0.95 # P. d. 123
|
||||
alpha_probability: 0.01 # pašto dėžutė A
|
||||
numeric_plus_alpha_probability: 0.03 # P. d. 123G
|
||||
alpha_plus_numeric_probability: 0.01 # P. d. A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
butas: &butas
|
||||
canonical: butas
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
biuro: &biuro
|
||||
canonical: biuro
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
kambarys: &kambarys
|
||||
canonical: kambarys
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *butas
|
||||
numeric_probability: 0.9 # e.g. butas 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. butas A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.01
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *biuro
|
||||
numeric_probability: 0.95 # e.g. biuro 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. biuro 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. biuro A1
|
||||
alpha_probability: 0.03 # e.g. biuro A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *kambarys
|
||||
numeric_probability: 0.95 # e.g. kambarys 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. kambarys 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. kambarys A1
|
||||
alpha_probability: 0.03 # e.g. kambarys A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
403
resources/addresses/lv.yaml
Normal file
403
resources/addresses/lv.yaml
Normal file
@@ -0,0 +1,403 @@
|
||||
# lv.yaml
|
||||
# -------
|
||||
# Latvian language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.97
|
||||
alphanumeric_probability: 0.02
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.95
|
||||
- separator: " - "
|
||||
probability: 0.05
|
||||
probability: 0.2
|
||||
|
||||
|
||||
numbers:
|
||||
default: &numurs
|
||||
canonical: numurs
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
and:
|
||||
default: &un
|
||||
canonical: un
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
|
||||
cross_streets:
|
||||
and: *un
|
||||
corner_of: &sturis
|
||||
canonical: stūris
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &sturi
|
||||
canonical: stūrī
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *un
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *sturi
|
||||
probability: 0.2
|
||||
- alternative: *sturis
|
||||
probability: 0.1
|
||||
|
||||
between:
|
||||
canonical: starp
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
levels:
|
||||
stavs: &stavs
|
||||
canonical: stāvs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
whitespace_probability: 0.5 # the space is sometimes omitted, e.g. 2.stāvs
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
# Needs to be 1.0 so we don't get e.g. IIstāvs
|
||||
ordinal_suffix_probability: 1.0
|
||||
numeric_probability: 0.2
|
||||
ordinal_probability: 0.8
|
||||
|
||||
# Ground floor
|
||||
pirmais_stavs: &pirmais_stavs
|
||||
canonical: pirmais stāvs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
pagrabs: &pagrabs
|
||||
canonical: pagrabs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
standalone_probability: 1.0
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
pagraba: &pagraba
|
||||
canonical: pagraba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# e.g. pagraba 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. p1
|
||||
numeric_affix:
|
||||
affix: p
|
||||
direction: left
|
||||
# e.g. 1. pagraba
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.1
|
||||
ordinal_probability: 0.4
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *pagraba
|
||||
"-1":
|
||||
default: *pagrabs
|
||||
"0": &ground_floor
|
||||
default: *pirmais_stavs
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *stavs
|
||||
probability: 0.4
|
||||
"1": *ground_floor
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *stavs
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
directions:
|
||||
right: &pa_labi
|
||||
canonical: pa labi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &pa_kreisi
|
||||
canonical: pa kreisi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *pa_labi
|
||||
probability: 0.5
|
||||
- alternative: *pa_kreisi
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &austrumu
|
||||
canonical: austrumu
|
||||
abbreviated: a
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.05
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: a
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &rietumu
|
||||
canonical: rietumu
|
||||
abbreviated: r
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.05
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: r
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &ziemelu
|
||||
canonical: ziemeļu
|
||||
abbreviated: z
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.05
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
|
||||
south: &dienvidu
|
||||
canonical: dienvidu
|
||||
abbreviated: d
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.05
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: d
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *ziemelu
|
||||
probability: 0.25
|
||||
- alternative: *dienvidu
|
||||
probability: 0.25
|
||||
- alternative: *austrumu
|
||||
probability: 0.25
|
||||
- alternative: *rietumu
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
ieeja: &ieeja
|
||||
canonical: ieeja
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# ieeja 1, ieeja A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *ieeja
|
||||
numeric_probability: 0.1 # e.g. ieeja 1
|
||||
alpha_probability: 0.85 # e.g. ieeja A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
kapnu: &kapnu
|
||||
canonical: kāpņu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
kapnu_telpa: &kapnu_telpa
|
||||
canonical: kāpņu telpa
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *kapnu
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *kapnu_telpa
|
||||
probability: 0.4
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *pa_labi
|
||||
- alternative: *pa_kreisi
|
||||
- alternative: *ziemelu
|
||||
- alternative: *dienvidu
|
||||
- alternative: *austrumu
|
||||
- alternative: *rietumu
|
||||
|
||||
|
||||
units:
|
||||
dzivoklis: &dzivoklis
|
||||
canonical: dzīvoklis
|
||||
abbreviated: dz
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.8
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
birojs: &birojs
|
||||
canonical: birojs
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
istaba: &istaba
|
||||
canonical: istaba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *dzivoklis
|
||||
numeric_probability: 0.9 # e.g. dz. 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. dz. A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.01
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *birojs
|
||||
numeric_probability: 0.95 # e.g. birojs 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. birojs 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. birojs A1
|
||||
alpha_probability: 0.03 # e.g. birojs A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *istaba
|
||||
numeric_probability: 0.95 # e.g. istaba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. istaba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. istaba A1
|
||||
alpha_probability: 0.03 # e.g. istaba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
563
resources/addresses/nb.yaml
Normal file
563
resources/addresses/nb.yaml
Normal file
@@ -0,0 +1,563 @@
|
||||
# nb.yaml
|
||||
# -------
|
||||
# Norwegian language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85
|
||||
alphanumeric_probability: 0.1
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
# Bolignummer
|
||||
-
|
||||
components:
|
||||
- level
|
||||
- unit
|
||||
label: unit
|
||||
zero_pad_digits: 2
|
||||
separators:
|
||||
- separator: ""
|
||||
probability: 1.0
|
||||
probability: 0.05
|
||||
|
||||
|
||||
numbers:
|
||||
default: &nummer
|
||||
canonical: nummer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *nummer
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
|
||||
and:
|
||||
default: &og
|
||||
canonical: og
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *og
|
||||
corner_of: &hjorne_av
|
||||
canonical: hjørne av
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &pa_hjornet_av
|
||||
canonical: på hjørnet av
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *og
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *hjorne_av
|
||||
probability: 0.15
|
||||
- alternative: *pa_hjornet_av
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: mellom
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &etasje
|
||||
canonical: etasje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
hovedetasje: &hovedetasje
|
||||
canonical: hovedetasje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: h
|
||||
direction: left
|
||||
zero_pad: 2
|
||||
numeric_probability: 0.1
|
||||
numeric_affix_probability: 0.9
|
||||
underetasje: &underetasje
|
||||
canonical: underetasje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: u
|
||||
direction: left
|
||||
zero_pad: 2
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.1
|
||||
numeric_affix_probability: 0.9
|
||||
loftsetasje: &loftsetasje
|
||||
canonical: loftsetasje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: l
|
||||
direction: left
|
||||
zero_pad: 2
|
||||
numeric_probability: 0.1
|
||||
numeric_affix_probability: 0.9
|
||||
loft: &loft
|
||||
canonical: loft
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
kjeller: &kjeller
|
||||
canonical: kjeller
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# e.g. 1 kjeller
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.8
|
||||
# e.g. k01
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: left
|
||||
zero_pad: 2
|
||||
# e.g. 1. k
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.9
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.09
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *kjeller
|
||||
"-1":
|
||||
default: *kjeller
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *etasje
|
||||
probability: 0.05
|
||||
- alternative: *underetasje
|
||||
probability: 0.1
|
||||
|
||||
"top":
|
||||
default: *etasje
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *loftsetasje
|
||||
probability: 0.1
|
||||
- alternative: *loft
|
||||
probability: 0.05
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *etasje
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *hovedetasje
|
||||
probability: 0.05
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: i nærheten av
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: nær
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
nearby:
|
||||
default:
|
||||
canonical: i nærheten
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: rundt her
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: nær
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: nær meg
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: i nærheten av meg
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.4
|
||||
|
||||
in:
|
||||
default:
|
||||
canonical: i
|
||||
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
right: &hoyre
|
||||
canonical: høyre
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
sample_probability: 0.9
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: h
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &venstre
|
||||
canonical: venstre
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
sample_probability: 0.9
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
alternatives:
|
||||
- alternative: *hoyre
|
||||
probability: 0.5
|
||||
- alternative: *venstre
|
||||
probability: 0.5
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
east: &ost
|
||||
canonical: øst
|
||||
abbreviated: ø
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: ø
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &vest
|
||||
canonical: vest
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &syd
|
||||
canonical: syd
|
||||
abbreviated: s
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *ost
|
||||
probability: 0.25
|
||||
- alternative: *syd
|
||||
probability: 0.25
|
||||
- alternative: *vest
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
inngang: &inngang
|
||||
canonical: inngang
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Inngang 1, Inngang A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *inngang
|
||||
numeric_probability: 0.1 # e.g. Inngang 1
|
||||
alpha_probability: 0.85 # e.g. Inngang A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
# NOTE(review): "stiege" / "stg" looks like a German (Austrian) staircase term
# carried over from another language spec - confirm it is intended for Norwegian.
stiege: &stiege
|
||||
canonical: stiege
|
||||
abbreviated: stg
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
trapp: &trapp
|
||||
canonical: trapp
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *trapp
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *stiege
|
||||
probability: 0.2
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *syd
|
||||
- alternative: *ost
|
||||
- alternative: *vest
|
||||
|
||||
po_boxes:
|
||||
postboks: &postboks
|
||||
canonical: postboks
|
||||
abbreviated: pb
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Pb No 1234
|
||||
boks: &boks
|
||||
canonical: boks
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Boks No 1234
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *postboks
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *boks
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # Pb 123
|
||||
alpha_probability: 0.05 # Pb A
|
||||
numeric_plus_alpha_probability: 0.04 # Pb 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Pb A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
leilighet: &leilighet
|
||||
canonical: leilighet
|
||||
abbreviated: leil
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
null_phrase_probability: 0.3
|
||||
# Leilighet nummer 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
hus: &hus
|
||||
canonical: hus
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
vaerelse: &vaerelse
|
||||
canonical: værelse
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *leilighet
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *hus
|
||||
probability: 0.1
|
||||
- alternative: *vaerelse
|
||||
probability: 0.1
|
||||
numeric_probability: 0.95 # e.g. Leilighet 1
|
||||
alpha_probability: 0.05 # e.g. Leil A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2H, 2V, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.005
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Leilighet Venstre
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.2
|
||||
|
||||
# Use the actual floor phrase as long as the whole phrase is numeric
|
||||
# Has the effect of creating Bolignummer-style units
|
||||
use_floor_affix_unit_num_digits: 2
|
||||
572
resources/addresses/nl.yaml
Normal file
572
resources/addresses/nl.yaml
Normal file
@@ -0,0 +1,572 @@
|
||||
# nl.yaml
|
||||
# -------
|
||||
# Note: base config covers Dutch as spoken in the Netherlands
|
||||
# Belgium overrides go in country configs
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85
|
||||
alphanumeric_probability: 0.1
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.8
|
||||
alphanumeric_probability: 0.2
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: /
|
||||
probability: 0.9
|
||||
- separator: "-"
|
||||
probability: 0.1
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "-"
|
||||
probability: 0.9
|
||||
- separator: /
|
||||
probability: 0.1
|
||||
probability: 0.01
|
||||
|
||||
|
||||
and:
|
||||
default: &en
|
||||
canonical: en
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
numbers:
|
||||
default: &nummer
|
||||
canonical: nummer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *nummer
|
||||
alphanumeric_phrase_probability: 0.01
|
||||
|
||||
levels:
|
||||
verdieping: &verdieping
|
||||
canonical: verdieping
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
roman_numeral_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.5
|
||||
roman_numeral_probability: 0.3
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.7
|
||||
ordinal_probability: 0.3
|
||||
etage: &etage
|
||||
canonical: etage
|
||||
abbreviated: et
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
roman_numeral_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.5
|
||||
roman_numeral_probability: 0.3
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.7
|
||||
ordinal_probability: 0.3
|
||||
begane_grond: &begane_grond
|
||||
canonical: begane grond
|
||||
abbreviated: bg
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.3
|
||||
benedenverdieping: &benedenverdieping
|
||||
canonical: benedenverdieping
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parterre: &parterre
|
||||
canonical: parterre
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
gelijkvloers: &gelijkvloers
|
||||
canonical: gelijkvloers
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
het_gelijkvloers: &het_gelijkvloers
|
||||
canonical: het gelijkvloers
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
aliases:
|
||||
"0":
|
||||
default: *begane_grond
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *benedenverdieping
|
||||
probability: 0.35
|
||||
- alternative: *parterre
|
||||
probability: 0.04
|
||||
- alternative: *het_gelijkvloers
|
||||
probability: 0.005
|
||||
- alternative: *gelijkvloers
|
||||
probability: 0.005
|
||||
alphanumeric:
|
||||
default: *verdieping
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *etage
|
||||
probability: 0.01
|
||||
numeric_probability: 0.79 # With this probability, pick an integer
|
||||
roman_numeral_probability: 0.2 # Pick a Roman numeral for the actual value
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: in de buurt van
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: bij
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: nabij
|
||||
probability: 0.1
|
||||
nearby:
|
||||
default:
|
||||
canonical: in de buurt
|
||||
near_me:
|
||||
default:
|
||||
canonical: in de buurt van me
|
||||
|
||||
in:
|
||||
default:
|
||||
canonical: in
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: te
|
||||
probability: 0.4
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
|
||||
cross_streets:
|
||||
and: *en
|
||||
corner_of: &hoek_van
|
||||
canonical: hoek van
|
||||
at_the_corner_of: &op_de_hoek_van
|
||||
canonical: op de hoek van
|
||||
intersection:
|
||||
default: *en
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *hoek_van
|
||||
probability: 0.15
|
||||
- alternative: *op_de_hoek_van
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: tussen
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
entrances:
|
||||
ingang: &ingang
|
||||
canonical: ingang
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Ingang 1, Ingang A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *ingang
|
||||
numeric_probability: 0.1 # e.g. Ingang 1
|
||||
alpha_probability: 0.85 # e.g. Ingang A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
po_boxes:
|
||||
postbus: &postbus
|
||||
canonical: postbus
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
antwoordnummer: &antwoordnummer
|
||||
canonical: antwoordnummer
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *postbus
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *antwoordnummer
|
||||
probability: 0.2
|
||||
numeric_probability: 0.9 # 123
|
||||
alpha_probability: 0.05 # A
|
||||
numeric_plus_alpha_probability: 0.04 # 123G
|
||||
alpha_plus_numeric_probability: 0.01 # A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
directions:
|
||||
right: &rechts
|
||||
canonical: rechts
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: r
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &links
|
||||
canonical: links
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: l
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *rechts
|
||||
probability: 0.5
|
||||
- alternative: *links
|
||||
probability: 0.5
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
east: &oost
|
||||
canonical: oost
|
||||
abbreviated: o
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: o
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
oosten: &oosten
|
||||
<<: *oost
|
||||
canonical: oosten
|
||||
|
||||
oostelijke: &oostelijke
|
||||
canonical: oostelijke
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
west: &west
|
||||
canonical: west
|
||||
abbreviated: w
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: w
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
westen: &westen
|
||||
<<: *west
|
||||
canonical: westen
|
||||
|
||||
westelijke: &westelijke
|
||||
canonical: westelijke
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
north: &noord
|
||||
canonical: noord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
noorden: &noorden
|
||||
<<: *noord
|
||||
canonical: noorden
|
||||
|
||||
noordelijke: &noordelijke
|
||||
canonical: noordelijke
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
south: &zuid
|
||||
canonical: zuid
|
||||
abbreviated: z
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
zuiden: &zuiden
|
||||
<<: *zuid
|
||||
canonical: zuiden
|
||||
|
||||
zuidelijke: &zuidelijke
|
||||
canonical: zuidelijke
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
alternatives:
|
||||
- alternative: *noord
|
||||
probability: 0.25
|
||||
- alternative: *oost
|
||||
probability: 0.25
|
||||
- alternative: *zuid
|
||||
probability: 0.25
|
||||
- alternative: *west
|
||||
probability: 0.25
|
||||
|
||||
|
||||
staircases:
|
||||
# NOTE(review): "stiege" / "stg" looks like a German (Austrian) staircase term
# carried over from another language spec - confirm it is intended for Dutch.
stiege: &stiege
|
||||
canonical: stiege
|
||||
abbreviated: stg
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
trap: &trap
|
||||
canonical: trap
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *trap
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *stiege
|
||||
probability: 0.4
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
units:
|
||||
appartement: &appartement
|
||||
canonical: appartement
|
||||
abbreviated: apt
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
kamer: &kamer
|
||||
canonical: kamer
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *appartement
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *kamer
|
||||
probability: 0.4
|
||||
numeric_probability: 0.9 # e.g. Apt 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. Apt A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2R, 2L, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only e.g. Apt Rechts
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
|
||||
countries:
|
||||
be:
|
||||
components:
|
||||
unit:
|
||||
null_probability: 0.65
|
||||
alphanumeric_probability: 0.35
|
||||
|
||||
levels:
|
||||
verdieping: &verdieping_flemish
|
||||
canonical: verdieping
|
||||
abbreviated: verdiep
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.7
|
||||
ordinal_probability: 0.3
|
||||
|
||||
aliases:
|
||||
"0":
|
||||
default: *het_gelijkvloers
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *gelijkvloers
|
||||
probability: 0.5
|
||||
alphanumeric:
|
||||
default: *verdieping_flemish
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *etage
|
||||
probability: 0.1
|
||||
|
||||
units:
|
||||
bus: &bus
|
||||
canonical: bus
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric:
|
||||
default: *appartement
|
||||
probability: 0.1
|
||||
alternatives:
|
||||
- alternative: *bus
|
||||
probability: 0.7
|
||||
- alternative: *kamer
|
||||
probability: 0.2
|
||||
509
resources/addresses/pl.yaml
Normal file
509
resources/addresses/pl.yaml
Normal file
@@ -0,0 +1,509 @@
|
||||
# pl.yaml
|
||||
# -------
|
||||
# Polish language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.04
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.9
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
- separator: " - "
|
||||
probability: 0.05
|
||||
probability: 0.01
|
||||
|
||||
numbers:
|
||||
default: &numer
|
||||
canonical: numer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
dom: &dom
|
||||
canonical: dom
|
||||
abbreviated: d
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric:
|
||||
default: *numer
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *dom
|
||||
probability: 0.4
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
and:
|
||||
default: &i
|
||||
canonical: i
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *i
|
||||
at: &w
|
||||
canonical: w
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner_of: &rogu
|
||||
canonical: rogu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &na_rogu
|
||||
canonical: na rogu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *i
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *w
|
||||
probability: 0.1
|
||||
- alternative: *rogu
|
||||
probability: 0.1
|
||||
- alternative: *na_rogu
|
||||
probability: 0.1
|
||||
|
||||
between:
|
||||
canonical: pomiędzy
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &pietro
|
||||
canonical: piętro
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
ordinal_suffix_probability: 0.6
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
parter: &parter
|
||||
canonical: parter
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
suterena: &suterena
|
||||
canonical: suterena
|
||||
# e.g. suterena 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. s1
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: left
|
||||
# e.g. 1. suterena
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *suterena
|
||||
"-1":
|
||||
default: *suterena
|
||||
"0":
|
||||
default: *parter
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *pietro
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *pietro
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: w pobliżu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: blisko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: koło
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: niedaleko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: obok
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: przy
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
nearby:
|
||||
default:
|
||||
canonical: w pobliżu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: w pobliżu tutaj
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: wokół tutaj
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: blisko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: w pobliżu mnie
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: w
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: we
|
||||
probability: 0.3
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
directions:
|
||||
right: &prawo
|
||||
canonical: prawo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &lewo
|
||||
canonical: lewo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *prawo
|
||||
probability: 0.5
|
||||
- alternative: *lewo
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &wschod
|
||||
canonical: wschód
|
||||
abbreviated: w
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: w
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &zachod
|
||||
canonical: zachód
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &polnoc
|
||||
canonical: północ
|
||||
abbreviated: pn
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: pn
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &poludnie
|
||||
canonical: południe
|
||||
abbreviated: pd
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: pd
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *polnoc
|
||||
probability: 0.25
|
||||
- alternative: *wschod
|
||||
probability: 0.25
|
||||
- alternative: *poludnie
|
||||
probability: 0.25
|
||||
- alternative: *zachod
|
||||
probability: 0.25
|
||||
|
||||
|
||||
entrances:
|
||||
wejscie: &wejscie
|
||||
canonical: wejście
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Wejście 1, Wejście A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *wejscie
|
||||
numeric_probability: 0.1 # e.g. Wejście 1
|
||||
alpha_probability: 0.85 # e.g. Wejście A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
schody: &schody
|
||||
canonical: schody
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *schody
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *polnoc
|
||||
- alternative: *poludnie
|
||||
- alternative: *wschod
|
||||
- alternative: *zachod
|
||||
|
||||
|
||||
po_boxes:
|
||||
skrytka_pocztowa: &skrytka_pocztowa
|
||||
canonical: skrytka pocztowa
|
||||
abbreviated: skr poczt
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # Skr Poczt 1234
|
||||
alphanumeric:
|
||||
default: *skrytka_pocztowa
|
||||
numeric_probability: 0.9 # Skr Poczt 123
|
||||
alpha_probability: 0.05 # Skr Poczt A
|
||||
numeric_plus_alpha_probability: 0.04 # Skr Poczt 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Skr Poczt A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
mieszkanie: &mieszkanie
|
||||
canonical: mieszkanie
|
||||
abbreviated: m
|
||||
sample: true
|
||||
canonical_probability: 0.05
|
||||
abbreviated_probability: 0.9
|
||||
sample_probability: 0.05
|
||||
numeric:
|
||||
direction: left
|
||||
pokoj: &pokoj
|
||||
canonical: pokój
|
||||
abbreviated: pok
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *mieszkanie
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *pokoj
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. m. 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. m. A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.01
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *pokoj
|
||||
numeric_probability: 0.95 # e.g. pokój 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. pokój 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. pokój A1
|
||||
alpha_probability: 0.03 # e.g. pokój A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university: *commercial_unit_types
|
||||
1054
resources/addresses/pt.yaml
Normal file
1054
resources/addresses/pt.yaml
Normal file
File diff suppressed because it is too large
Load Diff
504
resources/addresses/ro.yaml
Normal file
504
resources/addresses/ro.yaml
Normal file
@@ -0,0 +1,504 @@
|
||||
# ro.yaml
|
||||
# -------
|
||||
# Romanian language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
# If no floor number is specified
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.35
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.05
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.3
|
||||
alphanumeric_probability: 0.65
|
||||
standalone_probability: 0.05
|
||||
|
||||
numbers:
|
||||
default: &numar
|
||||
canonical: număr
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#" # e.g. #3, #2F, etc.
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
direction: left # affix goes on the number's left
|
||||
|
||||
# Probabilities for numbers
|
||||
numeric_probability: 0.9
|
||||
numeric_affix_probability: 0.1
|
||||
|
||||
and:
|
||||
default: &si
|
||||
canonical: și
|
||||
abbreviated: "&"
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.1
|
||||
|
||||
cross_streets:
|
||||
and: *si
|
||||
corner_of: &colt
|
||||
canonical: colț
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
at_the_corner_of: &la_coltul_de_pe
|
||||
canonical: la colțul de pe
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
intersection:
|
||||
default: *si
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *colt
|
||||
probability: 0.2
|
||||
- alternative: *la_coltul_de_pe
|
||||
probability: 0.1
|
||||
|
||||
between:
|
||||
canonical: între
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
house_numbers:
|
||||
# "fără număr" (FN) addresses, i.e. addresses that have no house number
|
||||
no_number:
|
||||
default:
|
||||
canonical: fără număr
|
||||
abbreviated: fn
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.2
|
||||
alphanumeric:
|
||||
default: *numar
|
||||
|
||||
alphanumeric_phrase_probability: 0.7
|
||||
no_number_probability: 0.1 # With this probability, use fara numar if no house_number is specified
|
||||
|
||||
|
||||
|
||||
levels:
|
||||
floor: &etaj
|
||||
canonical: etaj
|
||||
abbreviated: et
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true # Occasionally add variation of "number", e.g. et. nr 2
|
||||
add_number_phrase_probability: 0.05
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
roman_numeral_probability: 0.2
|
||||
# Ground floor
|
||||
parter: &parter
|
||||
canonical: parter
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
aliases:
|
||||
"0":
|
||||
default: *parter
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *etaj
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *etaj
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
numeric_probability: 0.99
|
||||
alpha_probability: 0.01
|
||||
|
||||
blocks:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: bloc
|
||||
abbreviated: bl
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: in apropiere de
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: în apropiere
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: in apropiere
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: aproape de aici
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: aici
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: în jurul aici
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: in jurul aici
|
||||
probability: 0.05
|
||||
near_me:
|
||||
default:
|
||||
canonical: lângă mine
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: langa mine
|
||||
probability: 0.3
|
||||
in:
|
||||
default:
|
||||
canonical: din
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
right: &dreapta
|
||||
canonical: dreapta
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: d
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
left: &stanga
|
||||
canonical: stânga
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
alternatives:
|
||||
- alternative: *dreapta
|
||||
probability: 0.5
|
||||
- alternative: *stanga
|
||||
probability: 0.5
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
east: &est
|
||||
canonical: est
|
||||
abbreviated: e
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: e
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &vest
|
||||
canonical: vest
|
||||
abbreviated: v
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &nord
|
||||
canonical: nord
|
||||
abbreviated: n
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &sud
|
||||
canonical: sud
|
||||
abbreviated: s
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.6
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
probability: 0.25
|
||||
- alternative: *est
|
||||
probability: 0.25
|
||||
- alternative: *sud
|
||||
probability: 0.25
|
||||
- alternative: *vest
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
entrada: &intrare
|
||||
canonical: intrare
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Intrare 1, Intrare A, etc.
|
||||
alphanumeric:
|
||||
default: *intrare
|
||||
numeric_probability: 0.1 # e.g. Intrare 1
|
||||
alpha_probability: 0.85 # e.g. Intrare A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *vest
|
||||
- alternative: *dreapta
|
||||
- alternative: *stanga
|
||||
|
||||
staircases:
|
||||
scara: &scara
|
||||
canonical: scara
|
||||
abbreviated: sc
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
# For alphanumerics, Scara A, Scara 1, etc.
|
||||
default: *scara
|
||||
numeric_probability: 0.35 # e.g. Scara 1
|
||||
alpha_probability: 0.6 # e.g. Scara A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right # e.g. Scara Nord
|
||||
direction_probability: 0.8
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *nord
|
||||
- alternative: *sud
|
||||
- alternative: *est
|
||||
- alternative: *vest
|
||||
- alternative: *dreapta
|
||||
- alternative: *stanga
|
||||
|
||||
po_boxes:
|
||||
casuta_postala: &casuta_postala
|
||||
canonical: căsuță poștală
|
||||
abbreviated: cp
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.4 # CP Nr 1234
|
||||
numeric_probability: 1.0
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *casuta_postala
|
||||
numeric_probability: 0.9 # CP 123
|
||||
alpha_probability: 0.05 # CP A
|
||||
numeric_plus_alpha_probability: 0.04 # CP 123G
|
||||
alpha_plus_numeric_probability: 0.01 # CP A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
apartament: &apartament
|
||||
canonical: apartament
|
||||
abbreviated: ap
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
sala: &sala
|
||||
canonical: sală
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
birou: &birou
|
||||
canonical: birou
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
lotul: &lotul
|
||||
canonical: lotul
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *apartament
|
||||
probability: 0.9
|
||||
sample: true
|
||||
alternatives:
|
||||
- alternative: *sala
|
||||
probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2 dreapta, 2 stânga, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.1
|
||||
add_direction_numeric: true # Only for numbers
|
||||
|
||||
numeric_probability: 0.9 # e.g. ap 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. ap 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. ap A1
|
||||
alpha_probability: 0.08 # e.g. ap A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
|
||||
zones:
|
||||
residential: *unit_alphanumeric
|
||||
commercial:
|
||||
default: *birou
|
||||
numeric_probability: 0.9 # e.g. Birou 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Birou 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Birou A1
|
||||
alpha_probability: 0.08 # e.g. Birou A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
industrial:
|
||||
default: *lotul
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *birou
|
||||
probability: 0.3
|
||||
- alternative: *sala
|
||||
probability: 0.2
|
||||
|
||||
numeric_probability: 0.9 # e.g. Lotul 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Lotul 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Lotul A1
|
||||
alpha_probability: 0.08 # e.g. Lotul A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
university:
|
||||
default: *sala
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *birou
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. Sala 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
|
||||
alpha_probability: 0.08 # e.g. Sala A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
1171
resources/addresses/ru.yaml
Normal file
1171
resources/addresses/ru.yaml
Normal file
File diff suppressed because it is too large
Load Diff
603
resources/addresses/sk.yaml
Normal file
603
resources/addresses/sk.yaml
Normal file
@@ -0,0 +1,603 @@
|
||||
# sk.yaml
|
||||
# -------
|
||||
# Slovakian language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.04
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
# Note: no combinations because of the house numbering scheme
|
||||
|
||||
|
||||
numbers:
|
||||
default: &cislo
|
||||
canonical: číslo
|
||||
abbreviated: č
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "č."
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
and:
|
||||
default: &a
|
||||
canonical: a
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
conscription_numbers:
|
||||
alphanumeric:
|
||||
default:
|
||||
canonical: súpisné číslo
|
||||
abbreviated: s.č.
|
||||
canonical_probability: 0.05
|
||||
abbreviated_probability: 0.85
|
||||
sample: true
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
cross_streets:
|
||||
and: *a
|
||||
at: &na
|
||||
canonical: na
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner_of: &rohu
|
||||
canonical: rohu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner: &roh
|
||||
canonical: roh
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &na_rohu
|
||||
canonical: na rohu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *a
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *na
|
||||
probability: 0.1
|
||||
- alternative: *roh
|
||||
probability: 0.1
|
||||
- alternative: *rohu
|
||||
probability: 0.1
|
||||
- alternative: *na_rohu
|
||||
probability: 0.1
|
||||
|
||||
between:
|
||||
canonical: medzi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
levels:
|
||||
floor: &poschodie
|
||||
canonical: poschodie
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
podlazie: &podlazie
|
||||
canonical: podlažie
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
nadzemne_podlazie: &nadzemne_podlazie
|
||||
canonical: nadzemné podlažie
|
||||
abbreviated: np
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
abbreviated_probability: 0.8
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
etaz: &etaz
|
||||
canonical: etáž
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
prizemie: &prizemie
|
||||
canonical: prízemie
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
podzemne_podlazie: &podzemne_podlazie
|
||||
canonical: podzemné podlažie
|
||||
abbreviated: pp
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.3
|
||||
# e.g. podzemné podlažie 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. pp1
|
||||
numeric_affix:
|
||||
affix: pp
|
||||
direction: left
|
||||
# e.g. 1. podzemné podlažie
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.985
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *podzemne_podlazie
|
||||
"-1":
|
||||
default: *podzemne_podlazie
|
||||
"0":
|
||||
default: *prizemie
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *poschodie
|
||||
probability: 0.05
|
||||
- alternative: *podlazie
|
||||
probability: 0.05
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *poschodie
|
||||
probability: 0.45
|
||||
alternatives:
|
||||
- alternative: *podlazie
|
||||
probability: 0.35
|
||||
- alternative: *nadzemne_podlazie
|
||||
probability: 0.19
|
||||
- alternative: *etaz
|
||||
probability: 0.01
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: v blízkosti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: u
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: v okolí
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: okolo
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
probability: 0.05
|
||||
nearby:
|
||||
default:
|
||||
canonical: blízkosti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.4
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: blízko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: v blízkosti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
# NOTE(review): "tady" looks Czech rather than Slovak (this list already has
# "tu" and "v blízkosti tu" entries) — confirm with a native speaker before
# relying on the two phrases below.
- alternative:
|
||||
canonical: tady blízkosti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tady
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: tu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: v blízkosti tu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
- alternative:
|
||||
canonical: v okolí
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.05
|
||||
near_me:
|
||||
default:
|
||||
canonical: v blízkosti mne
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: v
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: vo
|
||||
probability: 0.3
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
right: &prava
|
||||
canonical: pravá
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &lava
|
||||
canonical: ľavá
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *prava
|
||||
probability: 0.5
|
||||
- alternative: *lava
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &vychod
|
||||
canonical: východ
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &zapad
|
||||
canonical: západ
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &sever
|
||||
canonical: sever
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &juh
|
||||
canonical: juh
|
||||
abbreviated: j
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: j
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *sever
|
||||
probability: 0.25
|
||||
- alternative: *vychod
|
||||
probability: 0.25
|
||||
- alternative: *juh
|
||||
probability: 0.25
|
||||
- alternative: *zapad
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
vchod: &vchod
|
||||
canonical: vchod
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Entrance phrase (default: vchod) followed by an identifier,
# e.g. Vchod 1, Vchod A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *vchod
|
||||
# The four *_probability weights below sum to 1.
numeric_probability: 0.1 # e.g. Vchod 1
|
||||
alpha_probability: 0.85 # e.g. Vchod A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
schodisko: &schodisko
|
||||
canonical: schodisko
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *schodisko
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *sever
|
||||
- alternative: *juh
|
||||
- alternative: *vychod
|
||||
- alternative: *zapad
|
||||
|
||||
po_boxes:
|
||||
postova_priehradka: &postova_priehradka
|
||||
canonical: poštová priehradka
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # poštová priehradka 1234
|
||||
alphanumeric:
|
||||
default: *postova_priehradka
|
||||
numeric_probability: 0.9 # poštová priehradka 123
|
||||
alpha_probability: 0.05 # poštová priehradka A
|
||||
numeric_plus_alpha_probability: 0.04 # poštová priehradka 123G
|
||||
alpha_plus_numeric_probability: 0.01 # poštová priehradka A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
apartaman: &apartaman
|
||||
canonical: apartmán
|
||||
abbreviated: apt
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
izba: &izba
|
||||
canonical: izba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
kancelaria: &kancelaria
|
||||
canonical: kancelária
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *apartaman
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *izba
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. apt. 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. apt. A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.01
|
||||
|
||||
zones:
|
||||
# Unit phrases for commercial zones: izba by default (0.6), with
# kancelária as the alternative (0.4).
commercial: &commercial_unit_types
|
||||
default: *izba
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *kancelaria
|
||||
probability: 0.4
|
||||
# The four *_probability weights below sum to 1.
numeric_probability: 0.95 # e.g. izba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. izba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. izba A1
|
||||
alpha_probability: 0.03 # e.g. izba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
# Unit phrases for university zones: always izba (no alternatives listed).
university:
|
||||
default: *izba
|
||||
# The four *_probability weights below sum to 1.
numeric_probability: 0.95 # e.g. izba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. izba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. izba A1
|
||||
alpha_probability: 0.03 # e.g. izba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
539
resources/addresses/sl.yaml
Normal file
539
resources/addresses/sl.yaml
Normal file
@@ -0,0 +1,539 @@
|
||||
# sl.yaml
|
||||
# -------
|
||||
# Slovenian language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.7
|
||||
alphanumeric_probability: 0.3
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- staircase
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.1
|
||||
# For unit types like 2/34
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
|
||||
|
||||
numbers:
|
||||
no_number:
|
||||
default:
|
||||
canonical: brez številke
|
||||
abbreviated: brez št
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
|
||||
default: &stevilke
|
||||
canonical: številke
|
||||
abbreviated: št
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "št."
|
||||
whitespace_probability: 0.6
|
||||
direction: left
|
||||
numeric_probability: 0.6
|
||||
numeric_affix_probability: 0.4
|
||||
|
||||
alphanumeric_phrase_probability: 0.05
|
||||
no_number_probability: 0.05
|
||||
|
||||
|
||||
and:
|
||||
default: &in
|
||||
canonical: in
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
|
||||
cross_streets:
|
||||
i: *in
|
||||
at: &na
|
||||
canonical: na
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
corner: &vogalu
|
||||
canonical: vogalu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_vogalu: &na_vogalu
|
||||
canonical: na vogalu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *in
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *na
|
||||
probability: 0.1
|
||||
- alternative: *vogalu
|
||||
probability: 0.15
|
||||
- alternative: *na_vogalu
|
||||
probability: 0.05
|
||||
|
||||
med: &med
|
||||
canonical: med
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
between:
|
||||
default: *med
|
||||
|
||||
levels:
|
||||
nadstropje: &nadstropje
|
||||
canonical: nadstropje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
pritlicje: &pritlicje
|
||||
canonical: pritličje
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
parter: &parter
|
||||
canonical: parter
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
kleti: &kleti
|
||||
canonical: kleti
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
# e.g. kleti 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. 1. kleti
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *kleti
|
||||
"-1":
|
||||
default: *kleti
|
||||
"0":
|
||||
default: *pritlicje
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative: *parter
|
||||
probability: 0.4
|
||||
- alternative: *nadstropje
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *nadstropje
|
||||
numeric_probability: 0.69 # With this probability, pick an integer
|
||||
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: v bližini
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: pri
|
||||
probability: 0.4
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: v bližini
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.5
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: v bližini tukaj
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.3
|
||||
- alternative:
|
||||
canonical: okoli tukaj
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: tukaj
|
||||
probability: 0.1
|
||||
|
||||
near_me:
|
||||
default:
|
||||
canonical: blizu mene
|
||||
|
||||
# Don't worry about agreement
|
||||
in:
|
||||
default:
|
||||
canonical: v
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
# Phrase for the "right" direction modifier. The anchor name &prav is kept
# unchanged because other entries in this file alias it (*prav).
right: &prav
  # "desno" is the Slovenian word for the direction "right" and pairs with
  # the sibling entry "levo"; the previous value "prav" means "right" only
  # in the sense of "correct".
  canonical: desno
  sample: true
  canonical_probability: 0.8
  sample_probability: 0.2
  numeric:
    direction: right
|
||||
left: &levo
|
||||
canonical: levo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *prav
|
||||
probability: 0.5
|
||||
- alternative: *levo
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &vzhod
|
||||
canonical: vzhod
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &zahod
|
||||
canonical: zahod
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &sever
|
||||
canonical: sever
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &jug
|
||||
canonical: jug
|
||||
abbreviated: j
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: j
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
# Candidate cardinal-direction phrases, each equally likely.
# The weights form a distribution and should sum to 1, like the other
# alternative lists in this file; they previously summed to 0.94
# (0.25 + 3 * 0.23).
alternatives:
  - alternative: *sever
    probability: 0.25
  - alternative: *vzhod
    probability: 0.25
  - alternative: *jug
    probability: 0.25
  - alternative: *zahod
    probability: 0.25
|
||||
|
||||
entrances:
|
||||
vhod: &vhod
|
||||
canonical: vhod
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# Vhod 1, Vhod A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *vhod
|
||||
numeric_probability: 0.1 # e.g. Vhod 1
|
||||
alpha_probability: 0.85 # e.g. Vhod A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
|
||||
staircases:
|
||||
stopnisce: &stopnisce
|
||||
canonical: stopnišče
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *stopnisce
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *prav
|
||||
probability: 0.2
|
||||
- alternative: *levo
|
||||
probability: 0.2
|
||||
- alternative: *sever
|
||||
probability: 0.15
|
||||
- alternative: *jug
|
||||
probability: 0.15
|
||||
- alternative: *vzhod
|
||||
probability: 0.15
|
||||
- alternative: *zahod
|
||||
probability: 0.15
|
||||
|
||||
po_boxes:
|
||||
postni_predal: &postni_predal
|
||||
canonical: poštni predal
|
||||
abbreviated: p.p
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
# PO box phrase (default: poštni predal, abbreviated "p.p") followed by a
# box identifier. The four *_probability weights below sum to 1.
alphanumeric:
|
||||
default: *postni_predal
|
||||
numeric_probability: 0.9 # e.g. p.p 123
|
||||
alpha_probability: 0.05 # e.g. p.p A
|
||||
numeric_plus_alpha_probability: 0.04 # e.g. p.p 123G
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. p.p A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
stanovanje: &stanovanje
|
||||
canonical: stanovanje
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
soba: &soba
|
||||
canonical: soba
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
urad: &urad
|
||||
canonical: urad
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *stanovanje
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *soba
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. stanovanje 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. stanovanje A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.05
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *soba
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *urad
|
||||
probability: 0.4
|
||||
numeric_probability: 0.95 # e.g. soba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
|
||||
alpha_probability: 0.03 # e.g. soba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *soba
|
||||
numeric_probability: 0.95 # e.g. soba 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
|
||||
alpha_probability: 0.03 # e.g. soba A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
953
resources/addresses/sr.yaml
Normal file
953
resources/addresses/sr.yaml
Normal file
@@ -0,0 +1,953 @@
|
||||
# sr.yaml
|
||||
# -------
|
||||
# Serbian language specification
|
||||
|
||||
# The 30 letters of the Serbian Cyrillic alphabet.
alphabet: 'абвгдђежзијклљмнњопрстћуфхцчџш'
# NOTE(review): elsewhere in this file alphanumeric_probability only appears
# under a component; confirm the loader really reads a top-level key with this
# name (it may have been meant as the weight that pairs with `alphabet`).
alphanumeric_probability: 0.7
|
||||
|
||||
# Per-component weights plus the slash/hyphen-joined house-number combinations.
# NOTE(review): indentation was restored from a flattened listing; confirm that
# `combinations` is nested under `components` as the loader expects.
components:
  # Each null/alphanumeric pair sums to 1.
  level:
    null_probability: 0.8
    alphanumeric_probability: 0.2

  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01

  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001

  unit:
    null_probability: 0.7
    alphanumeric_probability: 0.3

  combinations:
    # In every entry the separator weights sum to 1; the trailing
    # `probability` is the weight of the combination itself.
    - components:
        - house_number
        - staircase
        - level
        - unit
      label: house_number
      separators:
        - separator: '/'
          probability: 0.95
        - separator: '-'
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
        - unit
      label: house_number
      separators:
        - separator: '/'
          probability: 0.95
        - separator: '-'
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
      label: house_number
      separators:
        - separator: '/'
          probability: 0.95
        - separator: '-'
          probability: 0.05
      probability: 0.1
    # House number joined with a unit, e.g. 2/34.
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: '/'
          probability: 0.95
        - separator: '-'
          probability: 0.05
      probability: 0.005
|
||||
|
||||
|
||||
|
||||
# Number phrase ("број" / "broj") in Cyrillic and Latin script.
# NOTE(review): unlike the `and` section, neither the default nor the
# alternative carries a `probability` weight here; confirm that is intended.
numbers:
  default: &broj
    canonical: број
    abbreviated: бр
    sample: true
    # canonical + abbreviated + sample weights sum to 1.
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
    numeric_affix:
      affix: 'бр.'
      direction: left
    # numeric + numeric_affix weights sum to 1.
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  alternatives:
    - alternative: &broj_latin
        canonical: broj
        abbreviated: br
        sample: true
        canonical_probability: 0.3
        abbreviated_probability: 0.6
        sample_probability: 0.1
        numeric:
          direction: left
        numeric_affix:
          affix: 'br.'
          direction: left
        numeric_probability: 0.4
        numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
# Conjunction "and": Cyrillic default (0.9) with a Latin alternative (0.1).
and:
  default: &i
    canonical: и
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  probability: 0.9
  alternatives:
    - alternative: &i_latin
        canonical: i
        sample: true
        canonical_probability: 0.8
        sample_probability: 0.2
      probability: 0.1
|
||||
|
||||
|
||||
# Phrases joining two streets (intersection) or bracketing a span (between),
# each in Cyrillic and Latin script.
cross_streets:
  # Reuse the conjunction phrases anchored in the `and` section.
  i: *i
  i_latin: *i_latin
  at: &na
    canonical: на
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_latin: &na_latin
    canonical: na
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  corner: &ugao
    canonical: угао
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  ugao_latin: &ugao_latin
    canonical: ugao
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu: &na_uglu
    canonical: на углу
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu_latin: &na_uglu_latin
    canonical: na uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  intersection:
    # default + alternatives weights sum to 1.
    default: *i
    probability: 0.65
    alternatives:
      - alternative: *i_latin
        probability: 0.05
      - alternative: *na
        probability: 0.075
      - alternative: *na_latin
        probability: 0.025
      - alternative: *ugao
        probability: 0.1
      - alternative: *ugao_latin
        probability: 0.05
      - alternative: *na_uglu
        probability: 0.025
      - alternative: *na_uglu_latin
        probability: 0.025
  izmedu: &izmedu
    canonical: између
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5
  izmedu_latin: &izmedu_latin
    canonical: između
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5
  between:
    default: *izmedu
    probability: 0.9
    alternatives:
      - alternative: *izmedu_latin
        probability: 0.1
|
||||
|
||||
# Floor/level phrases in Cyrillic and Latin script.
# In the numeric/ordinal blocks, `direction` is the side of the number on
# which the phrase is written: left gives "подрум 1", right gives "1. подрум".
levels:
  sprat: &sprat
    canonical: спрат
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    # numeric + ordinal weights sum to 1.
    numeric_probability: 0.4
    ordinal_probability: 0.6
  sprat_latin: &sprat_latin
    canonical: sprat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  kat: &kat
    canonical: кат
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  kat_latin: &kat_latin
    canonical: kat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  etaza: &etaza
    canonical: етажа
    abbreviated: ет
    sample: true
    # canonical + abbreviated + sample weights sum to 1.
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  etaza_latin: &etaza_latin
    canonical: etaža
    abbreviated: et
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  # Ground-floor phrases; these carry no numeric/ordinal form.
  prizemlje: &prizemlje
    canonical: приземље
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  prizemlje_latin: &prizemlje_latin
    canonical: prizemlje
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  parter: &parter
    canonical: партер
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  parter_latin: &parter_latin
    canonical: parter
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1

  # Basement phrases: standalone + numeric + ordinal weights sum to 1.
  podrum: &podrum
    canonical: подрум
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. подрум 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. подрум
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005
  podrum_latin: &podrum_latin
    canonical: podrum
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. podrum 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. podrum
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005

  # Phrases used for specific floor numbers; weights in each entry sum to 1.
  aliases:
    '<-1':
      default: *podrum
      probability: 0.8
      alternatives:
        - alternative: *podrum_latin
          probability: 0.2
    '-1':
      default: *podrum
      probability: 0.8
      alternatives:
        - alternative: *podrum_latin
          probability: 0.2
    '0':
      default: *prizemlje
      probability: 0.45
      alternatives:
        - alternative: *prizemlje_latin
          probability: 0.05
        - alternative: *parter
          probability: 0.35
        - alternative: *parter_latin
          probability: 0.05
        - alternative: *sprat
          probability: 0.04
        - alternative: *sprat_latin
          probability: 0.01
        - alternative: *kat
          probability: 0.04
        - alternative: *kat_latin
          probability: 0.01

  numbering_starts_at: 0

  alphanumeric:
    # default + alternatives weights sum to 1.
    default: *sprat
    probability: 0.65
    alternatives:
      - alternative: *sprat_latin
        probability: 0.1
      - alternative: *kat
        probability: 0.15
      - alternative: *kat_latin
        probability: 0.05
      - alternative: *etaza
        probability: 0.04
      - alternative: *etaza_latin
        probability: 0.01
    # The five value-shape weights below sum to 1.
    numeric_probability: 0.69  # pick an integer
    roman_numeral_probability: 0.3  # pick a Roman numeral for the value
    alpha_probability: 0.0098  # pick a letter, e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
|
||||
|
||||
# Relative directions (right/left) in Cyrillic and Latin script.
directions:
  right: &desno
    canonical: десно
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  desno_latin: &desno_latin
    canonical: desno
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  left: &levo
    canonical: лево
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  levo_latin: &levo_latin
    canonical: levo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  # Weights sum to 1.
  alternatives:
    - alternative: *desno
      probability: 0.45
    - alternative: *desno_latin
      probability: 0.05
    - alternative: *levo
      probability: 0.45
    - alternative: *levo_latin
      probability: 0.05
|
||||
|
||||
# Compass directions in Cyrillic and Latin script. Each entry offers the full
# word or a one-letter abbreviation, which is also used as a numeric affix.
cardinal_directions:
  east: &istok
    canonical: исток
    abbreviated: и
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: и
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  istok_latin: &istok_latin
    canonical: istok
    abbreviated: i
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: i
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  west: &zapad
    canonical: запад
    abbreviated: з
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: з
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  zapad_latin: &zapad_latin
    canonical: zapad
    abbreviated: z
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: z
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  north: &sever
    canonical: север
    abbreviated: с
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: с
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  sever_latin: &sever_latin
    canonical: sever
    abbreviated: s
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  # The two south entries are the only ones that also enable sampling.
  south: &jug
    canonical: југ
    abbreviated: ј
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: ј
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  jug_latin: &jug_latin
    canonical: jug
    abbreviated: j
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: j
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  # Weights sum to 1: 0.23 per Cyrillic form, 0.02 per Latin form.
  alternatives:
    - alternative: *sever
      probability: 0.23
    - alternative: *sever_latin
      probability: 0.02
    - alternative: *istok
      probability: 0.23
    - alternative: *istok_latin
      probability: 0.02
    - alternative: *jug
      probability: 0.23
    - alternative: *jug_latin
      probability: 0.02
    - alternative: *zapad
      probability: 0.23
    - alternative: *zapad_latin
      probability: 0.02
|
||||
|
||||
# Entrance phrase ("улаз" / "ulaz") and the mix of identifier shapes.
entrances:
  ulaz: &ulaz
    canonical: улаз
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left

  ulaz_latin: &ulaz_latin
    canonical: ulaz
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left

  # Ulaz 1, Ulaz A, etc.
  alphanumeric: &entrance_alphanumeric
    # default + alternatives weights sum to 1.
    default: *ulaz
    probability: 0.8
    alternatives:
      - alternative: *ulaz_latin
        probability: 0.2
    # Identifier-shape weights sum to 1; letters dominate for entrances.
    numeric_probability: 0.1  # e.g. Ulaz 1
    alpha_probability: 0.85  # e.g. Ulaz A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1

    alpha_plus_numeric:
      whitespace_probability: 0.1

    numeric_plus_alpha:
      whitespace_probability: 0.1
|
||||
|
||||
|
||||
|
||||
# Staircase phrase ("степениште" / "stepenište"), identifier shapes and
# directional modifiers.
# NOTE(review): indentation was restored from a flattened listing; confirm
# that `directional` belongs inside `alphanumeric`.
staircases:
  stepeniste: &stepeniste
    canonical: степениште
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left

  stepeniste_latin: &stepeniste_latin
    canonical: stepenište
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left

  alphanumeric: &staircase_alphanumeric
    # default + alternatives weights sum to 1.
    default: *stepeniste
    probability: 0.8
    alternatives:
      - alternative: *stepeniste_latin
        probability: 0.2
    # Identifier-shape weights sum to 1.
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025

    alpha_plus_numeric:
      whitespace_probability: 0.1

    numeric_plus_alpha:
      whitespace_probability: 0.1

    directional:
      direction: right
      direction_probability: 0.85
      modifier:
        # Weights sum to 1 across relative and cardinal directions.
        alternatives:
          - alternative: *desno
            probability: 0.19
          - alternative: *desno_latin
            probability: 0.01
          - alternative: *levo
            probability: 0.19
          - alternative: *levo_latin
            probability: 0.01
          - alternative: *sever
            probability: 0.14
          - alternative: *sever_latin
            probability: 0.01
          - alternative: *jug
            probability: 0.14
          - alternative: *jug_latin
            probability: 0.01
          - alternative: *istok
            probability: 0.14
          - alternative: *istok_latin
            probability: 0.01
          - alternative: *zapad
            probability: 0.14
          - alternative: *zapad_latin
            probability: 0.01
|
||||
|
||||
# Post-office-box phrases in Cyrillic and Latin script, identifier shapes and
# the distribution of box-number lengths.
# NOTE(review): indentation was restored from a flattened listing; confirm
# that `digits` sits beside `alphanumeric` rather than inside it.
po_boxes:
  postanski_fah: &postanski_fah
    canonical: поштански фах
    abbreviated: пф
    sample: true
    # canonical + abbreviated + sample weights sum to 1.
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # e.g. поштански фах бр. 1234
  postanski_fah_latin: &postanski_fah_latin
    canonical: poštanski fah
    abbreviated: pf
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # e.g. poštanski fah br. 1234
  postanski_pretinac: &postanski_pretinac
    canonical: поштански претинац
    sample: true
    canonical_probability: 0.6
    # Was 0.5, making the pair sum to 1.1; aligned with the Latin twin.
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pretinac_latin: &postanski_pretinac_latin
    canonical: poštanski pretinac
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pregradak: &postanski_pregradak
    canonical: поштански преградак
    sample: true
    canonical_probability: 0.6
    # Was 0.5, making the pair sum to 1.1; aligned with the Latin twin.
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pregradak_latin: &postanski_pregradak_latin
    canonical: poštanski pregradak
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2

  alphanumeric:
    # default + alternatives weights sum to 1.
    default: *postanski_fah
    probability: 0.7
    alternatives:
      - alternative: *postanski_fah_latin
        probability: 0.05
      - alternative: *postanski_pretinac
        probability: 0.1
      - alternative: *postanski_pretinac_latin
        probability: 0.05
      - alternative: *postanski_pregradak
        probability: 0.075
      - alternative: *postanski_pregradak_latin
        probability: 0.025
    # Identifier-shape weights sum to 1.
    numeric_probability: 0.9  # pf 123
    alpha_probability: 0.05  # pf A
    numeric_plus_alpha_probability: 0.04  # pf 123G
    alpha_plus_numeric_probability: 0.01  # pf A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

  # Box-number length distribution; weights sum to 1.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
|
||||
|
||||
# Unit phrases (flat, apartment, room, office) in Cyrillic and Latin script,
# plus the default and zone-specific phrase mixes.
# NOTE(review): indentation was restored from a flattened listing; confirm
# that use_floor_probability belongs inside `alphanumeric` and that `zones`
# sits beside it.
units:
  stan: &stan
    canonical: стан
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  stan_latin: &stan_latin
    canonical: stan
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  apartman: &apartman
    canonical: апартман
    abbreviated: ап
    sample: true
    # canonical + abbreviated + sample weights sum to 1.
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  apartman_latin: &apartman_latin
    canonical: apartman
    abbreviated: ap
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  soba: &soba
    canonical: соба
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  soba_latin: &soba_latin
    canonical: soba
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  kancelarija: &kancelarija
    canonical: канцеларија
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  kancelarija_latin: &kancelarija_latin
    canonical: kancelarija
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  alphanumeric: &unit_alphanumeric
    # default + alternatives weights sum to 1.
    default: *stan
    probability: 0.5
    alternatives:
      - alternative: *stan_latin
        probability: 0.1
      - alternative: *apartman
        probability: 0.2
      - alternative: *apartman_latin
        probability: 0.05
      - alternative: *soba
        probability: 0.1
      - alternative: *soba_latin
        probability: 0.05
    # Number-shape weights sum to 1.
    numeric_probability: 0.9  # e.g. стан 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. стан A

    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

    # With 10 floors, produce unit numbers such as #301 or #1032.
    use_floor_probability: 0.01

  zones:
    commercial: &commercial_unit_types
      # default + alternatives weights sum to 1.
      default: *soba
      probability: 0.55
      alternatives:
        - alternative: *soba_latin
          probability: 0.05
        - alternative: *kancelarija
          probability: 0.35
        - alternative: *kancelarija_latin
          probability: 0.05
      numeric_probability: 0.95  # e.g. соба 1
      numeric_plus_alpha_probability: 0.01  # e.g. соба 1A
      alpha_plus_numeric_probability: 0.01  # e.g. соба A1
      alpha_probability: 0.03  # e.g. соба A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *soba
      probability: 0.9
      alternatives:
        - alternative: *soba_latin
          probability: 0.1
      numeric_probability: 0.95  # e.g. соба 1
      numeric_plus_alpha_probability: 0.01  # e.g. соба 1A
      alpha_plus_numeric_probability: 0.01  # e.g. соба A1
      alpha_probability: 0.03  # e.g. соба A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
|
||||
795
resources/addresses/sv.yaml
Normal file
795
resources/addresses/sv.yaml
Normal file
@@ -0,0 +1,795 @@
|
||||
# sv.yaml
|
||||
# -------
|
||||
# Swedish language specification.
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85
|
||||
alphanumeric_probability: 0.1
|
||||
standalone_probability: 0.05
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
numbers:
|
||||
default: &nummer
|
||||
canonical: nummer
|
||||
abbreviated: nr
|
||||
sample: true
|
||||
# Probabilities
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
sample_exclude:
|
||||
- "#"
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "#"
|
||||
direction: left
|
||||
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *nummer
|
||||
|
||||
alphanumeric_phrase_probability: 0.0001
|
||||
|
||||
|
||||
and:
|
||||
default: &och
|
||||
canonical: och
|
||||
abbreviated: "&"
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.75
|
||||
sample: true
|
||||
sample_probability: 0.05
|
||||
|
||||
cross_streets:
|
||||
and: *och
|
||||
corner_of: &hornet_av
|
||||
canonical: hörnet av
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
at_the_corner_of: &i_hornet_av
|
||||
canonical: i hörnet av
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *och
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *hornet_av
|
||||
probability: 0.15
|
||||
- alternative: *i_hornet_av
|
||||
probability: 0.15
|
||||
|
||||
between:
|
||||
canonical: mellan
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
|
||||
|
||||
levels:
|
||||
vaningen: &vaningen
|
||||
canonical: våningen
|
||||
abbreviated: vån
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
ordinal_probability: 1.0
|
||||
vaning: &vaning
|
||||
canonical: våning
|
||||
abbreviated: vån
|
||||
sample: true
|
||||
canonical_probability: 0.5
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
ordinal:
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
numeric_probability: 0.8
|
||||
ordinal_probability: 0.2
|
||||
plan: &plan
|
||||
canonical: plan
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
entreplan: &entreplan
|
||||
canonical: entréplan
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
trappa_upp: &trappa_upp
|
||||
canonical: trappa upp
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
number_min_abs_value: 2
|
||||
number_max_abs_value: 2
|
||||
number_subtract_abs_value: 1
|
||||
numeric_probability: 0.8
|
||||
ordinal_probability: 0.2
|
||||
trappor_upp: &trappor_upp
|
||||
canonical: trappor upp
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
number_min_abs_value: 3
|
||||
number_subtract_abs_value: 1
|
||||
numeric_probability: 0.8
|
||||
ordinal_probability: 0.2
|
||||
trappa: &trappa
|
||||
canonical: trappa
|
||||
abbreviated: tr
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
number_min_abs_value: 2
|
||||
number_max_abs_value: 2
|
||||
number_subtract_abs_value: 1
|
||||
numeric_probability: 0.8
|
||||
ordinal_probability: 0.2
|
||||
trappor: &trappor
|
||||
canonical: trappor
|
||||
abbreviated: tr
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.8
|
||||
spellout_probability: 0.2
|
||||
number_min_abs_value: 3
|
||||
number_subtract_abs_value: 1
|
||||
numeric_probability: 0.8
|
||||
ordinal_probability: 0.2
|
||||
bottenvaning: &bottenvaning
|
||||
canonical: bottenvåning
|
||||
abbreviated: bv
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
vindsvaningen: &vindsvaningen
|
||||
canonical: vindsvåningen
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
standalone_probability: 1.0
|
||||
vinds: &vinds
|
||||
canonical: vinds
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
standalone_probability: 1.0
|
||||
kallare: &kallare
|
||||
canonical: källare
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
# e.g. 1 källare
|
||||
numeric:
|
||||
direction: right
|
||||
direction_probability: 0.8
|
||||
# e.g. k1
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: left
|
||||
# e.g. 1:a k
|
||||
ordinal:
|
||||
direction: right
|
||||
standalone_probability: 0.9
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.09
|
||||
ordinal_probability: 0.005
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *kallare
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *vaning
|
||||
probability: 0.025
|
||||
- alternative: *vaningen
|
||||
probability: 0.025
|
||||
"-1":
|
||||
default: *kallare
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *vaning
|
||||
probability: 0.05
|
||||
- alternative: *vaningen
|
||||
probability: 0.05
|
||||
"0":
|
||||
default: *bottenvaning
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *entreplan
|
||||
probability: 0.2
|
||||
- alternative: *vaningen
|
||||
probability: 0.1
|
||||
- alternative: *vaning
|
||||
probability: 0.1
|
||||
"1":
|
||||
default: *bottenvaning
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *entreplan
|
||||
probability: 0.2
|
||||
- alternative: *vaningen
|
||||
probability: 0.1
|
||||
- alternative: *vaning
|
||||
probability: 0.1
|
||||
"top":
|
||||
default: *vaningen
|
||||
probability: 0.35
|
||||
alternatives:
|
||||
- alternative: *vaning
|
||||
probability: 0.35
|
||||
- alternative: *trappor_upp
|
||||
probability: 0.1
|
||||
- alternative: *trappor
|
||||
probability: 0.1
|
||||
- alternative: *vindsvaningen
|
||||
probability: 0.05
|
||||
- alternative: *vinds
|
||||
probability: 0.05
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *vaningen
|
||||
probability: 0.25
|
||||
alternatives:
|
||||
- alternative: *vaning
|
||||
probability: 0.2
|
||||
- alternative: *plan
|
||||
probability: 0.05
|
||||
- alternative: *trappa_upp
|
||||
probability: 0.125
|
||||
- alternative: *trappa
|
||||
probability: 0.125
|
||||
- alternative: *trappor_upp
|
||||
probability: 0.125
|
||||
- alternative: *trappor
|
||||
probability: 0.125
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: i närheten av
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: nära
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
nearby:
|
||||
default:
|
||||
canonical: i närheten
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.4
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: runt här
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
- alternative:
|
||||
canonical: nära här
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: nära här
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: nära
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
canonical: omkring här
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
near_me:
|
||||
default:
|
||||
canonical: nära mig
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: i närheten av mig
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.2
|
||||
|
||||
in:
|
||||
default:
|
||||
canonical: i
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: på
|
||||
probability: 0.2
|
||||
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
directions:
|
||||
right: &hoger
|
||||
canonical: höger
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
sample_probability: 0.9
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: h
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
left: &vanster
|
||||
canonical: vänster
|
||||
sample: true
|
||||
canonical_probability: 0.1
|
||||
sample_probability: 0.9
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
whitespace_probability: 0.1
|
||||
numeric_probability: 0.8
|
||||
numeric_affix_probability: 0.2
|
||||
alternatives:
|
||||
- alternative: *hoger
|
||||
probability: 0.5
|
||||
- alternative: *vanster
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &ost
|
||||
canonical: öst
|
||||
abbreviated: ö
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: ö
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
eastern: &ostra
|
||||
canonical: östra
|
||||
abbreviated: ö:a
|
||||
canonical_probability: 0.9
|
||||
abbreviated_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
west: &vast
|
||||
canonical: väst
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
western: &vastra
|
||||
canonical: västra
|
||||
abbreviated: v:a
|
||||
canonical_probability: 0.9
|
||||
abbreviated_probability: 0.1
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
north: &norr
|
||||
canonical: norr
|
||||
abbreviated: n
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: n
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
northern: &norra
|
||||
canonical: norra
|
||||
abbreviated: n:a
|
||||
canonical_probability: 0.9
|
||||
abbreviated_probability: 0.1
|
||||
|
||||
south: &sod
|
||||
canonical: syd
|
||||
abbreviated: s
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
southern: &sodra
|
||||
canonical: södra
|
||||
abbreviated: s:a
|
||||
canonical_probability: 0.9
|
||||
abbreviated_probability: 0.1
|
||||
|
||||
alternatives:
|
||||
- alternative: *norr
|
||||
probability: 0.25
|
||||
- alternative: *ost
|
||||
probability: 0.25
|
||||
- alternative: *sod
|
||||
probability: 0.25
|
||||
- alternative: *vast
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
ingang: &ingang
|
||||
canonical: ingång
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
entre: &entre
|
||||
canonical: entré
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# ingång 1, ingång A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *ingang
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *entre
|
||||
probability: 0.4
|
||||
numeric_probability: 0.1 # e.g. ingång 1
|
||||
alpha_probability: 0.85 # e.g. ingång A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
uppgang: &uppgang
|
||||
canonical: uppgång
|
||||
abbreviated: u
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
uppgang_hoger: &uppgang_hoger
|
||||
canonical: uppgång höger
|
||||
abbreviated: uh
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
uppgang_vanster: &uppgang_vanster
|
||||
canonical: uppgång vänster
|
||||
abbreviated: uv
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *uppgang
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *uppgang_hoger
|
||||
probability: 0.2
|
||||
- alternative: *uppgang_vanster
|
||||
probability: 0.2
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *norr
|
||||
- alternative: *sod
|
||||
- alternative: *ost
|
||||
- alternative: *vast
|
||||
|
||||
po_boxes:
|
||||
box: &box
|
||||
canonical: box
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # e.g. box nr 1234
|
||||
postlada: &postlada
|
||||
canonical: postlåda
|
||||
abbreviated: pl
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2 # e.g. pl nr 1234
|
||||
|
||||
alphanumeric:
|
||||
sample: false
|
||||
default: *box
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *postlada
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # Box 123
|
||||
alpha_probability: 0.05 # Box A
|
||||
numeric_plus_alpha_probability: 0.04 # Box 123G
|
||||
alpha_plus_numeric_probability: 0.01 # Box A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.1
|
||||
- length: 5
|
||||
probability: 0.5
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
|
||||
units:
|
||||
lagenhet: &lagenhet
|
||||
canonical: lägenhet
|
||||
abbreviated: lgh
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
null_phrase_probability: 0.1
|
||||
# e.g. lägenhet nummer 4
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
bostad: &bostad
|
||||
canonical: bostad
|
||||
abbreviated: bst
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.05
|
||||
lagenhetsnummer: &lagenhetsnummer
|
||||
canonical: lägenhetsnummer
|
||||
abbreviated: lgh nr
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
hus: &hus
|
||||
canonical: hus
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
rum: &rum
|
||||
canonical: rum
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *lagenhet
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *lagenhetsnummer
|
||||
probability: 0.05
|
||||
- alternative: *hus
|
||||
probability: 0.1
|
||||
- alternative: *rum
|
||||
probability: 0.1
|
||||
numeric_probability: 0.95 # e.g. Lägenhet 1
|
||||
alpha_probability: 0.05 # e.g. Lgh A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# Separate random probability for adding directions like 2H, 2V, etc.
|
||||
add_direction: true
|
||||
add_direction_probability: 0.005
|
||||
|
||||
# Add directions for plain numbers
|
||||
add_direction_numeric: true
|
||||
# Add direction only, e.g. lägenhet höger
|
||||
add_direction_standalone: true
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.2
|
||||
|
||||
# Use the actual floor phrase as long as the whole phrase is numeric
|
||||
# Has the effect of creating lägenhetsnummer-style units (floor digits followed by unit digits)
|
||||
use_floor_affix_unit_num_digits: 2
|
||||
|
||||
# In Swedish addresses, the ground level is 10, floors are 11, 12, ... basements are 9, 8, ...
|
||||
use_floor_ground_starts_at: 10
|
||||
# For single digit floors, use 09, 08, etc.
|
||||
use_floor_floor_num_digits: 2
|
||||
|
||||
|
||||
countries:
|
||||
# Swedish addresses in Finland
|
||||
fi:
|
||||
units:
|
||||
alphanumeric:
|
||||
default: *bostad
|
||||
probability: 1.0
|
||||
alternatives: []
|
||||
|
||||
add_direction: false
|
||||
add_direction_numeric: false
|
||||
add_direction_standalone: false
|
||||
|
||||
use_floor_probability: 0.1
|
||||
|
||||
use_floor_affix_unit_num_digits: 0
|
||||
|
||||
use_floor_ground_starts_at: 1
|
||||
use_floor_floor_num_digits: 2
|
||||
503
resources/addresses/tr.yaml
Normal file
503
resources/addresses/tr.yaml
Normal file
@@ -0,0 +1,503 @@
|
||||
# tr.yaml
|
||||
# -------
|
||||
# Turkish language specification
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.9
|
||||
alphanumeric_probability: 0.1
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.7
|
||||
alphanumeric_probability: 0.3
|
||||
|
||||
combinations:
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- staircase
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- level
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.1
|
||||
# For unit types like 2/34
|
||||
-
|
||||
components:
|
||||
- house_number
|
||||
- unit
|
||||
label: house_number
|
||||
separators:
|
||||
- separator: "/"
|
||||
probability: 0.95
|
||||
- separator: "-"
|
||||
probability: 0.05
|
||||
probability: 0.005
|
||||
|
||||
|
||||
numbers:
|
||||
|
||||
default: &numara
|
||||
canonical: numara
|
||||
abbreviated: "no:"
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_affix:
|
||||
affix: "no:"
|
||||
whitespace_probability: 0.6
|
||||
direction: left
|
||||
numeric_probability: 0.4
|
||||
numeric_affix_probability: 0.6
|
||||
|
||||
alphanumeric_phrase_probability: 0.05
|
||||
no_number_probability: 0.05
|
||||
|
||||
|
||||
and:
|
||||
default: &ve
|
||||
canonical: ve
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
|
||||
|
||||
cross_streets:
|
||||
ve: *ve
|
||||
corner_of: &kose
|
||||
canonical: köşe
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
kosesinde: &kosesinde
|
||||
canonical: köşesinde
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *ve
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *kose
|
||||
probability: 0.1
|
||||
- alternative: *kosesinde
|
||||
probability: 0.1
|
||||
|
||||
arasinda: &arasinda
|
||||
canonical: arasında
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
between:
|
||||
default: *arasinda
|
||||
|
||||
levels:
|
||||
kat: &kat
|
||||
canonical: kat
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
roman_numeral_probability: 0.7
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
|
||||
zemin_kat: &zemin_kat
|
||||
canonical: zemin kat
|
||||
abbreviated: zk
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
asma_kat: &asma_kat
|
||||
canonical: asma kat
|
||||
half_floors: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
sample: true
|
||||
# e.g. asma kat 2
|
||||
numeric:
|
||||
direction: left
|
||||
# e.g. 2. asma kat
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.1
|
||||
ordinal_probability: 0.2
|
||||
standalone_probability: 0.6
|
||||
bodrum: &bodrum
|
||||
canonical: bodrum
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
# e.g. bodrum 1
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.8
|
||||
# e.g. 1. bodrum
|
||||
ordinal:
|
||||
direction: right
|
||||
digits:
|
||||
ascii_probability: 0.7
|
||||
roman_numeral_probability: 0.3
|
||||
standalone_probability: 0.99
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 1
|
||||
numeric_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *bodrum
|
||||
"-1":
|
||||
default: *bodrum
|
||||
# Special token for half-floors
|
||||
half_floors:
|
||||
default: *asma_kat
|
||||
"0":
|
||||
default: *zemin_kat
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *kat
|
||||
probability: 0.1
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *kat
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
directions:
|
||||
right: &sag
|
||||
canonical: sağ
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
left: &sol
|
||||
canonical: sol
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *sag
|
||||
probability: 0.5
|
||||
- alternative: *sol
|
||||
probability: 0.5
|
||||
|
||||
cardinal_directions:
|
||||
east: &dogu
|
||||
canonical: doğu
|
||||
abbreviated: d
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: d
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
west: &bati
|
||||
canonical: batı
|
||||
abbreviated: b
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: b
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
north: &kuzey
|
||||
canonical: kuzey
|
||||
abbreviated: k
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: k
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
south: &guney
|
||||
canonical: güney
|
||||
abbreviated: g
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: g
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *kuzey
|
||||
probability: 0.25
|
||||
- alternative: *dogu
|
||||
probability: 0.25
|
||||
- alternative: *guney
|
||||
probability: 0.25
|
||||
- alternative: *bati
|
||||
probability: 0.25
|
||||
|
||||
entrances:
|
||||
giris: &giris
|
||||
canonical: giriş
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# giriş 1, giriş A, etc.
|
||||
alphanumeric: &entrance_alphanumeric
|
||||
default: *giris
|
||||
numeric_probability: 0.1 # e.g. giriş 1
|
||||
alpha_probability: 0.85 # e.g. giriş A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
|
||||
staircases:
|
||||
merdiven: &merdiven
|
||||
canonical: merdiven
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *merdiven
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: right
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *sag
|
||||
probability: 0.2
|
||||
- alternative: *sol
|
||||
probability: 0.2
|
||||
- alternative: *kuzey
|
||||
probability: 0.15
|
||||
- alternative: *guney
|
||||
probability: 0.15
|
||||
- alternative: *dogu
|
||||
probability: 0.15
|
||||
- alternative: *bati
|
||||
probability: 0.15
|
||||
|
||||
po_boxes:
|
||||
posta_kutusu: &posta_kutusu
|
||||
canonical: posta kutusu
|
||||
abbreviated: pk
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *posta_kutusu
|
||||
numeric_probability: 0.9 # e.g. pk 123
|
||||
alpha_probability: 0.05 # e.g. pk A
|
||||
numeric_plus_alpha_probability: 0.04 # e.g. pk 123G
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. pk A123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
|
||||
units:
|
||||
daire: &daire
|
||||
canonical: daire
|
||||
abbreviated: d
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
apartman: &apartman
|
||||
canonical: apartman
|
||||
abbreviated: apt
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.2
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
oda: &oda
|
||||
canonical: oda
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
ofis: &ofis
|
||||
canonical: ofis
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.1
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *daire
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *apartman
|
||||
probability: 0.3
|
||||
- alternative: *oda
|
||||
probability: 0.1
|
||||
numeric_probability: 0.9 # e.g. d. 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. A1
|
||||
alpha_probability: 0.04 # e.g. daire A
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.05
|
||||
|
||||
zones:
|
||||
commercial: &commercial_unit_types
|
||||
default: *oda
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *ofis
|
||||
probability: 0.4
|
||||
numeric_probability: 0.95 # e.g. oda 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. oda 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. oda A1
|
||||
alpha_probability: 0.03 # e.g. oda A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
university:
|
||||
default: *oda
|
||||
numeric_probability: 0.95 # e.g. oda 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. oda 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. oda A1
|
||||
alpha_probability: 0.03 # e.g. oda A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
1001
resources/addresses/uk.yaml
Normal file
1001
resources/addresses/uk.yaml
Normal file
File diff suppressed because it is too large
Load Diff
292
resources/addresses/zh.yaml
Normal file
292
resources/addresses/zh.yaml
Normal file
@@ -0,0 +1,292 @@
|
||||
# zh.yaml
|
||||
# -------
|
||||
# Chinese language specification (default is mainland China; Hong Kong, Macau and Taiwan overrides below)
|
||||
|
||||
whitespace: false
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85 # Probability of doing nothing if no floor number is specified
|
||||
alphanumeric_probability: 0.15
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
numbers:
|
||||
default: &hao
|
||||
canonical: 号
|
||||
numeric_affix:
|
||||
affix: 号
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: &hao_traditional
|
||||
canonical: 號
|
||||
numeric_affix:
|
||||
affix: 號
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
probability: 0.2
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *hao
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *hao_traditional
|
||||
probability: 0.2
|
||||
alphanumeric_phrase_probability: 0.6
|
||||
|
||||
levels:
|
||||
lou: &lou
|
||||
canonical: 楼
|
||||
numeric_affix:
|
||||
affix: 楼
|
||||
direction: right
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.5
|
||||
digits:
|
||||
ascii_probability: 0.6
|
||||
unicode_full_width_probability: 0.1
|
||||
spellout_probability: 0.3
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
lou_traditional: &lou_traditional
|
||||
canonical: 樓
|
||||
numeric_affix:
|
||||
affix: 樓
|
||||
direction: right
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.5
|
||||
digits:
|
||||
ascii_probability: 0.6
|
||||
unicode_full_width_probability: 0.1
|
||||
spellout_probability: 0.3
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
ceng: &ceng
|
||||
canonical: 层
|
||||
numeric_affix:
|
||||
affix: 层
|
||||
direction: right
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.5
|
||||
digits:
|
||||
ascii_probability: 0.6
|
||||
unicode_full_width_probability: 0.1
|
||||
spellout_probability: 0.3
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
ceng_traditional: &ceng_traditional
|
||||
canonical: 層
|
||||
numeric_affix:
|
||||
affix: 層
|
||||
direction: right
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.5
|
||||
digits:
|
||||
ascii_probability: 0.6
|
||||
unicode_full_width_probability: 0.1
|
||||
spellout_probability: 0.3
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *lou
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *lou_traditional
|
||||
probability: 0.05
|
||||
- alternative: *ceng
|
||||
probability: 0.08
|
||||
- alternative: *ceng_traditional
|
||||
probability: 0.02
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes:
|
||||
youzheng_xinxiang: &youzheng_xinxiang
|
||||
canonical: 邮政信箱
|
||||
numeric_affix:
|
||||
affix: 邮政信箱
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
use_number_phrase: true
|
||||
use_number_phrase_probability: 0.8
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
youzheng_xinxiang_traditional: &youzheng_xinxiang_traditional
|
||||
canonical: 郵政信箱
|
||||
numeric_affix:
|
||||
affix: 郵政信箱
|
||||
direction: left
|
||||
digits:
|
||||
ascii_probability: 0.3
|
||||
unicode_full_width_probability: 0.5
|
||||
spellout_probability: 0.2
|
||||
use_number_phrase: true
|
||||
use_number_phrase_probability: 0.8
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
|
||||
alphanumeric:
|
||||
default: *youzheng_xinxiang
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *youzheng_xinxiang_traditional
|
||||
probability: 0.1
|
||||
numeric_probability: 1.0
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
default: &youbian
|
||||
canonical: 邮编
|
||||
numeric_affix:
|
||||
affix: 邮编
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.9
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 0.1
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: &youbian_traditional
|
||||
canonical: 郵編
|
||||
numeric_affix:
|
||||
affix: 郵編
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.9
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 0.1
|
||||
probability: 0.1
|
||||
|
||||
units:
|
||||
shi: &shi
|
||||
canonical: 室
|
||||
numeric_affix:
|
||||
affix: 室
|
||||
direction: right
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.5
|
||||
digits:
|
||||
ascii_probability: 0.6
|
||||
unicode_full_width_probability: 0.1
|
||||
spellout_probability: 0.3
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
alphanumeric:
|
||||
default: *shi
|
||||
numeric_probability: 1.0
|
||||
use_positive_numbers_probability: 1.0
|
||||
# If we have a floor number (from building:levels), use it
|
||||
use_floor_probability: 0.8
|
||||
|
||||
|
||||
countries:
|
||||
# Hong Kong
|
||||
hk:
|
||||
components:
|
||||
# Floor numbers are a little more common in Hong Kong than in mainland China
|
||||
level:
|
||||
null_probability: 0.75
|
||||
alphanumeric_probability: 0.25
|
||||
|
||||
numbers: &numbers_prefer_traditional
|
||||
default: *hao_traditional
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *hao
|
||||
probability: 0.3
|
||||
|
||||
house_numbers: &house_number_prefer_traditional
|
||||
alphanumeric:
|
||||
default: *hao_traditional
|
||||
probability: 0.7
|
||||
alternatives:
|
||||
- alternative: *hao
|
||||
probability: 0.3
|
||||
alphanumeric_phrase_probability: 0.6
|
||||
|
||||
levels: &levels_prefer_traditional
|
||||
alphanumeric:
|
||||
default: *lou_traditional
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *lou
|
||||
probability: 0.15
|
||||
- alternative: *ceng_traditional
|
||||
probability: 0.06
|
||||
- alternative: *ceng
|
||||
probability: 0.04
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes: &po_boxes_prefer_traditional
|
||||
alphanumeric:
|
||||
default: *youzheng_xinxiang_traditional
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *youzheng_xinxiang
|
||||
probability: 0.25
|
||||
numeric_probability: 1.0
|
||||
|
||||
|
||||
postcodes: &postcodes_prefer_traditional
|
||||
alphanumeric:
|
||||
default: *youbian_traditional
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: *youbian
|
||||
probability: 0.25
|
||||
|
||||
# Macau
|
||||
mo:
|
||||
numbers: *numbers_prefer_traditional
|
||||
house_numbers: *house_number_prefer_traditional
|
||||
levels: *levels_prefer_traditional
|
||||
po_boxes: *po_boxes_prefer_traditional
|
||||
postcodes: *postcodes_prefer_traditional
|
||||
|
||||
units:
|
||||
alphanumeric_probability:
|
||||
numeric_probability: 0.9
|
||||
alpha_probability: 0.1
|
||||
|
||||
|
||||
# Taiwan
|
||||
tw:
|
||||
numbers: *numbers_prefer_traditional
|
||||
house_numbers: *house_number_prefer_traditional
|
||||
levels: *levels_prefer_traditional
|
||||
po_boxes: *po_boxes_prefer_traditional
|
||||
postcodes: *postcodes_prefer_traditional
|
||||
|
||||
units:
|
||||
alphanumeric_probability:
|
||||
numeric_probability: 0.9
|
||||
alpha_probability: 0.1
|
||||
153
resources/addresses/zh_pinyin.yaml
Normal file
153
resources/addresses/zh_pinyin.yaml
Normal file
@@ -0,0 +1,153 @@
|
||||
# zh_pinyin.yaml
|
||||
# --------------
|
||||
# Chinese (Pinyin)
|
||||
|
||||
whitespace: false
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.85 # Probability of doing nothing if no floor number is specified
|
||||
alphanumeric_probability: 0.15
|
||||
|
||||
unit:
|
||||
# If no unit number is specified
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
numbers:
|
||||
default: &hao
|
||||
canonical: hao
|
||||
numeric_affix:
|
||||
affix: -hao
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
house_numbers:
|
||||
alphanumeric:
|
||||
default: *hao
|
||||
alphanumeric_phrase_probability: 0.6
|
||||
|
||||
levels:
|
||||
lou: &lou
|
||||
canonical: lóu
|
||||
numeric_affix:
|
||||
affix: -lóu
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
lou_no_accent: &lou_no_accent
|
||||
canonical: lou
|
||||
numeric_affix:
|
||||
affix: -lou
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
ceng: &ceng
|
||||
canonical: céng
|
||||
numeric_affix:
|
||||
affix: -céng
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
ceng_no_accent: &ceng_no_accent
|
||||
canonical: ceng
|
||||
numeric_affix:
|
||||
affix: -ceng
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
numbering_starts_at: 1
|
||||
|
||||
alphanumeric:
|
||||
default: *lou
|
||||
probability: 0.85
|
||||
alternatives:
|
||||
- alternative: *lou_no_accent
|
||||
probability: 0.05
|
||||
- alternative: *ceng
|
||||
probability: 0.08
|
||||
- alternative: *ceng_no_accent
|
||||
probability: 0.02
|
||||
numeric_probability: 1.0
|
||||
|
||||
po_boxes:
|
||||
youzheng_xinxiang: &youzheng_xinxiang
|
||||
canonical: youzheng xinxiang
|
||||
numeric:
|
||||
direction: left
|
||||
numeric_probability: 1.0
|
||||
|
||||
alphanumeric:
|
||||
default: *youzheng_xinxiang
|
||||
numeric_probability: 1.0
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
postcodes:
|
||||
alphanumeric:
|
||||
default: &youbian
|
||||
canonical: yóubiān
|
||||
numeric:
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.9
|
||||
numeric_probability: 0.1
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: &youbian_no_accent
|
||||
canonical: youbian
|
||||
numeric:
|
||||
direction: left
|
||||
# null_probability means the chance of doing nothing e.g. just the postal code
|
||||
null_probability: 0.9
|
||||
numeric_probability: 0.1
|
||||
probability: 0.1
|
||||
|
||||
units:
|
||||
shi: &shi
|
||||
canonical: shì
|
||||
numeric_affix:
|
||||
affix: -shì
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
shi_no_accent: &shi_no_accent
|
||||
canonical: shi
|
||||
numeric_affix:
|
||||
affix: -shi
|
||||
upper_case: false
|
||||
direction: right
|
||||
numeric_probability: 0.0
|
||||
numeric_affix_probability: 1.0
|
||||
|
||||
alphanumeric:
|
||||
default: *shi
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *shi_no_accent
|
||||
probability: 0.2
|
||||
numeric_probability: 1.0
|
||||
use_positive_numbers_probability: 1.0
|
||||
# If we have a floor number (from building:levels), use it
|
||||
use_floor_probability: 0.8
|
||||
2
resources/boundaries/geonames/ad.yaml
Normal file
2
resources/boundaries/geonames/ad.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
3
resources/boundaries/geonames/ar.yaml
Normal file
3
resources/boundaries/geonames/ar.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/at.yaml
Normal file
3
resources/boundaries/geonames/at.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of state_district and city, need to list specifically
|
||||
3
resources/boundaries/geonames/au.yaml
Normal file
3
resources/boundaries/geonames/au.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of state_district and city, need to list specifically
|
||||
3
resources/boundaries/geonames/ax.yaml
Normal file
3
resources/boundaries/geonames/ax.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/bd.yaml
Normal file
3
resources/boundaries/geonames/bd.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# unclear what admin2 is, maybe city
|
||||
3
resources/boundaries/geonames/be.yaml
Normal file
3
resources/boundaries/geonames/be.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/bg.yaml
Normal file
3
resources/boundaries/geonames/bg.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/br.yaml
Normal file
3
resources/boundaries/geonames/br.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/ca.yaml
Normal file
3
resources/boundaries/geonames/ca.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/ch.yaml
Normal file
3
resources/boundaries/geonames/ch.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
4
resources/boundaries/geonames/cz.yaml
Normal file
4
resources/boundaries/geonames/cz.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
admin_codes:
|
||||
# The GeoNames admin1 boundaries are admin_level=5 or 6 in OSM
|
||||
# However, they do appear to be states, might need to update Czech OSM config
|
||||
admin1: state_district
|
||||
3
resources/boundaries/geonames/de.yaml
Normal file
3
resources/boundaries/geonames/de.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/dk.yaml
Normal file
3
resources/boundaries/geonames/dk.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of city and island, need to list specifically
|
||||
3
resources/boundaries/geonames/do.yaml
Normal file
3
resources/boundaries/geonames/do.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/dz.yaml
Normal file
3
resources/boundaries/geonames/dz.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/es.yaml
Normal file
3
resources/boundaries/geonames/es.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
5
resources/boundaries/geonames/fi.yaml
Normal file
5
resources/boundaries/geonames/fi.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
admin_codes:
|
||||
# The GeoNames admin1 boundaries are admin_level=6 in OSM
|
||||
# However, they do appear to be states, might need to update Finnish OSM config
|
||||
admin1: state_district
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/fo.yaml
Normal file
3
resources/boundaries/geonames/fo.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/fr.yaml
Normal file
3
resources/boundaries/geonames/fr.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/gb.yaml
Normal file
3
resources/boundaries/geonames/gb.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/gt.yaml
Normal file
3
resources/boundaries/geonames/gt.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
2
resources/boundaries/geonames/gu.yaml
Normal file
2
resources/boundaries/geonames/gu.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
3
resources/boundaries/geonames/hr.yaml
Normal file
3
resources/boundaries/geonames/hr.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of city and city_district, need to list specifically
|
||||
4
resources/boundaries/geonames/hu.yaml
Normal file
4
resources/boundaries/geonames/hu.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
admin_codes:
|
||||
# The GeoNames admin1 boundaries are admin_level=6 in OSM
|
||||
# However, they do appear to be states, so the Hungarian OSM config might need updating
|
||||
admin1: state_district
|
||||
3
resources/boundaries/geonames/ie.yaml
Normal file
3
resources/boundaries/geonames/ie.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
2
resources/boundaries/geonames/im.yaml
Normal file
2
resources/boundaries/geonames/im.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
3
resources/boundaries/geonames/in.yaml
Normal file
3
resources/boundaries/geonames/in.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/is.yaml
Normal file
3
resources/boundaries/geonames/is.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/it.yaml
Normal file
3
resources/boundaries/geonames/it.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
2
resources/boundaries/geonames/je.yaml
Normal file
2
resources/boundaries/geonames/je.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
3
resources/boundaries/geonames/jp.yaml
Normal file
3
resources/boundaries/geonames/jp.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of state_district and city, need to list specifically
|
||||
2
resources/boundaries/geonames/li.yaml
Normal file
2
resources/boundaries/geonames/li.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
3
resources/boundaries/geonames/lk.yaml
Normal file
3
resources/boundaries/geonames/lk.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/lt.yaml
Normal file
3
resources/boundaries/geonames/lt.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of state_district and city, need to list specifically
|
||||
4
resources/boundaries/geonames/lu.yaml
Normal file
4
resources/boundaries/geonames/lu.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
admin_codes:
|
||||
# The admin1 names don't appear to exist in OSM, but would be states otherwise
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
2
resources/boundaries/geonames/md.yaml
Normal file
2
resources/boundaries/geonames/md.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: state_district
|
||||
2
resources/boundaries/geonames/mp.yaml
Normal file
2
resources/boundaries/geonames/mp.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: state_district
|
||||
2
resources/boundaries/geonames/mt.yaml
Normal file
2
resources/boundaries/geonames/mt.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
3
resources/boundaries/geonames/mx.yaml
Normal file
3
resources/boundaries/geonames/mx.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/my.yaml
Normal file
3
resources/boundaries/geonames/my.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/nl.yaml
Normal file
3
resources/boundaries/geonames/nl.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/no.yaml
Normal file
3
resources/boundaries/geonames/no.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/nz.yaml
Normal file
3
resources/boundaries/geonames/nz.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/ph.yaml
Normal file
3
resources/boundaries/geonames/ph.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: country_region
|
||||
# admin2 is a mix of state_district and city, need to list specifically
|
||||
3
resources/boundaries/geonames/pk.yaml
Normal file
3
resources/boundaries/geonames/pk.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/pl.yaml
Normal file
3
resources/boundaries/geonames/pl.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
6
resources/boundaries/geonames/pr.yaml
Normal file
6
resources/boundaries/geonames/pr.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
admin_codes:
|
||||
admin1: state_district
|
||||
# The notion of a "barrio" in the official sense in PR is not quite a
|
||||
# municipality, and has no current official purpose, but might be useful
|
||||
# to have the name + "barrio" version available in libpostal
|
||||
admin2: city
|
||||
8
resources/boundaries/geonames/pt.yaml
Normal file
8
resources/boundaries/geonames/pt.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
admin_codes:
|
||||
admin1: state_district
|
||||
admin2: city
|
||||
|
||||
overrides:
|
||||
id:
|
||||
"2593105": "state" # Madeira
|
||||
"3411865": "state" # Azores
|
||||
4
resources/boundaries/geonames/ro.yaml
Normal file
4
resources/boundaries/geonames/ro.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# These are mostly admin_level=6, which maps to city in OSM
|
||||
admin2: city
|
||||
3
resources/boundaries/geonames/ru.yaml
Normal file
3
resources/boundaries/geonames/ru.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
3
resources/boundaries/geonames/se.yaml
Normal file
3
resources/boundaries/geonames/se.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: city
|
||||
22
resources/boundaries/geonames/si.yaml
Normal file
22
resources/boundaries/geonames/si.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
|
||||
overrides:
|
||||
id:
|
||||
# Districts of Ljubljana (suburbs in OSM)
|
||||
"3196350": "suburb" # Opština Ljubljana-Vič-Rudnik
|
||||
"3196352": "suburb" # Opština [historical] Ljubljana-Šiška
|
||||
"3196355": "suburb" # Opština Ljubljana-Moste-Polje
|
||||
"3196356": "suburb" # Opština Ljubljana-Center
|
||||
"3196357": "suburb" # Opčina Ljubljana-Bežigrad
|
||||
"9794374": "suburb" # Črnuče District
|
||||
"9794375": "suburb" # Dravlje District
|
||||
"9794376": "suburb" # Golovec District
|
||||
"9794377": "suburb" # Jarše District
|
||||
"9794378": "suburb" # Posavje District
|
||||
"9794379": "suburb" # Rožnik District
|
||||
"9794380": "suburb" # Sostro District
|
||||
"9794381": "suburb" # Šentvid District
|
||||
"9794382": "suburb" # Šmarna Gora District
|
||||
"9794384": "suburb" # Trnovo District
|
||||
"9794386": "suburb" # Vič District
|
||||
17
resources/boundaries/geonames/sk.yaml
Normal file
17
resources/boundaries/geonames/sk.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
# admin2 is a mix of state_district and city-level areas: default to state_district and list the exceptions under overrides below
|
||||
admin2: state_district
|
||||
overrides:
|
||||
id:
|
||||
# Districts of Bratislava
|
||||
"8986283": "city_district" # Okres Bratislava I
|
||||
"8986339": "city_district" # Okres Bratislava II
|
||||
"8986340": "city_district" # Okres Bratislava III
|
||||
"8986341": "city_district" # Okres Bratislava IV
|
||||
"8986342": "city_district" # Okres Bratislava V
|
||||
# Districts of Košice
|
||||
"8986335": "city_district" # Košice I
|
||||
"8986336": "city_district" # Košice II
|
||||
"8986337": "city_district" # Košice III
|
||||
"8986338": "city_district" # Košice IV
|
||||
2
resources/boundaries/geonames/sm.yaml
Normal file
2
resources/boundaries/geonames/sm.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
8
resources/boundaries/geonames/th.yaml
Normal file
8
resources/boundaries/geonames/th.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
|
||||
overrides:
|
||||
id:
|
||||
# Bangkok the state is treated as a city
|
||||
# Note: we do this in OSM to get the boundary, so duplicate in GeoNames
|
||||
"1609348": "city"
|
||||
3
resources/boundaries/geonames/tr.yaml
Normal file
3
resources/boundaries/geonames/tr.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
16
resources/boundaries/geonames/us.yaml
Normal file
16
resources/boundaries/geonames/us.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
|
||||
overrides:
|
||||
id:
|
||||
# Manhattan (Island)
|
||||
"8479493": "city_district"
|
||||
# Brooklyn
|
||||
"5110300": "city_district"
|
||||
# Bronx
|
||||
"5110266": "city_district"
|
||||
# Queens
|
||||
"5133266": "city_district"
|
||||
# Staten Island
|
||||
"5139568": "city_district"
|
||||
2
resources/boundaries/geonames/yt.yaml
Normal file
2
resources/boundaries/geonames/yt.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
admin_codes:
|
||||
admin1: city
|
||||
3
resources/boundaries/geonames/za.yaml
Normal file
3
resources/boundaries/geonames/za.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
admin_codes:
|
||||
admin1: state
|
||||
admin2: state_district
|
||||
120
resources/boundaries/names/global.yaml
Normal file
120
resources/boundaries/names/global.yaml
Normal file
@@ -0,0 +1,120 @@
|
||||
names:
|
||||
keys:
|
||||
default: name
|
||||
probability: 0.75
|
||||
alternatives:
|
||||
- alternative: short_name # e.g. NYC
|
||||
probability: 0.12
|
||||
- alternative: alt_name # e.g. New York (instead of New York City)
|
||||
probability: 0.12
|
||||
- alternative: official_name # e.g. United Kingdom of Great Britain and Northern Ireland
|
||||
probability: 0.01
|
||||
|
||||
components:
|
||||
country:
|
||||
keys:
|
||||
default: name
|
||||
probability: 0.87
|
||||
alternatives:
|
||||
- alternative: ISO3166-1:alpha2
|
||||
probability: 0.02
|
||||
- alternative: ISO3166-1:alpha3
|
||||
probability: 0.01
|
||||
- alternative: short_name
|
||||
probability: 0.04
|
||||
- alternative: alt_name
|
||||
probability: 0.04
|
||||
- alternative: int_name
|
||||
probability: 0.01
|
||||
- alternative: official_name # e.g. United Kingdom of Great Britain and Northern Ireland
|
||||
probability: 0.01
|
||||
|
||||
regex_replacements:
|
||||
- country: fr
|
||||
pattern: "(?:lyon|paris|marseilles?) ([\\d]+(?:e|er|ème|eme) arrondissement)"
|
||||
replace_with_group: 1
|
||||
replace_probability: 0.5
|
||||
|
||||
prefixes:
|
||||
language:
|
||||
ru:
|
||||
city:
|
||||
default:
|
||||
prefix: г.
|
||||
probability: 0.35
|
||||
alternatives:
|
||||
- alternative:
|
||||
prefix: г
|
||||
probability: 0.1
|
||||
- alternative:
|
||||
prefix: город
|
||||
probability: 0.05
|
||||
|
||||
# This section overrides place names
|
||||
exceptions:
|
||||
# Boroughs of New York City
|
||||
- id: 2552485 # New York County (don't use Manhattan)
|
||||
type: relation
|
||||
default: name # New York County
|
||||
probability: 1.0
|
||||
- id: 369518 # Kings County (don't use Brooklyn)
|
||||
type: relation
|
||||
default: name # Kings County
|
||||
probability: 1.0
|
||||
- id: 369519 # Queens County (don't use Queens)
|
||||
type: relation
|
||||
default: name # Queens County
|
||||
probability: 1.0
|
||||
- id: 2552450 # Bronx County (don't use The Bronx)
|
||||
type: relation
|
||||
default: name # Bronx County
|
||||
probability: 1.0
|
||||
- id: 962876 # Richmond County (don't use Staten Island)
|
||||
type: relation
|
||||
default: name # Richmond County
|
||||
probability: 1.0
|
||||
- id: 6577227 # Kingston Parish (always use Kingston)
|
||||
type: relation
|
||||
default: name # Kingston
|
||||
probability: 1.0
|
||||
- id: 30674098 # Sao Paulo
|
||||
type: node
|
||||
default: name
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: alt_name
|
||||
probability: 0.09
|
||||
- alternative: official_name
|
||||
probability: 0.01
|
||||
- id: 298285 # Sao Paulo (relation)
|
||||
type: relation
|
||||
default: name
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: alt_name
|
||||
probability: 0.09
|
||||
- alternative: official_name
|
||||
probability: 0.01
|
||||
- id: 556706 # New Zealand
|
||||
type: relation
|
||||
default: name:en
|
||||
probability: 0.77
|
||||
alternatives:
|
||||
- alternative: name
|
||||
probability: 0.1
|
||||
- alternative: ISO3166-1:alpha2
|
||||
probability: 0.02
|
||||
- alternative: ISO3166-1:alpha3
|
||||
probability: 0.01
|
||||
- alternative: short_name
|
||||
probability: 0.04
|
||||
- alternative: alt_name
|
||||
probability: 0.04
|
||||
- alternative: int_name
|
||||
probability: 0.01
|
||||
- alternative: official_name
|
||||
probability: 0.01
|
||||
- id: 2383266 # Melbourne (city center)
|
||||
type: relation
|
||||
default: alt_name # Melbourne
|
||||
probability: 1.0
|
||||
11
resources/boundaries/names/languages/de.yaml
Normal file
11
resources/boundaries/names/languages/de.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Prefixes which can be stripped to normalize a place name
|
||||
prefixes:
|
||||
- stadtteil
|
||||
- stadtbezirk
|
||||
- gemeinde
|
||||
- landkreis
|
||||
- kreis
|
||||
- grenze
|
||||
- freistaat
|
||||
- regierungsbezirk
|
||||
- gemeindefreies gebiet
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user