Initial fork commit

This commit is contained in:
2025-09-06 22:03:29 -04:00
commit 2d238cd339
1748 changed files with 932506 additions and 0 deletions

1001
resources/addresses/bg.yaml Normal file

File diff suppressed because it is too large Load Diff

585
resources/addresses/bs.yaml Normal file
View File

@@ -0,0 +1,585 @@
# bs.yaml
# -------
# Bosnian language specification
components:
level:
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.7
alphanumeric_probability: 0.3
combinations:
-
components:
- house_number
- staircase
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# For unit types like 2/34
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
numbers:
no_number:
default:
canonical: bez broja
abbreviated: bb
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
default: &broj
canonical: broj
abbreviated: br
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "br."
whitespace_probability: 0.6
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
alphanumeric_phrase_probability: 0.05
no_number_probability: 0.05
and:
default: &i
canonical: i
sample: true
canonical_probability: 0.8
sample_probability: 0.2
cross_streets:
i: *i
at: &na
canonical: na
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &ugao
canonical: ugao
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &uglu
canonical: uglu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
na_uglu: &na_uglu
canonical: na uglu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *i
probability: 0.65
alternatives:
- alternative: *na
probability: 0.1
- alternative: *uglu
probability: 0.1
- alternative: *na_uglu
probability: 0.1
- alternative: *ugao
probability: 0.05
izmedu: &izmedu
canonical: između
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
between:
default: *izmedu
levels:
sprat: &sprat
canonical: sprat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
kat: &kat
canonical: kat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
prizemlje: &prizemlje
canonical: prizemlje
sample: true
canonical_probability: 0.9
sample_probability: 0.1
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.9
sample_probability: 0.1
# Mezzanine (half-floor) phrase; referenced as *mezanin by the half_floors alias
mezanino: &mezanin
canonical: mezanin
half_floors: true
canonical_probability: 0.8
sample_probability: 0.2
sample: true
# e.g. mezanin 2
numeric:
direction: left
# e.g. 2. mezanin
ordinal:
direction: right
# numeric + ordinal + standalone form one distribution and sum to 1.0
# (they previously summed to 0.9; the remainder is assigned to standalone)
numeric_probability: 0.1
ordinal_probability: 0.2
standalone_probability: 0.7
podrum: &podrum
canonical: podrum
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. podrum 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. podrum
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *podrum
"-1":
default: *podrum
# Special token for half-floors
half_floors:
default: *mezanin
"0":
default: *prizemlje
probability: 0.5
alternatives:
- alternative: *parter
probability: 0.4
- alternative: *kat
probability: 0.05
- alternative: *sprat
probability: 0.05
numbering_starts_at: 0
alphanumeric:
default: *kat
probability: 0.5
alternatives:
- alternative: *sprat
probability: 0.5
numeric_probability: 0.69 # With this probability, pick an integer
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: u blizini
nearby:
default:
canonical: u blizini
probability: 0.6
alternatives:
- alternative:
canonical: u blizini ovdje
probability: 0.3
- alternative:
canonical: ovde
probability: 0.1
near_me:
default:
canonical: u blizini mene
# Don't worry about agreement
in:
default:
canonical: u
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &desno
canonical: desno
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &lijevo
canonical: lijevo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *desno
probability: 0.5
- alternative: *lijevo
probability: 0.5
cardinal_directions:
east: &istok
canonical: istok
abbreviated: i
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: i
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zapad
canonical: zapad
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &sjever
canonical: sjever
abbreviated: s
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &jug
canonical: jug
abbreviated: j
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: j
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Choice between the four cardinal direction phrases defined above.
# Weights are uniform and sum to 1.0 (they previously summed to 0.94).
alternatives:
- alternative: *sjever
probability: 0.25
- alternative: *istok
probability: 0.25
- alternative: *jug
probability: 0.25
- alternative: *zapad
probability: 0.25
entrances:
ulaz: &ulaz
canonical: ulaz
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Ulaz 1, Ulaz A, etc.
alphanumeric: &entrance_alphanumeric
default: *ulaz
numeric_probability: 0.1 # e.g. Ulaz 1
alpha_probability: 0.85 # e.g. Ulaz A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
stubiste: &stubiste
canonical: stubište
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *stubiste
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right
direction_probability: 0.85
modifier:
alternatives:
- alternative: *desno
probability: 0.2
- alternative: *lijevo
probability: 0.2
- alternative: *sjever
probability: 0.15
- alternative: *jug
probability: 0.15
- alternative: *istok
probability: 0.15
- alternative: *zapad
probability: 0.15
po_boxes:
postanski_pretinac: &postanski_pretinac
canonical: poštanski pretinac
abbreviated: p.p
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.4
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
alphanumeric:
default: *postanski_pretinac
numeric_probability: 0.9 # pp 123
alpha_probability: 0.05 # p.p A
numeric_plus_alpha_probability: 0.04 # pp 123G
alpha_plus_numeric_probability: 0.01 # pp A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
stan: &stan
canonical: stan
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
apartman: &apartman
canonical: apartman
abbreviated: ap
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
soba: &soba
canonical: soba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
ured: &ured
canonical: ured
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
alphanumeric: &unit_alphanumeric
default: *stan
probability: 0.6
alternatives:
- alternative: *apartman
probability: 0.3
- alternative: *soba
probability: 0.1
numeric_probability: 0.9 # e.g. stan. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. stan A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05
zones:
commercial: &commercial_unit_types
default: *soba
probability: 0.6
alternatives:
- alternative: *ured
probability: 0.4
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *soba
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

856
resources/addresses/ca.yaml Normal file
View File

@@ -0,0 +1,856 @@
# ca.yaml
# -------
# Catalan language specification
components:
level:
# If no floor number is specified
null_probability: 0.6
alphanumeric_probability: 0.35
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
# If no unit number is specified
null_probability: 0.3
alphanumeric_probability: 0.65
standalone_probability: 0.05
numbers:
default: &numero
canonical: número
abbreviated: "nº"
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#" # e.g. #3, #2F, etc.
probability: 0.5
alternatives:
- alternative:
direction: left # affix goes on the number's left
# Probabilities for numbers
numeric_probability: 0.7
numeric_affix_probability: 0.3
and:
default: &i
canonical: i
abbreviated: "&"
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.4
sample_probability: 0.1
house_numbers:
# sense número (s/n) addresses
no_number:
default:
canonical: sense número
abbreviated: s/n
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
alphanumeric:
default: *numero
alphanumeric_phrase_probability: 0.01
no_number_probability: 0.1 # With this probability, use sense número if no house_number is specified
levels:
# Everywhere except Spain
floor: &pis
canonical: pis
abbreviated: p
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true # Occasionally add variation of "number", e.g. Pis No 2
add_number_phrase_probability: 0.05
numeric_affix:
affix: p
direction: left # P2
# e.g. 2o piso
ordinal:
direction: right
direction_probability: 0.95 # Let it vary occasionally e.g. Pis 2o
standalone_probability: 0.2 # Let e.g. 5º be the entire floor string
# If ordinal is selected, chance of e.g. just using 2o without Piso
null_phrase_probability: 0.6
numeric_probability: 0.2
numeric_affix_probability: 0.05
ordinal_probability: 0.75
# Ground floor
baixos: &baixos
canonical: baixos
abbreviated: bxs
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.3
sample_probability: 0.1
pis_baix: &pis_baix
canonical: pis baix
abbreviated: pb
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
sota: &sota
canonical: sota
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Used when floor number is < 0 (starts at -1 in all countries)
soterrani: &soterrani
canonical: soterrani
abbreviated: so
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
# e.g. soterrani 1
numeric:
direction: left
numeric_affix:
affix: so
direction: left
# e.g. segon soterrani
ordinal:
direction: right
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
sub_soterrani: &sub_soterrani
canonical: sub soterrani
abbreviated: ss
sample: true
# e.g. sub soterrani 1
numeric:
direction: left
numeric_affix:
affix: ss
direction: left
# e.g. segon sub soterrani
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 2
# Soterrani 2 == Sub-soterrani 1
number_subtract_abs_value: 1
standalone_probability: 0.985
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
entresol: &entresol
canonical: entresòl
abbreviated: entl
half_floors: true
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
# e.g. entresòl 2
numeric:
direction: left
# e.g. ent2
numeric_affix:
affix: ent
direction: left
# e.g. segon entresòl
ordinal:
direction: right
numeric_probability: 0.1
numeric_affix_probability: 0.1
ordinal_probability: 0.2
standalone_probability: 0.6
pis_principal: &pis_principal
canonical: pis principal
abbreviated: pis pral
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.3
sample_probability: 0.5
principal: &principal
canonical: principal
abbreviated: pral
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
# Attic phrase; offered as an alternative under the "top" alias via *atic
atic: &atic
canonical: àtic
# Abbreviation keeps the grave accent of the canonical form (was "át")
abbreviated: àt
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
sobreatic: &sobreatic
canonical: sobreàtic
aliases:
"<-1":
default: *soterrani
probability: 0.6
alternatives:
- alternative: *sub_soterrani
probability: 0.3995
- alternative: *pis
probability: 0.0005
"-1":
default: *soterrani
probability: 0.9995
alternatives:
- alternative: *pis
probability: 0.0005
# Special token for half-floors
half_floors:
default: *entresol
"0":
default: *baixos
probability: 0.495
alternatives:
- alternative: *pis_baix
probability: 0.395
- alternative: *sota
probability: 0.1
- alternative: *pis
# Piso 0 is uncommon
probability: 0.01
top:
default: *pis
probability: 0.85
alternatives:
- alternative: *atic
probability: 0.1
- alternative: *sobreatic
probability: 0.05
numbering_starts_at: 0
alphanumeric:
default: *pis
add_number_phrase: true
add_number_phrase_probability: 0.05
numeric_probability: 0.99
alpha_probability: 0.01
blocks:
alphanumeric:
default:
canonical: bloc
abbreviated: bl
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
categories:
near:
default:
canonical: a prop de
probability: 0.5
alternatives:
- alternative:
canonical: prop de
probability: 0.2
- alternative:
canonical: prop
probability: 0.1
- alternative:
canonical: a prop
probability: 0.1
- alternative:
canonical: proper
probability: 0.05
- alternative:
canonical: proper a
probability: 0.05
nearby:
default:
canonical: proper
probability: 0.5
alternatives:
- alternative:
canonical: a prop
probability: 0.1
- alternative:
canonical: a prop d'aquí
probability: 0.1
- alternative:
canonical: a prop d'aqui
probability: 0.1
- alternative:
canonical: aquí
probability: 0.1
- alternative:
canonical: aqui
probability: 0.1
near_me:
default:
canonical: a prop meu
in:
default:
canonical: a
probability: 0.6
alternatives:
- alternative:
canonical: dins
probability: 0.2
- alternative:
canonical: en
probability: 0.2
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
cross_streets:
and: *i
amb: &amb
canonical: amb
a: &a
canonical: a
corner_of: &cantonada_de
canonical: cantonada de
sample: true
canonical_probability: 0.7
sample_probability: 0.3
at_the_corner_of: &a_la_cantonada_de
canonical: a la cantonada de
sample: true
canonical_probability: 0.7
sample_probability: 0.3
corner: &cantonada
canonical: cantonada
sample: true
canonical_probability: 0.7
sample_probability: 0.3
intersection:
default: *i
probability: 0.55
alternatives:
- alternative: *amb
probability: 0.2
- alternative: *a
probability: 0.1
- alternative: *cantonada_de
probability: 0.09
- alternative: *a_la_cantonada_de
probability: 0.05
- alternative: *cantonada
probability: 0.01
# "between" phrase for cross streets
between:
canonical: entre
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Key was misspelled "parentheses_probabililty"; the Bosnian and Czech
# configs spell it parentheses_probability
parentheses_probability: 0.5
po_boxes:
apartat: &apartat
canonical: apartat
abbreviated: apt
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.4 # Apt No 1234
numeric_probability: 1.0
alphanumeric:
sample: false
default: *apartat
numeric_probability: 0.9 # Apt 123
alpha_probability: 0.05 # Apt A
numeric_plus_alpha_probability: 0.04 # Apt 123G
alpha_plus_numeric_probability: 0.01 # Apt A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
postcodes:
alphanumeric:
default:
canonical: codi postal
abbreviated: cp
sample: true
canonical_probability: 0.01
abbreviated_probability: 0.95
sample_probability: 0.04
numeric:
# Postcodes in Spain and Latin America are sometimes prefixed by CP
direction: left
numeric_affix:
affix: cp
direction: left
# null_probability means the chance of doing nothing e.g. just the postal code
null_probability: 0.7
numeric_probability: 0.18
numeric_affix_probability: 0.12
strict_numeric: true
directions:
right: &dreta
canonical: dreta
abbreviated: dta
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: d
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
left: &esquerra
canonical: esquerra
abbreviated: esq
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: e
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
rear: &posterior
canonical: posterior
abbreviated: pos
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: right
front: &front
canonical: front
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *dreta
probability: 0.45
- alternative: *esquerra
probability: 0.45
- alternative: *posterior
probability: 0.05
- alternative: *front
probability: 0.05
anteroposterior:
alternatives:
- alternative: *front
probability: 0.5
- alternative: *posterior
probability: 0.5
lateral:
alternatives:
- alternative: *dreta
probability: 0.5
- alternative: *esquerra
probability: 0.5
cardinal_directions:
east: &est
canonical: est
abbreviated: e
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: e
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &oest
canonical: oest
abbreviated: w
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: w
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &sud
canonical: sud
abbreviated: s
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *est
probability: 0.25
- alternative: *sud
probability: 0.25
- alternative: *oest
probability: 0.25
entrances:
entrada: &entrada
canonical: entrada
abbreviated: entr
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
numeric:
direction: left
# Entrance 1, Entrance A, etc.
# Entrance identifiers: relative weights for a number, a letter, or a mix of both
alphanumeric:
default: *entrada
numeric_probability: 0.1 # e.g. Entrance 1
alpha_probability: 0.85 # e.g. Entrance A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction phrases usable as an entrance modifier.
# NOTE(review): no per-alternative probability is given here — confirm the
# consumer treats these as equally likely.
directional:
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *oest
- alternative: *dreta
- alternative: *esquerra
- alternative: *posterior
- alternative: *front
staircases:
escala: &escala
canonical: escala
abbreviated: esc
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Stair A, Stair 1, etc.
default: *escala
numeric_probability: 0.6 # e.g. Escalera 1
alpha_probability: 0.35 # e.g. Escalera A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right # e.g. Escalera Izq
direction_probability: 0.8
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *oest
- alternative: *dreta
- alternative: *esquerra
- alternative: *posterior
- alternative: *front
units:
flat: &apartament
canonical: apartament
abbreviated: apmt
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
door: &porta
canonical: porta
abbreviated: pta
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
# If it's just puerta B, many times it's just e.g. 3o B for "tercero piso puerta B"
null_phrase_probability: 0.15
ordinal:
direction: right
gender: f
direction_probability: 0.95 # Let it vary occasionally e.g. Porta 2a
null_phrase_probability: 0.8 # Let e.g. 5a be the entire unit string
numeric_probability: 0.25
ordinal_probability: 0.75
lletra: &lletra
canonical: lletra
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
office: &oficina
canonical: oficina
abbreviated: of
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.3
sample_probability: 0.3
numeric:
direction: left
# Another word for unit, used more in Colombia
unitat: &unitat
canonical: unitat
abbreviated: un
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
lot: &lot
canonical: lot
abbreviated: lt
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
parcel: &parcella
canonical: parcel·la
sample: true
canonical_probability: 0.8
sample_probability: 0.2
habitacio: &habitacio
canonical: habitació
sample: true
canonical_probability: 0.8
sample_probability: 0.2
casa: &casa
canonical: casa
numeric:
direction: left
room: &sala
canonical: sala
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *porta
probability: 0.8
sample: true
alternatives:
- alternative: *apartament
probability: 0.1
- alternative: *casa
probability: 0.1
# Separate random probability for adding directions like 2o Izq, 2 Dcha, etc.
add_direction: true
add_direction_probability: 0.1
add_direction_numeric: true # Only for numbers
add_direction_standalone: true # A unit can be as simple as "D"
numeric_probability: 0.7 # e.g. Porta 1a
numeric_plus_alpha_probability: 0.01 # e.g. Porta 1A
alpha_plus_numeric_probability: 0.01 # e.g. Porta A1
alpha_probability: 0.28 # e.g. Porta A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
alpha:
default: *porta
probability: 0.8
alternatives:
- alternative: *lletra
probability: 0.12
- alternative: *apartament
probability: 0.05
- alternative: *casa
probability: 0.01
- alternative: *unitat
probability: 0.01
- alternative: *habitacio
probability: 0.01
zones:
residential: *unit_alphanumeric
commercial:
default: *oficina
probability: 0.8
alternatives:
- alternative: *sala
probability: 0.2
numeric_probability: 0.9 # e.g. Oficina 1
numeric_plus_alpha_probability: 0.01 # e.g. Oficina 1A
alpha_plus_numeric_probability: 0.01 # e.g. Oficina A1
alpha_probability: 0.08 # e.g. Oficina A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
alpha:
default: *oficina
probability: 0.8
alternatives:
- alternative: *sala
probability: 0.15
- alternative: *lletra
probability: 0.05
industrial:
default: *lot
probability: 0.5
alternatives:
- alternative: *oficina
probability: 0.3
- alternative: *unitat
probability: 0.19
- alternative: *parcella
probability: 0.01
numeric_probability: 0.9 # e.g. Lote 1
numeric_plus_alpha_probability: 0.01 # e.g. Lote 1A
alpha_plus_numeric_probability: 0.01 # e.g. Lote A1
alpha_probability: 0.08 # e.g. Lote A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *sala
probability: 0.9
alternatives:
- alternative: *porta
probability: 0.1
numeric_probability: 0.9 # e.g. Sala 1
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
alpha_probability: 0.08 # e.g. Sala A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
alpha:
default: *sala
probability: 0.9
alternatives:
- alternative: *porta
probability: 0.08
- alternative: *lletra
probability: 0.02
allotments:
lot:
default: *lot
numeric_probability: 0.8
alphanumeric_probability: 0.1
alpha_probability: 0.1
parcel:
default: *parcella
numeric_probability: 0.3
alphanumeric_probability: 0.3
alpha_probability: 0.4
lot_probability: 0.9
parcel_probability: 0.06
lot_plus_parcel_probability: 0.02
parcel_plus_lot_probability: 0.02

570
resources/addresses/cs.yaml Normal file
View File

@@ -0,0 +1,570 @@
# cs.yaml
# -------
# Czech language specification
components:
level:
null_probability: 0.95
alphanumeric_probability: 0.04
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.9
alphanumeric_probability: 0.1
# Note: no combinations because of the house numbering scheme
numbers:
default: &cislo
canonical: číslo
abbreviated: č
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "č."
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
and:
default: &a
canonical: a
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
conscription_numbers:
alphanumeric:
default:
canonical: číslo popisné
abbreviated: "č.p."
canonical_probability: 0.05
abbreviated_probability: 0.85
sample: true
sample_probability: 0.1
numeric:
direction: left
cross_streets:
and: *a
at: &na
canonical: na
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &rohu
canonical: rohu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &roh
canonical: roh
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &na_rohu
canonical: na rohu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *a
probability: 0.6
alternatives:
- alternative: *na
probability: 0.1
- alternative: *rohu
probability: 0.1
- alternative: *roh
probability: 0.1
- alternative: *na_rohu
probability: 0.1
between:
canonical: mezi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &patro
canonical: patro
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
nadzemni_podlazi: &nadzemni_podlazi
canonical: nadzemní podlaží
abbreviated: np
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.8
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
numeric_probability: 0.4
ordinal_probability: 0.6
etaz: &etaz
canonical: etáž
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
numeric_probability: 0.4
ordinal_probability: 0.6
prizemi: &prizemi
canonical: přízemí
sample: true
canonical_probability: 0.9
sample_probability: 0.1
podzemni_podlazi: &podzemni_podlazi
canonical: podzemní podlaží
abbreviated: pp
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
# e.g. podzemní podlaží 1
numeric:
direction: left
direction_probability: 0.8
# e.g. pp1
numeric_affix:
affix: pp
direction: left
# e.g. 1. podzemní podlaží
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *podzemni_podlazi
"-1":
default: *podzemni_podlazi
"0":
default: *prizemi
probability: 0.9
alternatives:
- alternative: *patro
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *patro
probability: 0.8
alternatives:
- alternative: *nadzemni_podlazi
probability: 0.19
- alternative: *etaz
probability: 0.01
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: poblíž
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.75
alternatives:
- alternative:
canonical: v blízkém okolí
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: u
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: kolem
sample: true
canonical_probability: 0.7
sample_probability: 0.3
probability: 0.05
nearby:
default:
canonical: poblíž
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.45
alternatives:
- alternative:
canonical: blízko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: v blízkosti
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tady poblíž
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tady
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: okolo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: v okolí
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
near_me:
default:
canonical: v blízkosti mně
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Don't worry about agreement
in:
default:
canonical: v
probability: 0.7
alternatives:
- alternative:
canonical: ve
probability: 0.3
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &prava
canonical: pravá
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &leva
canonical: levá
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *prava
probability: 0.5
- alternative: *leva
probability: 0.5
cardinal_directions:
east: &vychod
canonical: východ
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zapad
canonical: západ
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &sever
canonical: sever
abbreviated: s
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &jih
canonical: jih
abbreviated: j
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: j
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *sever
probability: 0.25
- alternative: *vychod
probability: 0.25
- alternative: *jih
probability: 0.25
- alternative: *zapad
probability: 0.25
# Phrases for building entrances.
entrances:
vchod: &vchod
canonical: vchod
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Vchod 1, Vchod A, etc.
alphanumeric: &entrance_alphanumeric
default: *vchod
# How the entrance identifier is formed; these four probabilities sum to 1.
numeric_probability: 0.1 # e.g. Vchod 1
alpha_probability: 0.85 # e.g. Vchod A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
schodiste: &schodiste
canonical: schodiště
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *schodiste
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *sever
- alternative: *jih
- alternative: *vychod
- alternative: *zapad
po_boxes:
postovni_prihradka: &postovni_prihradka
canonical: poštovní přihrádka
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # poštovní přihrádka 1234
alphanumeric:
default: *postovni_prihradka
numeric_probability: 0.9 # poštovní přihrádka 123
alpha_probability: 0.05 # poštovní přihrádka A
numeric_plus_alpha_probability: 0.04 # poštovní přihrádka 123G
alpha_plus_numeric_probability: 0.01 # poštovní přihrádka A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
apartaman: &apartaman
canonical: apartmán
abbreviated: apt
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
pokoj: &pokoj
canonical: pokoj
abbreviated: pok
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
kancelar: &kancelar
canonical: kancelář
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
alphanumeric: &unit_alphanumeric
default: *apartaman
probability: 0.9
alternatives:
- alternative: *pokoj
probability: 0.1
numeric_probability: 0.9 # e.g. apt. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. apt. A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
zones:
commercial: &commercial_unit_types
default: *pokoj
probability: 0.6
alternatives:
- alternative: *kancelar
probability: 0.4
numeric_probability: 0.95 # e.g. pokoj 1
numeric_plus_alpha_probability: 0.01 # e.g. pokoj 1A
alpha_plus_numeric_probability: 0.01 # e.g. pokoj A1
alpha_probability: 0.03 # e.g. pokoj A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *pokoj
numeric_probability: 0.95 # e.g. pokoj 1
numeric_plus_alpha_probability: 0.01 # e.g. pok 1A
alpha_plus_numeric_probability: 0.01 # e.g. pokoj A1
alpha_probability: 0.03 # e.g. pokoj A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

599
resources/addresses/da.yaml Normal file
View File

@@ -0,0 +1,599 @@
# da.yaml
# -------
# Danish language specification.
components:
level:
null_probability: 0.85
alphanumeric_probability: 0.1
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
-
components:
- level
- unit
label: unit
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.1
probability: 0.005
-
components:
- entrance
- unit
label: unit
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.1
probability: 0.001
numbers:
default: &nummer
canonical: nummer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_numbers:
alphanumeric:
default: *nummer
alphanumeric_phrase_probability: 0.0001
and:
default: &og
canonical: og
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *og
corner_of: &hjorne_af
canonical: hjørne af
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &pa_hjornet_af
canonical: på hjørnet af
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *og
probability: 0.7
alternatives:
- alternative: *hjorne_af
probability: 0.15
- alternative: *pa_hjornet_af
probability: 0.15
between:
canonical: mellem
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &sal
canonical: sal
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.4
ordinal_probability: 0.6
etage: &etage
canonical: etage
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.4
ordinal_probability: 0.6
stuen: &stuen
canonical: stuen
abbreviated: st
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
stueetage: &stueetage
canonical: stueetage
sample: true
canonical_probability: 0.3
sample_probability: 0.7
kaelderen: &kaelderen
canonical: kælderen
abbreviated: kl
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
# e.g. 1 kælderen
numeric:
direction: right
direction_probability: 0.8
# e.g. k1
numeric_affix:
affix: k
direction: left
# e.g. 1. kl
ordinal:
direction: right
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *kaelderen
"-1":
default: *kaelderen
"0":
default: *stuen
probability: 0.9
alternatives:
- alternative: *stueetage
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *sal
probability: 0.7
alternatives:
- alternative: *etage
probability: 0.3
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: i nærheden af
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: tæt på
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: tæt ved
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
nearby:
default:
canonical: i nærheden
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.4
alternatives:
- alternative:
canonical: rundt her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: nær her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: nær
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: omkring her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tæt på her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: nær mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.8
alternatives:
- alternative:
canonical: i nærheden af mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tæt på mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# Don't worry about agreement
# Prepositions meaning "in"; the default and alternative probabilities sum to 1.
in:
default:
canonical: i
probability: 0.8
alternatives:
- alternative:
canonical: om
probability: 0.1
- alternative:
# An empty `canonical:` parses as null, which is not a usable phrase.
# NOTE(review): "på" is an assumed value for this alternative; confirm.
canonical: på
probability: 0.1
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &til_hojre
canonical: til højre
abbreviated: t.h
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: t.h
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &til_venstre
canonical: til venstre
abbreviated: t.v
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.6
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: t.v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
# "midt for" (middle) direction phrase, abbreviated "m.f".
middle: &midt_for
canonical: midt for
abbreviated: m.f
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.6
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: m.f
direction: right
whitespace_probability: 0.1
# Explicit split between the plain-number and affix forms, as the sibling
# `right` and `left` entries have.
# NOTE(review): values mirror those siblings (0.8 / 0.2); confirm.
numeric_probability: 0.8
numeric_affix_probability: 0.2
alternatives:
- alternative: *til_hojre
probability: 0.45
- alternative: *til_venstre
probability: 0.45
- alternative: *midt_for
probability: 0.1
cardinal_directions:
east: &ost
canonical: øst
abbreviated: ø
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: ø
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &vest
canonical: vest
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &syd
canonical: syd
abbreviated: s
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *ost
probability: 0.25
- alternative: *syd
probability: 0.25
- alternative: *vest
probability: 0.25
# Phrases for building entrances.
entrances:
indgang: &indgang
canonical: indgang
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Indgang 1, Indgang A, etc.
alphanumeric: &entrance_alphanumeric
default: *indgang
# How the entrance identifier is formed; these four probabilities sum to 1.
numeric_probability: 0.1 # e.g. Indgang 1
alpha_probability: 0.85 # e.g. Indgang A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
# Staircase phrase "stiege" (abbreviated "stg"); the number follows the phrase.
# NOTE(review): this entry is identical to the `stiege` entry in de.yaml;
# confirm it is a valid Danish staircase term and not a copy-paste leftover.
stiege: &stiege
canonical: stiege
abbreviated: stg
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
numeric:
direction: left
trappe: &trappe
canonical: trappe
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *trappe
probability: 0.8
alternatives:
- alternative: *stiege
probability: 0.2
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *nord
- alternative: *syd
- alternative: *ost
- alternative: *vest
po_boxes:
postboks: &postboks
canonical: postboks
abbreviated: pb
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Pb No 1234
boks: &boks
canonical: boks
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Boks No 1234
alphanumeric:
sample: false
default: *postboks
probability: 0.9
alternatives:
- alternative: *boks
probability: 0.1
numeric_probability: 0.9 # Pb 123
alpha_probability: 0.05 # Pb A
numeric_plus_alpha_probability: 0.04 # Pb 123G
alpha_plus_numeric_probability: 0.01 # Pb A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
lejlighed: &lejlighed
canonical: lejlighed
abbreviated: ljd
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.1
sample_probability: 0.3
numeric:
direction: left
null_phrase_probability: 0.5
# Lejlighed nummer 4
add_number_phrase: true
add_number_phrase_probability: 0.05
hus: &hus
canonical: hus
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
vaerelse: &vaerelse
canonical: værelse
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
# Default unit phrase selection and identifier formats.
# The default/alternative probabilities below sum to 1.
alphanumeric: &unit_alphanumeric
default: *lejlighed
probability: 0.8
alternatives:
- alternative: *hus
probability: 0.1
- alternative: *vaerelse
probability: 0.1
# How the unit identifier is formed; these four probabilities sum to 1.
numeric_probability: 0.9 # e.g. Lejlighed 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. Lejlighed A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2 t.h, 2 t.v, etc.
add_direction: true
add_direction_probability: 0.5
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Lejlighed til højre
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1

728
resources/addresses/de.yaml Normal file
View File

@@ -0,0 +1,728 @@
# de.yaml
# -------
# Note: this will only apply to the German language code, which encompasses Germany,
# Austria, Switzerland (but not Swiss-German, which has its own language code),
# Liechtenstein, Luxembourg (Luxembourgish has its own language code), and part of Belgium.
components:
level:
null_probability: 0.85
alphanumeric_probability: 0.1
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.9
alphanumeric_probability: 0.1
combinations:
# e.g. 2/34, more common way to specify a unit number in German
# if unit exists in the first place
-
components:
- house_number
- unit
label: house_number
separators:
- separator: /
probability: 0.8
- separator: "-"
probability: 0.1
- separator: " - "
probability: 0.1
probability: 0.05
numbers:
default: &nummer
canonical: nummer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_numbers:
# Building phrase "gebäude" (abbreviated "geb"); the number follows the phrase.
gebaude: &gebaude
canonical: gebäude
abbreviated: geb
sample: true
# canonical + abbreviated + sample must sum to 1.0 (0.5 + 0.45 + 0.05).
# NOTE(review): abbreviated_probability was lowered from 0.5 to remove the
# 0.05 excess; confirm this is the value that should absorb it.
canonical_probability: 0.5
abbreviated_probability: 0.45
sample_probability: 0.05
numeric:
direction: left
alphanumeric:
default: *nummer
probability: 0.95
alternatives:
- alternative: *gebaude
probability: 0.05
alphanumeric_phrase_probability: 0.05
conscription_numbers:
alphanumeric:
default:
canonical: konskriptionsnummer
abbreviated: konskr. nr
canonical_probability: 0.15
abbreviated_probability: 0.65
sample: true
sample_probability: 0.2
numeric:
direction: left
and:
default: &und
canonical: und
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *und
corner_of: &ecke_von
canonical: ecke von
at_the_corner_of: &an_der_ecke_von
canonical: an der ecke von
intersection:
default: *und
probability: 0.7
alternatives:
- alternative: *ecke_von
probability: 0.15
- alternative: *an_der_ecke_von
probability: 0.15
between:
canonical: zwischen
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &obergeschoss
canonical: obergeschoss
abbreviated: og
sample: true
add_number_phrase: true
add_number_phrase_probability: 0.1
canonical_probability: 0.5
abbreviated_probability: 0.4
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: og
direction: right
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.3
numeric_affix_probability: 0.5
ordinal_probability: 0.2
etage: &etage
canonical: etage
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.4
ordinal_probability: 0.6
stock: &stock
canonical: stock
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.1
ordinal_probability: 0.9
erdgeschoss: &erdgeschoss
canonical: erdgeschoss
abbreviated: eg
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
# Basement level phrase "untergeschoss" (abbreviated "ug").
untergeschoss: &untergeschoss
canonical: untergeschoss
abbreviated: ug
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
# e.g. Untergeschoss 1
numeric:
direction: left
# e.g. 1ug
# NOTE(review): with `direction: left` the affix may instead be placed before
# the number (ug1); confirm which form is intended.
numeric_affix:
affix: ug
direction: left
# e.g. 1. UG
ordinal:
direction: right
# standalone + numeric + numeric_affix + ordinal probabilities sum to 1.
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
unterste_etage: &unterste_etage
canonical: unterste etage
sample: true
canonical_probability: 0.9
sample_probability: 0.1
oberste_etage: &oberste_etage
canonical: oberste etage
sample: true
canonical_probability: 0.9
sample_probability: 0.1
aliases:
"<-1":
default: *untergeschoss
"-1":
default: *untergeschoss
"0":
default: *erdgeschoss
probability: 0.9
alternatives:
- alternative: *unterste_etage
probability: 0.1
"top":
default: *obergeschoss
probability: 0.75
alternatives:
- alternative: *stock
probability: 0.1
- alternative: *etage
probability: 0.05
- alternative: *oberste_etage
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *obergeschoss
probability: 0.85
alternatives:
- alternative: *stock
probability: 0.1
- alternative: *etage
probability: 0.05
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: nähe
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.5
alternatives:
- alternative:
canonical: bei
probability: 0.3
- alternative:
canonical: nah
probability: 0.15
- alternative:
canonical: nahe an
probability: 0.05
nearby:
default:
canonical: hier in der nähe
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.4
alternatives:
- alternative:
canonical: in der nähe
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.3
- alternative:
canonical: in der nähe hier
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: in der nähe von
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: nahe gelegen
probability: 0.05
- alternative:
canonical: hier in der gegend
probability: 0.05
near_me:
default:
canonical: in meiner nähe
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.9
alternatives:
- alternative:
canonical: in der nähe zu mir
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# Don't worry about agreement
in:
default:
canonical: in
probability: 0.6
alternatives:
- alternative:
canonical: im
probability: 0.2
- alternative:
canonical: um
probability: 0.2
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &rechts
canonical: rechts
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: r
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &links
canonical: links
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: l
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
alternatives:
- alternative: *rechts
probability: 0.5
- alternative: *links
probability: 0.5
cardinal_directions:
east: &ost
canonical: ost
abbreviated: o
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: o
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &west
canonical: west
abbreviated: w
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: w
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &sud
canonical: süd
abbreviated: s
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *ost
probability: 0.25
- alternative: *sud
probability: 0.25
- alternative: *west
probability: 0.25
entrances:
eingang: &eingang
canonical: eingang
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Eingang 1, Eingang A, etc.
alphanumeric: &entrance_alphanumeric
default: *eingang
numeric_probability: 0.1 # e.g. Eingang 1
alpha_probability: 0.85 # e.g. Eingang A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
stiege: &stiege
canonical: stiege
abbreviated: stg
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
numeric:
direction: left
treppe: &treppe
canonical: treppe
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *stiege
probability: 0.6
alternatives:
- alternative: *treppe
probability: 0.4
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *ost
- alternative: *west
po_boxes:
postfach: &postfach
canonical: postfach
abbreviated: pf
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # PF No 1234
numeric_probability: 1.0
# PO box identifier formats; the default phrase is `postfach` (abbreviated "pf").
alphanumeric:
sample: false
default: *postfach
# How the box identifier is formed; these four probabilities sum to 1.
numeric_probability: 0.9 # PF 123
alpha_probability: 0.05 # PF A
numeric_plus_alpha_probability: 0.04 # PF 123G
alpha_plus_numeric_probability: 0.01 # PF A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Distribution of digit counts; the probabilities sum to 1.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
halle: &halle
canonical: halle
numeric:
direction: left
wohnung: &wohnung
canonical: wohnung
abbreviated: whg
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.1
sample_probability: 0.3
plural:
canonical: wohnungen
numeric:
direction: left
# Wohnung nummer 4
add_number_phrase: true
add_number_phrase_probability: 0.2
haus: &haus
canonical: haus
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
wohnungsnummer: &wohnungsnummer
canonical: wohnungsnummer
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
appartement: &appartement
canonical: appartement
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
buro: &buro
canonical: büro
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
zimmer: &zimmer
canonical: zimmer
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *wohnung
probability: 0.8
alternatives:
- alternative: *wohnungsnummer
probability: 0.1
- alternative: *appartement
probability: 0.05
- alternative: *haus
probability: 0.05
numeric_probability: 0.9 # e.g. Wohnung 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. Wohnung A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2R, 2L, etc.
add_direction: true
add_direction_probability: 0.1
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Wohnung Rechts
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1
# Per-zone unit phrases. The key is `zones` (plural), matching the other
# language files (e.g. ca.yaml and cs.yaml).
# NOTE(review): confirm that the consumer looks up `zones`, not `zone`.
zones:
residential: *unit_alphanumeric
commercial:
default: *buro
probability: 0.9
alternatives:
- alternative: *zimmer
probability: 0.1
university:
default: *halle
probability: 0.9
alternatives:
- alternative: *zimmer
probability: 0.1
countries:
# Austria
at:
# Staircase and entrance numbers more common
components:
level:
null_probability: 0.6
alphanumeric_probability: 0.3
standalone_probability: 0.1
staircase:
null_probability: 0.9
alphanumeric_probability: 0.1
entrance:
null_probability: 0.99
alphanumeric_probability: 0.01
unit:
null_probability: 0.4
alphanumeric_probability: 0.6
# Combined apartment numbers are very common
combinations:
# e.g. Neubaugasse 55/A/1/5
-
components:
- house_number
- entrance
- staircase
- unit
label: house_number
separators:
- separator: /
probability: 0.98
- separator: "-"
probability: 0.02
probability: 0.9
# e.g. Neubaugasse 55/1/5
-
components:
- house_number
- staircase
- unit
label: house_number
separators:
- separator: /
probability: 0.98
- separator: "-"
probability: 0.02
probability: 0.8
# e.g. Neubaugasse 55/5
-
components:
- house_number
- unit
label: house_number
probability: 0.7
separators:
- separator: /
probability: 0.98
- separator: "-"
probability: 0.02
units:
top: &top
canonical: top
numeric:
direction: left
alphanumeric: &austria_units_alphanumeric
default: *top
probability: 0.75
alternatives:
- alternative: *haus
probability: 0.15
- alternative: *wohnung
probability: 0.05
- alternative: *wohnungsnummer
probability: 0.025
- alternative: *appartement
probability: 0.025

368
resources/addresses/el.yaml Normal file
View File

@@ -0,0 +1,368 @@
# el.yaml
# -------
# Greek language specification
alphabet: αβγδεζηθικλμνξοπρστυφχψω
alphabet_probability: 0.8
components:
level:
null_probability: 0.95
alphanumeric_probability: 0.05
entrance:
null_probability: 0.9
alphanumeric_probability: 0.1
unit:
null_probability: 0.6
alphanumeric_probability: 0.4
combinations:
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
levels:
orofos: &orofos
canonical: όροφος
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: left
numeric_probability: 0.4
ordinal_probability: 0.6
orofos_latin: &orofos_latin
canonical: órofos
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
ordinal:
direction: left
numeric_probability: 0.4
ordinal_probability: 0.6
isogelo: &isogelo
canonical: ισόγειο
sample: true
canonical_probability: 0.8
sample_probability: 0.2
isogelo_latin: &isogelo_latin
canonical: isógeio
sample: true
canonical_probability: 0.6
sample_probability: 0.4
imiorofos: &imiorofos
canonical: ημιώροφος
sample: true
canonical_probability: 0.8
sample_probability: 0.2
imiorofos_latin: &imiorofos_latin
canonical: imiórofos
sample: true
canonical_probability: 0.6
sample_probability: 0.4
ypogeio: &ypogeio
canonical: υπόγειο
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: left
number_abs_value: true
number_min_abs_value: 1
standalone_probability: 0.985
numeric_probability: 0.01
ordinal_probability: 0.005
ypogeio_latin: &ypogeio_latin
canonical: ypógeio
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: left
number_abs_value: true
number_min_abs_value: 1
standalone_probability: 0.985
numeric_probability: 0.01
ordinal_probability: 0.005
aliases:
"<-1":
default: *ypogeio
probability: 0.9
alternatives:
- alternative: *ypogeio_latin
probability: 0.1
"-1":
default: *ypogeio
probability: 0.9
alternatives:
- alternative: *ypogeio_latin
probability: 0.1
half_floors:
default: *imiorofos
probability: 0.9
alternatives:
- alternative: *imiorofos_latin
probability: 0.1
"0":
default: *isogelo
probability: 0.9
alternatives:
- alternative: *isogelo_latin
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *orofos
probability: 0.9
alternatives:
- alternative: *orofos_latin
probability: 0.1
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
entrances:
eisodos: &eisodos
canonical: είσοδος
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
eisodos_latin: &eisodos_latin
canonical: eísodos
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# είσοδος 1, etc.
alphanumeric:
default: *eisodos
probability: 0.99
alternatives:
- alternative: *eisodos_latin
probability: 0.01
numeric_probability: 0.1
alpha_probability: 0.9
staircases:
skala: &skala
canonical: σκάλα
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
skala_latin: &skala_latin
canonical: skála
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, skála A, skála 1, etc.
default: *skala
probability: 0.9
alternatives:
- alternative: *skala_latin
probability: 0.1
numeric_probability: 0.6 # e.g. skála 1
alpha_probability: 0.35 # e.g. skála A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
po_boxes:
tachydromiki_thyrida: &tachydromiki_thyrida
canonical: ταχυδρομική θυρίδα
abbreviated: τ.θ
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
tachydromiki_thyrida_latin: &tachydromiki_thyrida_latin
canonical: tachydromikí thyrída
abbreviated: t.th
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
alphanumeric:
default: *tachydromiki_thyrida
probability: 0.8
alternatives:
- alternative: *tachydromiki_thyrida_latin
probability: 0.2
numeric_probability: 0.9 # t.th 123
alpha_probability: 0.05 # t.th А
numeric_plus_alpha_probability: 0.04 # t.th 123А
alpha_plus_numeric_probability: 0.01 # t.th А123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
diamerisma: &diamerisma
canonical: διαμέρισμα
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
diamerisma_latin: &diamerisma_latin
canonical: diamérisma
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
domatio: &domatio
canonical: δωμάτιο
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
domatio_latin: &domatio_latin
canonical: domátio
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
grafeiou: &grafeiou
canonical: γραφείου
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
grafeiou_latin: &grafeiou_latin
canonical: grafeíou
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
# Default unit phrase plus weighted alternatives, followed by the mix of number formats
alphanumeric: &unit_alphanumeric
default: *diamerisma
probability: 0.8
alternatives:
- alternative: *diamerisma_latin
probability: 0.1
- alternative: *domatio
probability: 0.09
- alternative: *domatio_latin
probability: 0.01
numeric_probability: 0.9 # e.g. διαμέρισμα 1
numeric_plus_alpha_probability: 0.03 # e.g. 1α
alpha_plus_numeric_probability: 0.03 # e.g. α1
alpha_probability: 0.04 # e.g. διαμέρισμα α
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1
zone:
residential: *unit_alphanumeric
commercial:
default: *grafeiou
probability: 0.9
alternatives:
- alternative: *grafeiou_latin
probability: 0.1
university:
default: *domatio
probability: 0.9
alternatives:
- alternative: *domatio_latin
probability: 0.1

1468
resources/addresses/en.yaml Normal file

File diff suppressed because it is too large Load Diff

1189
resources/addresses/es.yaml Normal file

File diff suppressed because it is too large Load Diff

470
resources/addresses/et.yaml Normal file
View File

@@ -0,0 +1,470 @@
# et.yaml
# -------
# Estonian language specification.
components:
level:
null_probability: 0.97
alphanumeric_probability: 0.02
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "-"
probability: 0.95
- separator: " - "
probability: 0.05
probability: 0.7
numbers:
default: &number
canonical: number
abbreviated: nbr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_numbers:
alphanumeric:
default: *number
alphanumeric_phrase_probability: 0.0001
and:
default: &ja
canonical: ja
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *ja
corner_of: &nurgas
canonical: nurgas
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &nurgal
canonical: nurgal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *ja
probability: 0.7
alternatives:
- alternative: *nurgas
probability: 0.15
- alternative: *nurgal
probability: 0.15
between:
canonical: vahel
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &korrusel
canonical: korrusel
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
numeric_probability: 0.4
ordinal_probability: 0.6
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.8
sample_probability: 0.2
kelder: &kelder
canonical: kelder
sample: true
canonical_probability: 0.8
sample_probability: 0.2
standalone_probability: 1.0
keldris: &keldris
canonical: keldris
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. 1 keldris
numeric:
direction: right
direction_probability: 0.8
# e.g. k1
numeric_affix:
affix: k
direction: left
# e.g. 1. keldris
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.05
numeric_affix_probability: 0.9
ordinal_probability: 0.05
aliases:
"<-1":
default: *kelder
probability: 0.85
alternatives:
- alternative: *keldris
probability: 0.15
"-1":
default: *kelder
probability: 0.85
alternatives:
- alternative: *keldris
probability: 0.1
- alternative: *korrusel
probability: 0.05
"1":
default: *parter
probability: 0.5
alternatives:
- alternative: *korrusel
probability: 0.5
numbering_starts_at: 1
alphanumeric:
default: *korrusel
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: lähedal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
nearby:
default:
canonical: lähedal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: siin lähedal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: siinkandis
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: lähedal mulle
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Probabilities of each phrase
near_probability: 0.7
nearby_probability: 0.2
near_me_probability: 0.1
directions:
right: &paremal
canonical: paremal
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: p
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
paramale: &paremale
canonical: paremale
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: p
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &vasakul
canonical: vasakul
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
vasakule: &vasakule
canonical: vasakule
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
alternatives:
- alternative: *paremal
probability: 0.25
- alternative: *paremale
probability: 0.25
- alternative: *vasakul
probability: 0.25
- alternative: *vasakule
probability: 0.25
cardinal_directions:
east: &ida
canonical: ida
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
west: &laas
canonical: lääs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
north: &pohi
canonical: põhi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
south: &louna
canonical: lõuna
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *pohi
probability: 0.25
- alternative: *ida
probability: 0.25
- alternative: *louna
probability: 0.25
- alternative: *laas
probability: 0.25
entrances:
sissepaas: &sissepaas
canonical: sissepääs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# sissepääs 1, sissepääs A, etc.
alphanumeric: &entrance_alphanumeric
default: *sissepaas
numeric_probability: 0.1 # e.g. sissepääs 1
alpha_probability: 0.85 # e.g. sissepääs A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
trepikoda: &trepikoda
canonical: trepikoda
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *trepikoda
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *pohi
- alternative: *louna
- alternative: *ida
- alternative: *laas
po_boxes:
postboks: &abonementpostkast
canonical: abonementpostkast
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # abonementpostkast #1234
kast: &kast
canonical: kast
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Kast #1234
alphanumeric:
sample: false
default: *abonementpostkast
probability: 0.9
alternatives:
- alternative: *kast
probability: 0.1
numeric_probability: 0.9 # 123
alpha_probability: 0.05 # A
numeric_plus_alpha_probability: 0.04 # 123G
alpha_plus_numeric_probability: 0.01 # A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
korter: &korter
canonical: korter
abbreviated: k
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
null_phrase_probability: 0.3
# e.g. korter number 4
add_number_phrase: true
add_number_phrase_probability: 0.05
ruumi: &ruumi
canonical: ruumi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *korter
probability: 0.9
alternatives:
- alternative: *ruumi
probability: 0.1
numeric_probability: 1.0 # e.g. korter 1
# Separate random probability for adding directions like 2P, 2V, etc.
add_direction: true
add_direction_probability: 0.005
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Korter vasakule
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05

375
resources/addresses/eu.yaml Normal file
View File

@@ -0,0 +1,375 @@
# eu.yaml
# -------
# Basque language specification
components:
level:
# If no floor number is specified
null_probability: 0.8
alphanumeric_probability: 0.2
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
# If no unit number is specified
null_probability: 0.4
alphanumeric_probability: 0.6
combinations:
-
components:
- level
- unit
label: unit
separators:
- separator: "-"
probability: 0.85
- separator: "/"
probability: 0.15
probability: 0.7
and:
default: &eta
canonical: eta
abbreviated: "&"
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.4
sample_probability: 0.1
house_numbers:
# zenbakirik gabe (zk.g) addresses
no_number:
default:
canonical: zenbakirik gabe
abbreviated: zk.g
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.6
sample_probability: 0.3
no_number_probability: 0.1 # With this probability, use zenbakirik gabe if no house_number is specified
levels:
floor: &solairua
canonical: solairua
abbreviated: sol
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
numeric:
direction: left
# e.g. 2. solairua
ordinal:
direction: right
numeric_probability: 0.25
ordinal_probability: 0.75
# Ground floor
beheko_solairua: &beheko_solairua
canonical: beheko solairua
abbreviated: beheko sol
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.3
sample_probability: 0.1
behe_solairua: &behe_solairua
canonical: behe-solairua
abbreviated: behe-sol
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.3
sample_probability: 0.1
aliases:
"0":
default: *beheko_solairua
probability: 0.5
alternatives:
- alternative: *behe_solairua
probability: 0.4
- alternative: *solairua
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *solairua
numeric_probability: 0.99
alpha_probability: 0.01
blocks:
alphanumeric:
default:
canonical: blokea
abbreviated: bl
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.2
ordinal_probability: 0.8
categories:
near:
default:
canonical: gertu
nearby:
default:
canonical: gertuko
probability: 0.7
alternatives:
- alternative:
canonical: hemen gertu
probability: 0.2
- alternative:
canonical: hemen
probability: 0.1
near_me:
default:
canonical: me gertu
# Probabilities of each phrase
near_probability: 0.7
nearby_probability: 0.2
near_me_probability: 0.1
cross_streets:
and: *eta
txoko: &txoko
canonical: txoko
sample: true
canonical_probability: 0.7
sample_probability: 0.3
intersection:
default: *eta
probability: 0.8
alternatives:
- alternative: *txoko
probability: 0.2
# Phrase used for "between" cross-street expressions
between:
canonical: arteko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Key spelled as in the Estonian and Finnish configs (was "parentheses_probabililty")
parentheses_probability: 0.5
po_boxes:
posta_kutxa: &posta_kutxa
canonical: posta-kutxa
abbreviated: p.-ku
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.4
sample_probability: 0.4
numeric:
direction: left
numeric_probability: 1.0
alphanumeric:
sample: false
default: *posta_kutxa
numeric_probability: 0.9 # P.-Ku 123
alpha_probability: 0.05 # P.-Ku A
numeric_plus_alpha_probability: 0.04 # P.-Ku 123G
alpha_plus_numeric_probability: 0.01 # P.-Ku A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
postcodes:
alphanumeric:
default:
canonical: posta-kodea
abbreviated: p.-k
sample: true
canonical_probability: 0.01
abbreviated_probability: 0.9
sample_probability: 0.09
numeric:
direction: left
numeric_affix:
affix: p.-k.
direction: left
# null_probability means the chance of doing nothing e.g. just the postal code
null_probability: 0.7
numeric_probability: 0.18
numeric_affix_probability: 0.12
strict_numeric: true
directions:
right: &eskuina
canonical: eskuina
abbreviated: esk
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: esk.
direction: right
whitespace_probability: 0.1
numeric_probability: 0.9
numeric_affix_probability: 0.1
left: &ezkerkada
canonical: ezkerkada
abbreviated: ezk
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: ezk.
direction: right
whitespace_probability: 0.1
numeric_probability: 0.9
numeric_affix_probability: 0.1
ezkerreko: &ezkerreko
canonical: ezkerreko
abbreviated: ezk.-ko
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: left
alternatives:
- alternative: *eskuina
probability: 0.5
- alternative: *ezkerkada
probability: 0.5
entrances:
sarrera: &sarrera
canonical: sarrera
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Sarrera 1, Sarrera A, etc.
alphanumeric:
default: *sarrera
numeric_probability: 0.1 # e.g. Sarrera 1
alpha_probability: 0.85 # e.g. Sarrera A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
modifier:
alternatives:
- alternative: *eskuina
- alternative: *ezkerreko
staircases:
eskailera: &eskailera
canonical: eskailera
abbreviated: eskra
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Eskra A, Eskra 1, etc.
default: *eskailera
numeric_probability: 0.6 # e.g. Eskra 1
alpha_probability: 0.35 # e.g. Eskra A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left # e.g. Ezk.-ko Eskra
modifier:
alternatives:
- alternative: *eskuina
- alternative: *ezkerreko
units:
flat: &apartamentu
canonical: apartamentu
abbreviated: aptu
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
# If it's just puerta B, many times it's just e.g. 3o B for "tercero piso puerta B"
null_phrase_probability: 0.15
ordinal:
direction: right
numeric_probability: 0.6
ordinal_probability: 0.4
alphanumeric: &unit_alphanumeric
default: *apartamentu
# Separate random probability for adding directions like 2. Ezk, 2 Esk, etc.
add_direction: true
add_direction_probability: 0.1
add_direction_numeric: true # Only for numbers
add_direction_standalone: true # A unit can be as simple as "D"
numeric_probability: 0.7 # e.g. 1
numeric_plus_alpha_probability: 0.01 # e.g. 1A
alpha_plus_numeric_probability: 0.01 # e.g. A1
alpha_probability: 0.28 # e.g. A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

403
resources/addresses/fi.yaml Normal file
View File

@@ -0,0 +1,403 @@
# fi.yaml
# -------
# Finnish language specification.
components:
level:
null_probability: 0.97
alphanumeric_probability: 0.02
standalone_probability: 0.01
staircase:
null_probability: 0.9
alphanumeric_probability: 0.1
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
-
components:
- staircase
- unit
label: unit
separators:
- separator: " "
probability: 0.8
- separator: "-"
probability: 0.1
- separator: "/"
probability: 0.05
- separator: " - "
probability: 0.05
probability: 0.85
numbers:
default: &numero
canonical: numero
abbreviated: nro
sample: true
# Probabilities
canonical_probability: 0.1
abbreviated_probability: 0.5
sample_probability: 0.4
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.7
numeric_affix_probability: 0.3
house_numbers:
alphanumeric:
default: *numero
alphanumeric_phrase_probability: 0.0001
and:
default: &ja
canonical: ja
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *ja
corner_of: &kulmassa
canonical: kulmassa
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *ja
probability: 0.7
alternatives:
- alternative: *kulmassa
probability: 0.3
between:
canonical: välillä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &kerros
canonical: kerros
abbreviated: krs
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.4
ordinal_probability: 0.6
numbering_starts_at: 1
alphanumeric:
default: *kerros
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: lähellä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
nearby:
default:
canonical: lähistöllä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: lähellä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tässä lähellä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: täällä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: lähellä minua
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Probabilities of each phrase
near_probability: 0.7
nearby_probability: 0.2
near_me_probability: 0.1
directions:
right: &oikea
canonical: oikea
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: o
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
oikealla: &oikealla
canonical: oikealla
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: o
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &vasen
canonical: vasen
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
vasemmalla: &vasemmalla
canonical: vasemmalla
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
alternatives:
- alternative: *oikea
probability: 0.25
- alternative: *oikealla
probability: 0.25
- alternative: *vasen
probability: 0.25
- alternative: *vasemmalla
probability: 0.25
cardinal_directions:
east: &itaan
canonical: itään
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
west: &lansi
canonical: länsi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
north: &pohja
canonical: pohja
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
south: &etela
canonical: etelä
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *pohja
probability: 0.25
- alternative: *itaan
probability: 0.25
- alternative: *etela
probability: 0.25
- alternative: *lansi
probability: 0.25
entrances:
sissepaas: &sisaankaynti
canonical: sisäänkäynti
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# sisäänkäynti 1, sisäänkäynti A, etc.
alphanumeric: &entrance_alphanumeric
default: *sisaankaynti
numeric_probability: 0.1 # e.g. sisäänkäynti 1
alpha_probability: 0.85 # e.g. sisäänkäynti A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
portaikko: &portaikko
canonical: portaikko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *portaikko
alpha_probability: 1.0
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *pohja
- alternative: *etela
- alternative: *itaan
- alternative: *lansi
po_boxes:
postilokero: &postilokero
canonical: postilokero
abbreviated: pl
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # PL #1234
alphanumeric:
sample: false
default: *postilokero
numeric_probability: 0.9 # 123
alpha_probability: 0.05 # A
numeric_plus_alpha_probability: 0.04 # 123G
alpha_plus_numeric_probability: 0.01 # A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
asunto: &asunto
canonical: asunto
abbreviated: as
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.7
sample_probability: 0.1
numeric:
direction: left
null_phrase_probability: 0.3
# as nro 4
add_number_phrase: true
add_number_phrase_probability: 0.05
ruumi: &huone
canonical: huone
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *asunto
probability: 0.9
alternatives:
- alternative: *huone
probability: 0.1
numeric_probability: 1.0 # e.g. as 1
# Separate random probability for adding directions like 2O, 2V, etc.
add_direction: true
add_direction_probability: 0.005
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. asunto vasen
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05

951
resources/addresses/fr.yaml Normal file
View File

@@ -0,0 +1,951 @@
# Note: default config is for France. Canadian, Swiss, Belgian, and other
# conventions go in country overrides
# Per-component probabilities (each null/alphanumeric pair sums to 1.0) and
# the rules for combining components into a single labelled field.
components:
level:
# If no floor number is specified
null_probability: 0.8
alphanumeric_probability: 0.2
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
# If no unit number is specified
null_probability: 0.8
alphanumeric_probability: 0.2
combinations:
# house_number + unit joined by a separator and labelled house_number
-
components:
- house_number
- unit
label: house_number
# Separator distribution; probabilities sum to 1.0
separators:
- separator: /
probability: 0.8
- separator: "-"
probability: 0.1
- separator: " - "
probability: 0.1
probability: 0.005
# Number phrase ("numéro", abbreviated "nº") and the "#" numeric affix.
numbers:
default: &numero
canonical: numéro
abbreviated: "nº"
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
sample_exclude:
- "#" # Used in numeric affix. Needs to be quoted, otherwise it's a comment
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
# Probabilities for numbers (numeric + numeric_affix sum to 1.0)
numeric_probability: 0.7
numeric_affix_probability: 0.3
# Conjunction phrase ("et", abbreviated "&"); anchored as &and for reuse.
and:
default: &and
canonical: et
abbreviated: "&"
# canonical + abbreviated + sample probabilities sum to 1.0
canonical_probability: 0.7
abbreviated_probability: 0.25
sample: true
sample_probability: 0.05
# House number phrases, including the "no number" form.
house_numbers:
# sans numéro (s/n) addresses
no_number:
canonical: sans numéro
abbreviated: s/n
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
alphanumeric:
default: *numero
alphanumeric_phrase_probability: 0.01
no_number_probability: 0.05 # With this probability, use sans numéro if no house_number is specified
# Floor/level phrases. Each phrase is anchored so that the aliases and
# alphanumeric sections can refer to it.
levels:
# Generic numbered floor, e.g. with a number or an ordinal
floor: &etage
canonical: étage
abbreviated: ét
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.05
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
# numeric + ordinal probabilities sum to 1.0
numeric_probability: 0.75
ordinal_probability: 0.25
niveau: &niveau
canonical: niveau
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.05
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.75
ordinal_probability: 0.25
# Named floors with no numeric/ordinal settings
bel_etage: &bel_etage
canonical: bel étage
sample: true
canonical_probability: 0.9
sample_probability: 0.1
etage_noble: &etage_noble
canonical: étage noble
sample: true
canonical_probability: 0.9
sample_probability: 0.1
dernier_etage: &dernier_etage
canonical: dernier étage
sample: true
canonical_probability: 0.9
sample_probability: 0.1
# Below-ground floors: mostly standalone (0.99), rarely numbered
basement: &sous_sol
canonical: sous-sol
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 1
# standalone + numeric + ordinal probabilities sum to 1.0
standalone_probability: 0.99
numeric_probability: 0.005
ordinal_probability: 0.005
sub_basement: &soubassement
canonical: soubassement
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 2
number_subtract_abs_value: 1
standalone_probability: 0.99
numeric_probability: 0.005
ordinal_probability: 0.005
mezzanine: &entresol
canonical: entresol
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Ground floor
rez_de_chaussee: &rez_de_chaussee
canonical: rez-de-chaussée
abbreviated: rdc
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.3
sample_probability: 0.3
rez_de_chaussee_bas: &rez_de_chaussee_bas
canonical: rez-de-chaussée bas
abbreviated: rcb
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
rez_de_chaussee_haut: &rez_de_chaussee_haut
canonical: rez-de-chaussée haut
abbreviated: rch
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
parterre: &parterre
canonical: parterre
sample: true
canonical_probability: 0.8
sample_probability: 0.2
rez_de_jardin: &rez_de_jardin
canonical: rez-de-jardin
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Maps specific floor numbers (and the special keys half_floors / top) to the
# phrases used for them. In every entry the default probability plus the
# alternative probabilities sum to 1.0.
aliases:
# Floors below -1
"<-1":
default: *sous_sol
probability: 0.6
alternatives:
- alternative: *soubassement
probability: 0.3995
- alternative: *etage
probability: 0.0005
"-1":
default: *sous_sol
probability: 0.9995
alternatives:
- alternative: *etage
probability: 0.0005
half_floors:
default: *entresol
# Ground floor
"0":
default: *rez_de_chaussee
probability: 0.74
alternatives:
- alternative: *rez_de_jardin
probability: 0.01
- alternative: *rez_de_chaussee_bas
probability: 0.1
- alternative: *rez_de_chaussee_haut
probability: 0.1
- alternative: *etage
probability: 0.05
"1":
default: *etage
probability: 0.8
alternatives:
- alternative: *bel_etage
probability: 0.1
- alternative: *etage_noble
probability: 0.1
top:
default: *etage
probability: 0.9
alternatives:
- alternative: *dernier_etage
probability: 0.1
# Phrase choice for generic numbered/lettered floors
alphanumeric:
default: *etage
probability: 0.95
alternatives:
- alternative: *niveau
probability: 0.05
# numeric + alpha probabilities sum to 1.0
numeric_probability: 0.99
alpha_probability: 0.01
numbering_starts_at: 0
# Phrases used to join two streets into an intersection, plus the "between"
# phrase. Examples in the comments below are English illustrations.
cross_streets:
# 26th & 6th Avenue
and: *and
# 26th @ Broadway
a: &a
canonical: à
sample: true
canonical_probability: 0.8
sample_probability: 0.2
au: &au
canonical: au
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &langle_de
canonical: l'angle de
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# NOTE(review): unlike its siblings, this phrase defines no sample flag or
# probabilities - confirm the omission is intentional.
at_the_corner_of: &a_langle_de
canonical: à l'angle de
# Default (0.7) plus alternatives (0.025 + 0.025 + 0.15 + 0.1) sum to 1.0
intersection:
default: *and
probability: 0.7
alternatives:
- alternative: *a
probability: 0.025
- alternative: *au
probability: 0.025
- alternative: *langle_de
probability: 0.15
- alternative: *a_langle_de
probability: 0.1
# 26th betw 5th Ave and 6th Ave
between:
canonical: entre
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5 # Probability of using parentheses e.g. (between 5th and 6th)
# Relative direction phrases (right/left/rear/front) and the distributions
# used to pick among them.
directions:
right: &droit
canonical: droit
abbreviated: dr
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
numeric:
direction: right
# Single-letter affix form, "d"
numeric_affix:
affix: d
direction: right
whitespace_probability: 0.1
# numeric + numeric_affix probabilities sum to 1.0
numeric_probability: 0.7
numeric_affix_probability: 0.3
left: &gauche
canonical: gauche
abbreviated: g
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
# Single-letter affix form, "g"
numeric_affix:
affix: g
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
rear: &arriere
canonical: arrière
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
front: &avant
canonical: avant
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Overall distribution: right/left dominate (0.49 each); sums to 1.0
alternatives:
- alternative: *droit
probability: 0.49
- alternative: *gauche
probability: 0.49
- alternative: *arriere
probability: 0.01
- alternative: *avant
probability: 0.01
# Front/rear only, evenly split
anteroposterior:
alternatives:
- alternative: *avant
probability: 0.5
- alternative: *arriere
probability: 0.5
# Right/left only, evenly split
lateral:
alternatives:
- alternative: *droit
probability: 0.5
- alternative: *gauche
probability: 0.5
# Cardinal direction phrases (est/ouest/nord/sud), each with a single-letter
# abbreviation that is also used as its numeric affix.
cardinal_directions:
east: &est
canonical: est
abbreviated: e
# canonical + abbreviated probabilities sum to 1.0
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: e
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &ouest
canonical: ouest
abbreviated: o
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: o
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &sud
canonical: sud
abbreviated: s
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Uniform distribution over the four directions; sums to 1.0
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *est
probability: 0.25
- alternative: *sud
probability: 0.25
- alternative: *ouest
probability: 0.25
# Entrance phrase and the settings used to generate entrance identifiers.
entrances:
# NOTE(review): the canonical phrase is the English word "entrance" in a
# French configuration - confirm this is the intended phrase.
entrance: &entrance
canonical: entrance
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Entrance 1, Entrance A, etc.
alphanumeric: &entrance_alphanumeric
default: *entrance
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.1 # e.g. Entrance 1
alpha_probability: 0.85 # e.g. Entrance A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction words that may be attached to the entrance phrase
directional:
modifier:
direction: right # e.g. Entrance Nord
direction_probability: 0.95
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *ouest
- alternative: *droit
- alternative: *gauche
- alternative: *arriere
- alternative: *avant
# Staircase phrase ("escalier", abbreviated "esc") and the settings used to
# generate staircase identifiers.
staircases:
escalier: &escalier
canonical: escalier
abbreviated: esc
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Escalier A, Esc 1, etc.
default: *escalier
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.6 # e.g. Escalier 1
alpha_probability: 0.35 # e.g. Escalier A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction words that may be attached to the staircase phrase
directional:
direction: right # e.g. Escalier Gauche
direction_probability: 0.9
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *ouest
- alternative: *droit
- alternative: *gauche
- alternative: *arriere
- alternative: *avant
# PO box phrases: "boîte postale" (bp) is the default here; "case postale"
# (cp) is anchored for use by the country overrides.
po_boxes:
boite_postal: &boite_postal
canonical: boîte postale
abbreviated: bp
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # BP No 1234
numeric_probability: 1.0
case_postal: &case_postal
canonical: case postale
abbreviated: cp
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # CP No 1234
numeric_probability: 1.0
alphanumeric:
sample: false
default: *boite_postal
# Shape of the generated identifier; the four probabilities sum to 1.0
numeric_probability: 0.9 # BP 123
alpha_probability: 0.05 # BP A
numeric_plus_alpha_probability: 0.04 # BP 123G
alpha_plus_numeric_probability: 0.01 # BP A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Distribution over the number of digits (1 to 6); probabilities sum to 1.0
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases. Every phrase is placed to the left of its number and may add
# a number phrase with the probability given per phrase.
units:
flat: &appartement
canonical: appartement
abbreviated: app
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
unit: &unite
canonical: unité
abbreviated: u
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
suite: &suite
canonical: suite
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.4
office: &bureau
canonical: bureau
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.3
door: &porte
canonical: porte
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
room: &salle
canonical: salle
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
chambre: &chambre
canonical: chambre
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
# Anchored for use as the default unit phrase in the Belgian override
boite: &boite
canonical: boîte
abbreviated: bte
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
# Lot / parcel phrases, referenced by the allotments section
lot: &lotissement
canonical: lotissement
abbreviated: lot
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
parcelle: &parcelle
canonical: parcelle
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
# Lot and parcel identifiers: per-type identifier shapes, then the
# distribution over lot / parcel / combined forms.
allotments:
lot:
default: *lotissement
# Identifier-shape probabilities sum to 1.0
numeric_probability: 0.8
alphanumeric_probability: 0.1
alpha_probability: 0.1
parcel:
default: *parcelle
numeric_probability: 0.3
alphanumeric_probability: 0.3
alpha_probability: 0.4
# The four form probabilities below sum to 1.0
lot_probability: 0.9
parcel_probability: 0.06
lot_plus_parcel_probability: 0.02
parcel_plus_lot_probability: 0.02
# Default unit phrase selection and identifier shapes. Anchored as
# &unit_alphanumeric so the residential zone can reuse it.
alphanumeric: &unit_alphanumeric
# Default (0.85) plus alternatives (0.05 + 0.095 + 0.005) sum to 1.0
default: *appartement
probability: 0.85
alternatives:
# e.g. just plain #3 or No. 4
- alternative: *numero
probability: 0.05
- alternative: *porte
probability: 0.095
- alternative: *chambre
probability: 0.005
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.9 # e.g. Appartement 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. Appartement A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2D, 2G, etc.
add_direction: true
add_direction_probability: 0.1
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Unité Gauche
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1
# Zone-specific unit phrase selection. In each zone the default probability
# plus the alternative probabilities sum to 1.0, as do the four
# identifier-shape probabilities.
zones:
residential: *unit_alphanumeric
commercial:
default: *bureau
probability: 0.8
alternatives:
- alternative: *suite
probability: 0.2
numeric_probability: 0.9 # e.g. Bureau 1
numeric_plus_alpha_probability: 0.01 # e.g. Bureau 1A
alpha_plus_numeric_probability: 0.01 # e.g. Bureau A1
alpha_probability: 0.08 # e.g. Bureau A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
industrial:
default: *lotissement
probability: 0.5
alternatives:
- alternative: *bureau
probability: 0.3
- alternative: *unite
probability: 0.19
- alternative: *parcelle
probability: 0.01
numeric_probability: 0.9 # e.g. Lot 1
numeric_plus_alpha_probability: 0.01 # e.g. Lot 1A
alpha_plus_numeric_probability: 0.01 # e.g. Lot A1
alpha_probability: 0.08 # e.g. Lot A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *salle
probability: 0.9
alternatives:
- alternative: *porte
probability: 0.1
numeric_probability: 0.9 # e.g. Salle 1
numeric_plus_alpha_probability: 0.01 # e.g. Salle 1A
alpha_plus_numeric_probability: 0.01 # e.g. Salle A1
alpha_probability: 0.08 # e.g. Salle A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Phrases for category queries (near / nearby / near me / in). Many
# alternatives come in pairs, with and without accents or apostrophes. Within
# each group the default probability plus the alternatives sum to 1.0.
categories:
near:
default:
canonical: près de
probability: 0.6
alternatives:
- alternative:
canonical: à coté de
probability: 0.05
- alternative:
canonical: proche de
probability: 0.05
- alternative:
canonical: proches de
probability: 0.05
- alternative:
canonical: a cote de
probability: 0.05
- alternative:
canonical: pres de
probability: 0.05
- alternative:
canonical: aux environs de
probability: 0.05
- alternative:
canonical: à proximité de
probability: 0.05
- alternative:
canonical: a proximite de
probability: 0.05
nearby:
default:
canonical: proche
probability: 0.4
alternatives:
- alternative:
canonical: à coté
probability: 0.05
- alternative:
canonical: a cote
probability: 0.05
- alternative:
canonical: près d'ici
probability: 0.05
- alternative:
canonical: près dici
probability: 0.05
- alternative:
canonical: pres d'ici
probability: 0.05
- alternative:
canonical: pres dici
probability: 0.05
- alternative:
canonical: près de là
probability: 0.05
- alternative:
canonical: pres de la
probability: 0.05
- alternative:
canonical: par ici
probability: 0.05
- alternative:
canonical: dans les alentours
probability: 0.05
- alternative:
canonical: à proximité de là
probability: 0.05
- alternative:
canonical: a proximite de la
probability: 0.05
near_me:
default:
canonical: proche de chez moi
probability: 0.6
alternatives:
- alternative:
canonical: près de moi
probability: 0.1
- alternative:
canonical: pres de moi
probability: 0.1
- alternative:
canonical: à proximité de moi
probability: 0.1
- alternative:
canonical: a proximite de moi
probability: 0.1
in:
default:
canonical: à
probability: 0.7
alternatives:
- alternative:
canonical: en
probability: 0.1
- alternative:
canonical: a
probability: 0.1
- alternative:
canonical: dans
probability: 0.1
# Probabilities of each phrase (the four values sum to 1.0)
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Country-specific overrides of the default (France) configuration, keyed by
# lowercase country code.
countries:
# Belgium
be:
units:
alphanumeric:
# "boîte" becomes the default unit phrase; probabilities sum to 1.0
default: *boite
probability: 0.75
alternatives:
- alternative: *appartement
probability: 0.1
# e.g. just plain #3 or No. 4
- alternative: *numero
probability: 0.05
- alternative: *porte
probability: 0.095
- alternative: *chambre
probability: 0.005
# Canada
ca:
components:
unit:
null_probability: 0.6
alphanumeric_probability: 0.4
combinations:
# unit comes before house_number here, usually joined with "-"
-
components:
- unit
- house_number
label: house_number
# Separator distribution; probabilities sum to 1.0
separators:
- separator: /
probability: 0.04
- separator: "-"
probability: 0.95
- separator: " - "
probability: 0.01
probability: 0.1
levels:
numbering_starts_at: 1
aliases:
"1":
# Have to do this because etage is numeric
# and has keys like "numeric_probability" which
# we don't want to infect rez_de_chaussee when doing
# a recursive merge
default: *etage
probability: 0.1
alternatives:
- alternative: *rez_de_chaussee
probability: 0.8
- alternative: *bel_etage
probability: 0.05
- alternative: *etage_noble
probability: 0.05
units:
alphanumeric:
# More common to use in Canada, as in the US
use_floor_probability: 0.35
po_boxes:
alphanumeric:
default: *case_postal
# Switzerland
ch:
levels:
aliases:
# Ground floor: "parterre" becomes the default; probabilities sum to 1.0
"0":
default: *parterre
probability: 0.9
alternatives:
- alternative: *rez_de_chaussee
probability: 0.05
- alternative: *etage
probability: 0.05
po_boxes:
alphanumeric:
default: *case_postal

269
resources/addresses/he.yaml Normal file
View File

@@ -0,0 +1,269 @@
# he.yaml
# -------
# Hebrew language specification
# Hebrew letters, including the final forms (ך ם ן ף ץ).
# NOTE(review): alphabet_probability is presumably the chance of drawing
# letters from this alphabet rather than a Latin one - confirm with the consumer.
alphabet: אבגדהוזחטיכךלמםנןסעפףצץקרשת
alphabet_probability: 0.8
# Per-component probabilities (each null/alphanumeric pair sums to 1.0) and
# the rules for combining components into a single house_number field.
components:
level:
null_probability: 0.95
alphanumeric_probability: 0.05
entrance:
null_probability: 0.9
alphanumeric_probability: 0.1
unit:
null_probability: 0.6
alphanumeric_probability: 0.4
combinations:
# house_number + entrance + unit
-
components:
- house_number
- entrance
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.7
# house_number + entrance; the separator may be a space or empty
-
components:
- house_number
- entrance
label: house_number
# Separator distribution; probabilities sum to 1.0
separators:
- separator: " "
probability: 0.5
- separator: ""
probability: 0.2
- separator: "/"
probability: 0.1
- separator: "-"
probability: 0.1
- separator: " - "
probability: 0.1
probability: 0.7
# house_number + unit
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# Floor phrases. Each Hebrew-script phrase has a Latin-script counterpart
# (suffix _latin) that is used as a low-probability alternative.
levels:
koma: &koma
canonical: קומה
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: left
# numeric + ordinal probabilities sum to 1.0
numeric_probability: 0.4
ordinal_probability: 0.6
koma_latin: &koma_latin
canonical: koma
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
ordinal:
direction: left
numeric_probability: 0.4
ordinal_probability: 0.6
# Used for floor "0" in the aliases below
komat_karka: &komat_karka
canonical: קומת קרקע
sample: true
canonical_probability: 0.8
sample_probability: 0.2
komat_karka_latin: &komat_karka_latin
canonical: komát karká
sample: true
canonical_probability: 0.6
sample_probability: 0.4
# Used for negative floors in the aliases below; mostly standalone
martef: &martef
canonical: מרתף
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: left
number_abs_value: true
number_min_abs_value: 1
# standalone + numeric + ordinal probabilities sum to 1.0
standalone_probability: 0.985
numeric_probability: 0.01
ordinal_probability: 0.005
martef_latin: &martef_latin
canonical: martef
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: left
number_abs_value: true
number_min_abs_value: 1
standalone_probability: 0.985
numeric_probability: 0.01
ordinal_probability: 0.005
# Maps specific floor numbers to phrases; Hebrew script 0.9, Latin 0.1
aliases:
"<-1":
default: *martef
probability: 0.9
alternatives:
- alternative: *martef_latin
probability: 0.1
"-1":
default: *martef
probability: 0.9
alternatives:
- alternative: *martef_latin
probability: 0.1
"0":
default: *komat_karka
probability: 0.9
alternatives:
- alternative: *komat_karka_latin
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *koma
probability: 0.9
alternatives:
- alternative: *koma_latin
probability: 0.1
# The four identifier-shape probabilities below sum to 1.0
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Entrance phrases (Hebrew script and Latin script) and the settings used to
# generate entrance identifiers.
entrances:
knisa: &knisa
canonical: כניסה
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
knisa_latin: &knisa_latin
canonical: knisa
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# knisa 1, knisa A, etc.
alphanumeric:
# Hebrew script 0.99, Latin script 0.01
default: *knisa
probability: 0.99
alternatives:
- alternative: *knisa_latin
probability: 0.01
# Letters are far more likely than numbers; the two sum to 1.0
numeric_probability: 0.1
alpha_probability: 0.9
# PO box phrases and the settings used to generate PO box identifiers.
po_boxes:
ta_doar: &ta_doar
canonical: תיבת דואר
abbreviated: ת.ד.
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
# NOTE(review): the canonical phrase below is a Latin transliteration of a
# Russian phrase, not of the Hebrew one above - it looks copied from another
# language's configuration. Confirm the intended Latin-script phrase.
ta_doar_latin: &ta_doar_latin
canonical: abonementnyy pochtovyy yashchik
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric:
# Hebrew script 0.8, Latin script 0.2
default: *ta_doar
probability: 0.8
alternatives:
- alternative: *ta_doar_latin
probability: 0.2
# Shape of the generated identifier; the four probabilities sum to 1.0
numeric_probability: 0.9 # ta doar 123
alpha_probability: 0.05 # ta doar A
numeric_plus_alpha_probability: 0.04 # ta doar 123A
alpha_plus_numeric_probability: 0.01 # ta doar A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Distribution over the number of digits (1 to 6); probabilities sum to 1.0
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases (Hebrew script and Latin script) and the settings used to
# generate unit identifiers.
units:
dira: &dira
canonical: דירה
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
dira_latin: &dira_latin
canonical: dira
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &unit_alphanumeric
# Hebrew script 0.9, Latin script 0.1
default: *dira
probability: 0.9
alternatives:
- alternative: *dira_latin
probability: 0.1
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.9 # e.g. dira 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. dira A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1

586
resources/addresses/hr.yaml Normal file
View File

@@ -0,0 +1,586 @@
# hr.yaml
# -------
# Croatian language specification
# Per-component probabilities (each null/alphanumeric pair sums to 1.0) and
# the rules for combining components into a single house_number field. Every
# combination uses the same separators: "/" (0.95) or "-" (0.05).
components:
level:
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.7
alphanumeric_probability: 0.3
combinations:
# house_number + staircase + level + unit
-
components:
- house_number
- staircase
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
# house_number + level + unit
-
components:
- house_number
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
# house_number + level
-
components:
- house_number
- level
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# For unit types like 2/34
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
# Number phrases: "bez broja" (bb) for addresses without a number, and
# "broj" (br) with the "br." numeric affix.
numbers:
no_number:
default:
canonical: bez broja
abbreviated: bb
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
default: &broj
canonical: broj
abbreviated: br
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "br."
whitespace_probability: 0.6
direction: left
# numeric + numeric_affix probabilities sum to 1.0
numeric_probability: 0.4
numeric_affix_probability: 0.6
alphanumeric_phrase_probability: 0.05
no_number_probability: 0.05
# Conjunction phrase ("i"); anchored as &i for reuse in cross_streets.
and:
default: &i
canonical: i
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Phrases used to join two streets into an intersection, plus the "between"
# phrase.
cross_streets:
i: *i
at: &na
canonical: na
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &ugao
canonical: ugao
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &uglu
canonical: uglu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
na_uglu: &na_uglu
canonical: na uglu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Default (0.65) plus alternatives (0.1 + 0.1 + 0.1 + 0.05) sum to 1.0
intersection:
default: *i
probability: 0.65
alternatives:
- alternative: *na
probability: 0.1
- alternative: *uglu
probability: 0.1
- alternative: *na_uglu
probability: 0.1
- alternative: *ugao
probability: 0.05
izmedu: &izmedu
canonical: između
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
between:
default: *izmedu
# Floor phrases. Numbered floors ("kat", "etaža") may be written with ASCII
# digits or Roman numerals, per the digits settings on each phrase.
levels:
kat: &kat
canonical: kat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
# numeric + ordinal probabilities sum to 1.0
numeric_probability: 0.4
ordinal_probability: 0.6
etaza: &etaza
canonical: etaža
abbreviated: et
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# Ground-floor phrases, used for floor "0" in the aliases
prizemlje: &prizemlje
canonical: prizemlje
sample: true
canonical_probability: 0.9
sample_probability: 0.1
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.9
sample_probability: 0.1
# Half-floor (mezzanine) phrase
mezanino: &polukat
canonical: polukat
half_floors: true
canonical_probability: 0.8
sample_probability: 0.2
sample: true
# e.g. polukat 2
numeric:
direction: left
# e.g. 2. polukat
ordinal:
direction: right
# NOTE(review): these three probabilities sum to 0.9, not 1.0 - confirm
# which value is off.
numeric_probability: 0.1
ordinal_probability: 0.2
standalone_probability: 0.6
# Basement phrase; mostly standalone (0.99), rarely numbered
podrum: &podrum
canonical: podrum
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. podrum 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. podrum
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
# Maps specific floor numbers (and the special half_floors key) to phrases.
aliases:
# All negative floors use the basement phrase
"<-1":
default: *podrum
"-1":
default: *podrum
# Special token for half-floors
half_floors:
default: *polukat
# Ground floor: default (0.5) plus alternatives (0.4 + 0.1) sum to 1.0
"0":
default: *prizemlje
probability: 0.5
alternatives:
- alternative: *parter
probability: 0.4
- alternative: *kat
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *kat
probability: 0.95
alternatives:
- alternative: *etaza
probability: 0.05
# The five identifier-shape probabilities below sum to 1.0
numeric_probability: 0.69 # With this probability, pick an integer
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Phrases for category queries (near / nearby / near me / in).
categories:
near:
default:
canonical: u blizini
# Default (0.6) plus alternatives (0.3 + 0.1) sum to 1.0
nearby:
default:
canonical: u blizini
probability: 0.6
alternatives:
- alternative:
canonical: u blizini ovdje
probability: 0.3
- alternative:
canonical: oko ovdje
probability: 0.1
near_me:
default:
canonical: u blizini mene
# Don't worry about agreement
in:
default:
canonical: u
# Probabilities of each phrase (the four values sum to 1.0)
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Relative direction phrases ("desno", "lijevo"), chosen with equal
# probability.
directions:
right: &desno
canonical: desno
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &lijevo
canonical: lijevo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Even split; sums to 1.0
alternatives:
- alternative: *desno
probability: 0.5
- alternative: *lijevo
probability: 0.5
# Cardinal direction phrases (istok/zapad/sjever/jug), each with a
# single-letter abbreviation that is also used as its numeric affix.
cardinal_directions:
east: &istok
canonical: istok
abbreviated: i
# canonical + abbreviated probabilities sum to 1.0
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: i
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zapad
canonical: zapad
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &sjever
canonical: sjever
abbreviated: s
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Unlike the other three directions, this one also enables sampling
south: &jug
canonical: jug
abbreviated: j
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: j
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Uniform distribution over the four cardinal directions; sums to 1.0.
# (The earlier 0.25 / 0.23 / 0.23 / 0.23 split summed to only 0.94.)
alternatives:
- alternative: *sjever
probability: 0.25
- alternative: *istok
probability: 0.25
- alternative: *jug
probability: 0.25
- alternative: *zapad
probability: 0.25
# Entrance phrase ("ulaz") and the settings used to generate entrance
# identifiers.
entrances:
ulaz: &ulaz
canonical: ulaz
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Ulaz 1, Ulaz A, etc.
alphanumeric: &entrance_alphanumeric
default: *ulaz
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.1 # e.g. Ulaz 1
alpha_probability: 0.85 # e.g. Ulaz A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrase ("stubište") and the settings used to generate staircase
# identifiers, optionally with a direction word.
staircases:
stubiste: &stubiste
canonical: stubište
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *stubiste
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right
direction_probability: 0.85
modifier:
# Direction-word distribution; the six probabilities sum to 1.0
alternatives:
- alternative: *desno
probability: 0.2
- alternative: *lijevo
probability: 0.2
- alternative: *sjever
probability: 0.15
- alternative: *jug
probability: 0.15
- alternative: *istok
probability: 0.15
- alternative: *zapad
probability: 0.15
# PO box phrase ("poštanski pretinac", abbreviated "p.p") and the settings
# used to generate PO box identifiers.
po_boxes:
postanski_pretinac: &postanski_pretinac
canonical: poštanski pretinac
abbreviated: p.p
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.2
abbreviated_probability: 0.4
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
alphanumeric:
default: *postanski_pretinac
# Shape of the generated identifier; the four probabilities sum to 1.0
numeric_probability: 0.9 # pp 123
alpha_probability: 0.05 # p.p A
numeric_plus_alpha_probability: 0.04 # pp 123G
alpha_plus_numeric_probability: 0.01 # pp A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Distribution over the number of digits (1 to 6); probabilities sum to 1.0
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases (stan/apartman/soba/ured), the default unit selection and the
# zone-specific overrides.
units:
stan: &stan
canonical: stan
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
apartman: &apartman
canonical: apartman
abbreviated: ap
sample: true
# Phrase-form probabilities sum to 1.0
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
soba: &soba
canonical: soba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
ured: &ured
canonical: ured
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
alphanumeric: &unit_alphanumeric
# Default (0.6) plus alternatives (0.3 + 0.1) sum to 1.0
default: *stan
probability: 0.6
alternatives:
- alternative: *apartman
probability: 0.3
- alternative: *soba
probability: 0.1
# The four identifier-shape probabilities sum to 1.0
numeric_probability: 0.9 # e.g. stan. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. stan A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05
zones:
commercial: &commercial_unit_types
# Default (0.6) plus alternative (0.4) sum to 1.0
default: *soba
probability: 0.6
alternatives:
- alternative: *ured
probability: 0.4
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *soba
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

439
resources/addresses/hu.yaml Normal file
View File

@@ -0,0 +1,439 @@
# hu.yaml
# -------
# Hungarian language specification.
components:
level:
null_probability: 0.75
alphanumeric_probability: 0.2
standalone_probability: 0.05
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
-
components:
- level
- unit
label: unit
separators:
- separator: "/"
probability: 0.55
- separator: " "
probability: 0.4
- separator: "-"
probability: 0.05
probability: 0.8
numbers:
  # Generic "number" phrase (szám) used when a number is labelled explicitly.
  default: &szam
    canonical: szám
    sample: true
    # Probabilities
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    # Only the plain `numeric` form is defined for this phrase, so it takes the
    # whole probability mass. The earlier 0.4 / 0.6 split sent 60% of draws to
    # a `numeric_affix` form that has no definition in this entry.
    numeric_probability: 1.0
and:
default: &es
canonical: és
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
probability: 0.6
alternatives:
- alternative: &es_a
canonical: és a
canonical_probability: 0.9
sample: true
sample_probability: 0.1
probability: 0.2
- alternative: &es_az
canonical: és az
canonical_probability: 0.9
sample: true
sample_probability: 0.1
probability: 0.2
cross_streets:
and: *es
corner_of: &sarkan
canonical: sarkán
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *es
probability: 0.6
alternatives:
- alternative: *es_a
probability: 0.1
- alternative: *es_az
probability: 0.1
- alternative: *sarkan
probability: 0.2
between:
canonical: között
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &emelet
canonical: emelet
abbreviated: em
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.85
sample_probability: 0.05
numeric:
direction: left
direction_probability: 0.9
ordinal:
direction: right
digits:
ascii_probability: 0.2
roman_numeral_probability: 0.8
numeric_probability: 0.1
ordinal_probability: 0.9
foldszint: &foldszint
canonical: földszint
abbreviated: fszt
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
felemelet: &felemelet
canonical: félemelet
sample: true
canonical_probability: 0.8
sample_probability: 0.2
magasfoldszint: &magasfoldszint
canonical: magasföldszint
sample: true
canonical_probability: 0.8
sample_probability: 0.2
pince: &pince
canonical: pince
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
alagsor: &alagsor
canonical: alagsor
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
felszuteren: &felszuteren
canonical: félszuterén
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
szuteren: &szuteren
canonical: szuterén
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *alagsor
probability: 0.6
alternatives:
- alternative: *pince
probability: 0.3
- alternative: *szuteren
probability: 0.1
"-1":
default: *alagsor
probability: 0.5
alternatives:
- alternative: *pince
probability: 0.3
- alternative: *szuteren
probability: 0.1
- alternative: *felszuteren
probability: 0.1
"0":
default: *foldszint
probability: 0.9
alternatives:
- alternative: *emelet
probability: 0.1
"1":
default: *emelet
probability: 0.9
alternatives:
- alternative: *felemelet
probability: 0.1
"2":
default: *emelet
probability: 0.9
alternatives:
- alternative: *magasfoldszint
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *emelet
numeric_probability: 0.59 # With this probability, pick an integer
roman_numeral_probability: 0.4 # Pick a Roman numeral for the actual value
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: közelében
sample: true
canonical_probability: 0.8
sample_probability: 0.2
nearby:
default:
canonical: közelben
sample: true
canonical_probability: 0.8
sample_probability: 0.2
near_me:
default:
canonical: közelemben
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Probabilities of each phrase
near_probability: 0.7
nearby_probability: 0.2
near_me_probability: 0.1
directions:
right: &jobb
canonical: jobb
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &bal
canonical: bal
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *jobb
probability: 0.5
- alternative: *bal
probability: 0.5
cardinal_directions:
  # Hungarian compass points. Abbreviations follow Hungarian usage
  # (É, K, D, Ny), lower-cased like every other value in this file.
  east: &kelet
    canonical: kelet
    abbreviated: k
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: k
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &nyugat
    canonical: nyugat
    # "ny" is the Hungarian abbreviation of nyugat (was the bare letter "n").
    abbreviated: ny
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: ny
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &eszak
    canonical: észak
    # Keep the accent: the abbreviation of észak is "é", not "e".
    abbreviated: é
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: é
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  south: &del
    canonical: dél
    abbreviated: d
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: d
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # Each compass point is equally likely when one has to be picked.
  alternatives:
    - alternative: *eszak
      probability: 0.25
    - alternative: *kelet
      probability: 0.25
    - alternative: *del
      probability: 0.25
    - alternative: *nyugat
      probability: 0.25
po_boxes:
postafiok: &postafiok
canonical: postafiók
abbreviated: pf
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.7
sample_probability: 0.1
numeric:
direction: left
alphanumeric:
default: *postafiok
numeric_probability: 0.9 # Pf 123
alpha_probability: 0.05 # Pf A
numeric_plus_alpha_probability: 0.04 # Pf 123G
alpha_plus_numeric_probability: 0.01 # Pf A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
lakas: &lakas
canonical: lakás
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
numeric_probability: 0.3
ordinal_probability: 0.7
iroda: &iroda
canonical: iroda
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
szoba: &szoba
canonical: szoba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *lakas
probability: 0.9
alternatives:
- alternative: *szoba
probability: 0.1
numeric_probability: 0.95 # e.g. lakás 1
numeric_plus_alpha_probability: 0.005 # e.g. 1A
alpha_plus_numeric_probability: 0.005 # e.g. A1
alpha_probability: 0.04 # e.g. lakás A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.2
zones:
commercial: &commercial_unit_types
default: *iroda
numeric_probability: 0.95 # e.g. iroda 1
numeric_plus_alpha_probability: 0.01 # e.g. iroda 1A
alpha_plus_numeric_probability: 0.01 # e.g. iroda A1
alpha_probability: 0.03 # e.g. iroda A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university: *commercial_unit_types

459
resources/addresses/is.yaml Normal file
View File

@@ -0,0 +1,459 @@
# is.yaml
# -------
# Icelandic language specification.
components:
level:
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
-
components:
- level
- unit
label: unit
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.1
probability: 0.005
-
components:
- entrance
- unit
label: unit
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.1
probability: 0.001
numbers:
default: &numer
canonical: númer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_numbers:
alphanumeric:
default: *numer
alphanumeric_phrase_probability: 0.0001
and:
default: &og
canonical: og
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *og
corner_of: &horn_of
canonical: horn af
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &a_horinu_a
canonical: á horninu á
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *og
probability: 0.7
alternatives:
- alternative: *horn_of
probability: 0.15
- alternative: *a_horinu_a
probability: 0.15
between:
canonical: milli
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &haeo
canonical: hæð
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.4
ordinal_probability: 0.6
jarohaeo: &jarohaeo
canonical: jarðhæð
sample: true
canonical_probability: 0.3
sample_probability: 0.7
kjallara: &kjallara
canonical: kjallara
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. 1 kjallara
numeric:
direction: right
direction_probability: 0.8
# e.g. k1
numeric_affix:
affix: k
direction: left
# e.g. 1. kjallara
ordinal:
direction: right
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *kjallara
"-1":
default: *kjallara
"0":
default: *jarohaeo
numbering_starts_at: 0
alphanumeric:
default: *haeo
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: nálægt
sample: true
canonical_probability: 0.8
sample_probability: 0.2
nearby:
default:
canonical: nálægt
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.6
alternatives:
- alternative:
canonical: nálægt hér
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: hérna
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: hér
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: nálægt mér
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Don't worry about agreement
in:
default:
canonical: í
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &til_haegri
canonical: til hægri
abbreviated: t.h
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: t.h
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &til_vinstri
canonical: til vinstri
abbreviated: t.v
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.6
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: t.v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
alternatives:
- alternative: *til_haegri
probability: 0.5
- alternative: *til_vinstri
probability: 0.5
cardinal_directions:
east: &austur
canonical: austur
abbreviated: a
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: a
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &vestur
canonical: vestur
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &norour
canonical: norður
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &suour
canonical: suður
abbreviated: s
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *norour
probability: 0.25
- alternative: *austur
probability: 0.25
- alternative: *suour
probability: 0.25
- alternative: *vestur
probability: 0.25
entrances:
inngangur: &inngangur
canonical: inngangur
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Inngangur 1, Inngangur A, etc.
alphanumeric: &entrance_alphanumeric
default: *inngangur
numeric_probability: 0.1 # e.g. Inngangur 1
alpha_probability: 0.85 # e.g. Inngangur A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
stiege: &stigi
canonical: stigi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *stigi
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *norour
- alternative: *suour
- alternative: *austur
- alternative: *vestur
po_boxes:
postholf: &postholf
canonical: pósthólf
abbreviated: ph
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # Ph Nr 1234
alphanumeric:
sample: false
default: *postholf
numeric_probability: 0.9 # Ph 123
alpha_probability: 0.05 # Ph A
numeric_plus_alpha_probability: 0.04 # Ph 123G
alpha_plus_numeric_probability: 0.01 # Ph A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
ibuo: &ibuo
canonical: íbúð
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
null_phrase_probability: 0.5
# íbúð númer 4
add_number_phrase: true
add_number_phrase_probability: 0.05
alphanumeric: &unit_alphanumeric
default: *ibuo
numeric_probability: 0.9 # e.g. íbúð 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. íbúð A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2R, 2L, etc.
add_direction: true
add_direction_probability: 0.1
# Add directions for plain numbers
add_direction_numeric: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1

673
resources/addresses/it.yaml Normal file
View File

@@ -0,0 +1,673 @@
# it.yaml
# -------
# Italian language specification
components:
level:
# If no floor number is specified
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
# If no unit number is specified
null_probability: 0.8
alphanumeric_probability: 0.2
combinations:
-
components:
- house_number
- unit
label: house_number
separators:
- separator: /
probability: 1.0
probability: 0.5
numbers:
default: &numero
canonical: numero
abbreviated: "nº"
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.3
sample_probability: 0.5
numeric:
direction: left
numeric_affix:
affix: "n."
direction: left
# Probabilities for numbers
numeric_probability: 0.7
numeric_affix_probability: 0.3
and:
default: &e
canonical: e
abbreviated: "&"
canonical_probability: 0.7
abbreviated_probability: 0.25
sample: true
sample_probability: 0.05
house_numbers:
# senza numero civico (snc) addresses
no_number:
canonical: senza numero civico
abbreviated: snc
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
alphanumeric:
default: *numero
alphanumeric_phrase_probability: 0.01
no_number_probability: 0.05 # With this probability, use "senza numero civico" if no house_number is specified
levels:
floor: &piano
  # Ordinary floor phrase, e.g. "piano 3" (numeric) or "3º piano" (ordinal).
  canonical: piano
  # The abbreviation was empty (null) even though abbreviated_probability
  # gives it 15% of draws. "p" follows the dot-less style of "p.t" used for
  # piano terra in this file.
  abbreviated: p
  sample: true
  canonical_probability: 0.6
  abbreviated_probability: 0.15
  sample_probability: 0.25
  numeric:
    direction: left
    direction_probability: 0.95
    add_number_phrase: true
    add_number_phrase_probability: 0.05
    digits:
      ascii_probability: 0.9
      roman_numeral_probability: 0.1
  ordinal:
    direction: right
    digits:
      ascii_probability: 0.5
      spellout_probability: 0.2
      roman_numeral_probability: 0.3
  # Split between the numeric and ordinal renderings above
  numeric_probability: 0.55
  ordinal_probability: 0.45
livello: &livello
canonical: livello
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.05
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
numeric_probability: 0.75
ordinal_probability: 0.25
piano_nobile: &piano_nobile
canonical: piano nobile
sample: true
canonical_probability: 0.9
sample_probability: 0.1
piano_terra: &piano_terra
canonical: piano terra
abbreviated: p.t
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.25
sample_probability: 0.25
basement: &seminterrato
canonical: seminterrato
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 1
standalone_probability: 0.99
numeric_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *seminterrato
probability: 0.995
alternatives:
- alternative: *piano
probability: 0.005
"-1":
default: *seminterrato
probability: 0.9995
alternatives:
- alternative: *piano
probability: 0.0005
"0":
default: *piano_terra
probability: 0.95
alternatives:
- alternative: *piano
probability: 0.05
"1":
default: *piano
probability: 0.9
alternatives:
- alternative: *piano_nobile
probability: 0.1
alphanumeric:
default: *piano
probability: 0.95
alternatives:
- alternative: *livello
probability: 0.05
numeric_probability: 0.99
alpha_probability: 0.01
numbering_starts_at: 0
cross_streets:
# 26th & 6th Avenue
and: *e
# 26th @ Broadway
a: &a
canonical: a
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &angolo_di
canonical: angolo di
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &angolo
canonical: angolo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &all_angolo_tra
canonical: all'angolo tra
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *e
probability: 0.7
alternatives:
- alternative: *a
probability: 0.05
- alternative: *angolo_di
probability: 0.15
- alternative: *all_angolo_tra
probability: 0.1
# 26th betw 5th Ave and 6th Ave
between:
canonical: tra
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5 # Probability of using parentheses e.g. (between 5th and 6th)
categories:
near:
default:
canonical: vicino a
probability: 0.75
alternatives:
- alternative:
canonical: presso a
probability: 0.25
nearby:
default:
canonical: vicino
probability: 0.7
alternatives:
- alternative:
canonical: qui vicino
probability: 0.1
- alternative:
canonical: nelle vicinanze
probability: 0.1
- alternative:
canonical: intorno a qui
probability: 0.1
near_me:
default:
canonical: vicino a me
# Don't worry about agreement
in:
default:
canonical: a
probability: 0.7
alternatives:
- alternative:
canonical: ad
probability: 0.15
- alternative:
canonical: in
probability: 0.15
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &destra
canonical: destra
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: right
left: &sinistra
canonical: sinistra
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: right
rear: &posteriore
canonical: posteriore
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
front: &anteriore
canonical: anteriore
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *destra
probability: 0.49
- alternative: *sinistra
probability: 0.49
- alternative: *posteriore
probability: 0.01
- alternative: *anteriore
probability: 0.01
anteroposterior:
alternatives:
- alternative: *anteriore
probability: 0.5
- alternative: *posteriore
probability: 0.5
lateral:
alternatives:
- alternative: *destra
probability: 0.5
- alternative: *sinistra
probability: 0.5
cardinal_directions:
east: &est
canonical: est
abbreviated: e
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: e
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &ovest
canonical: ovest
abbreviated: o
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: o
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &sud
canonical: sud
abbreviated: s
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *est
probability: 0.25
- alternative: *sud
probability: 0.25
- alternative: *ovest
probability: 0.25
entrances:
entrance: &ingresso
canonical: ingresso
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Ingresso 1, Ingresso A, etc.
alphanumeric: &entrance_alphanumeric
default: *ingresso
numeric_probability: 0.1 # e.g. Ingresso 1
alpha_probability: 0.85 # e.g. Ingresso A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
modifier:
direction: right # e.g. Ingresso Nord
direction_probability: 0.95
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *ovest
- alternative: *destra
- alternative: *sinistra
- alternative: *posteriore
- alternative: *anteriore
staircases:
scala: &scala
canonical: scala
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Scala A, Scala 1, etc.
default: *scala
numeric_probability: 0.6 # e.g. Scala 1
alpha_probability: 0.35 # e.g. Scala A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right # e.g. Scala Destra
direction_probability: 0.9
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *ovest
- alternative: *destra
- alternative: *sinistra
- alternative: *posteriore
- alternative: *anteriore
po_boxes:
casella_postale: &casella_postale
canonical: casella postale
abbreviated: cp
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # CP No 1234
numeric_probability: 1.0
alphanumeric:
default: *casella_postale
numeric_probability: 0.9 # CP 123
alpha_probability: 0.05 # CP A
numeric_plus_alpha_probability: 0.04 # CP 123G
alpha_plus_numeric_probability: 0.01 # CP A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
flat: &appartamento
canonical: appartamento
abbreviated: app
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
casa: &casa
canonical: casa
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
unit: &unita
canonical: unità
abbreviated: u
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
office: &officina
canonical: officina
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.3
lotto: &lotto
canonical: lotto
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
door: &porta
canonical: porta
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
interno: &interno
canonical: interno
abbreviated: int
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
room: &sala
canonical: sala
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
alphanumeric: &unit_alphanumeric
default: *appartamento
probability: 0.75
alternatives:
- alternative: *interno
probability: 0.1
# e.g. just plain #3 or No. 4
- alternative: *numero
probability: 0.05
- alternative: *casa
probability: 0.05
- alternative: *porta
probability: 0.045
- alternative: *sala
probability: 0.005
numeric_probability: 0.9 # e.g. Appartamento 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. Appartamento A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2 destra, 2 sinistra, etc.
add_direction: true
add_direction_probability: 0.1
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Unità Sinistra
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1
zones:
residential: *unit_alphanumeric
commercial:
default: *officina
probability: 0.8
alternatives:
- alternative: *sala
probability: 0.2
numeric_probability: 0.9 # e.g. Officina 1
numeric_plus_alpha_probability: 0.01 # e.g. Officina 1A
alpha_plus_numeric_probability: 0.01 # e.g. Officina A1
alpha_probability: 0.08 # e.g. Officina A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
industrial:
default: *lotto
probability: 0.5
alternatives:
- alternative: *officina
probability: 0.3
- alternative: *unita
probability: 0.2
numeric_probability: 0.9 # e.g. Lotto 1
numeric_plus_alpha_probability: 0.01 # e.g. Lotto 1A
alpha_plus_numeric_probability: 0.01 # e.g. Lotto A1
alpha_probability: 0.08 # e.g. Lotto A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *sala
probability: 0.9
alternatives:
- alternative: *porta
probability: 0.1
numeric_probability: 0.9 # e.g. Sala 1
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
alpha_probability: 0.08 # e.g. Sala A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

161
resources/addresses/ja.yaml Normal file
View File

@@ -0,0 +1,161 @@
# ja.yaml
# -------
# Japanese language specification
whitespace: false
components:
  level:
    # When no floor number is specified: generate one, or do nothing (null).
    alphanumeric_probability: 0.05
    null_probability: 0.95
  unit:
    # When no unit number is specified, the baseline is to add nothing ...
    null_probability: 1.0
    # ... with separate probabilities keyed on another component
    # (presumably applied when that component is present; confirm in the
    # consumer).
    conditional:
      - component: level
        probabilities:
          alphanumeric_probability: 0.05
          null_probability: 0.95
      - component: house_number
        probabilities:
          alphanumeric_probability: 0.4
          null_probability: 0.6
  combinations:
    # Unit is just appended onto the house number
    - label: house_number
      components:
        - house_number
        - unit
      separators:
        - separator: '-'
          probability: 1.0
      probability: 1.0
numbers:
  # Number marker 号 (romanised "go"; the Romaji spec uses `go` / `-go` for the
  # same entry). canonical and affix were empty (null) although the affix form
  # is selected with probability 1.0.
  default: &go
    canonical: 号
    numeric_affix:
      affix: 号
      direction: right
    # Never emitted as a separate phrase, always as an affix on the number
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
blocks:
  alphanumeric:
    # Block marker 番 (romanised "ban"). canonical and affix were empty (null)
    # for this default even though it is the variant picked 85% of the time.
    default: &ban
      canonical: 番
      numeric_affix:
        affix: 番
        direction: right
      numeric_probability: 0.0
      numeric_affix_probability: 1.0
    probability: 0.85
    # Longer variants of the same marker
    alternatives:
      - alternative: &banchi
          canonical: 番地
          numeric_affix:
            affix: 番地
            direction: right
          numeric_probability: 0.0
          numeric_affix_probability: 1.0
        probability: 0.1
      - alternative: &banchi_no
          canonical: 番地の
          numeric_affix:
            affix: 番地の
            direction: right
          numeric_probability: 0.0
          numeric_affix_probability: 1.0
        probability: 0.05
    # Block identifiers are always plain numbers
    numeric_probability: 1.0
  alphanumeric_phrase_probability: 0.4
house_numbers:
  alphanumeric_phrase_probability: 0.4
  alphanumeric:
    # House numbers reuse the shared number entry anchored as &go in `numbers`
    default: *go
levels:
  # Floor marker 階 (romanised "kai"; the Romaji spec uses `kai` / `-kai`).
  # canonical and affix were empty (null) although the affix form is selected
  # with probability 1.0.
  kai: &kai
    canonical: 階
    numeric_affix:
      affix: 階
      direction: right
      # Script used for the digits themselves
      digits:
        ascii_probability: 0.3
        unicode_full_width_probability: 0.5
        spellout_probability: 0.2
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  numbering_starts_at: 1
  alphanumeric:
    default: *kai
    numeric_probability: 1.0
po_boxes:
  # PO box marker 私書箱, always written as an affix (probability 1.0) with
  # direction left.
  shishobako: &shishobako
    canonical: 私書箱
    numeric_affix:
      affix: 私書箱
      direction: left
      digits:
        # Stated explicitly so the three digit styles sum to 1.0, matching the
        # 0.3 / 0.5 / 0.2 split of the floor entry (`kai`) in this file.
        # Previously only 0.5 + 0.2 was listed.
        # NOTE(review): assumes the unlisted 0.3 was meant for ASCII digits.
        ascii_probability: 0.3
        unicode_full_width_probability: 0.5
        spellout_probability: 0.2
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  alphanumeric:
    default: *shishobako
    numeric_probability: 1.0
  # Distribution of the number of digits in a generated box number
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
metro_stations:
  alphanumeric:
    # Station marker 駅 (romanised "eki", per the anchor name). canonical and
    # affix were empty (null) although the affix form is selected with
    # probability 1.0.
    # NOTE(review): character inferred from the anchor name; confirm.
    default: &eki
      canonical: 駅
      numeric:
        direction: right
      numeric_affix:
        affix: 駅
        direction: right
      numeric_affix_probability: 1.0
  alphanumeric_phrase_probability: 1.0
postcodes:
  alphanumeric:
    default:
      # Japanese postal mark 〒, placed to the left of the postal code.
      # canonical and affix were empty (null) although the affix form is
      # selected with probability 0.9.
      # NOTE(review): character inferred from the left-hand placement and
      # standard Japanese usage; confirm.
      canonical: 〒
      numeric_affix:
        affix: 〒
        direction: left
      # null_probability means the chance of doing nothing e.g. just the postal code
      null_probability: 0.1
      numeric_probability: 0.0
      numeric_affix_probability: 0.9
# Unit-number generation settings for Japanese.
units:
alphanumeric:
# Unit identifiers are always plain numbers (no letter or mixed forms)
numeric_probability: 1.0
# NOTE(review): the exact meaning of this switch is not visible here; the
# name suggests only positive unit numbers are generated. Confirm in the consumer.
use_positive_numbers_probability: 1.0
# If we have a floor number (from building:levels), use it
use_floor_probability: 0.8

View File

@@ -0,0 +1,180 @@
# ja_rm.yaml
# ----------
# Romaji (Romanized Japanese) language specification
# Probabilities for the optional address components in Romaji addresses, plus the
# single house_number + unit combination.
components:
level:
null_probability: 0.95 # Probability of doing nothing if no floor number is specified
alphanumeric_probability: 0.05
unit:
# If no unit number is specified
null_probability: 1.0
# Per-component overrides of the unit probabilities above.
# NOTE(review): presumably applied when the named component is present - confirm.
conditional:
- component: level
probabilities:
null_probability: 0.95
alphanumeric_probability: 0.05
- component: house_number
probabilities:
null_probability: 0.6
alphanumeric_probability: 0.4
combinations:
# Unit is just appended onto the house number
-
components:
- house_number
- unit
# The combined value is labelled as a house_number.
label: house_number
separators:
- separator: "-"
probability: 1.0
probability: 1.0
# Number phrase (Romaji): "-go" is attached on the right of the number and is
# always rendered as a numeric affix (numeric_affix_probability: 1.0).
numbers:
default: &go
canonical: go
numeric_affix:
affix: -go
# NOTE(review): presumably keeps the affix lower-case in the output - confirm.
upper_case: false
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
# Block-number phrases (Romaji). Every phrase is rendered as a lower-case suffix
# on the right of the number (numeric_affix_probability: 1.0, direction: right).
# The default and the two alternatives are weighted 0.85 / 0.1 / 0.05.
blocks:
alphanumeric:
default: &ban
canonical: ban
numeric_affix:
affix: -ban
upper_case: false
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
probability: 0.85
alternatives:
- alternative: &banchi
canonical: banchi
numeric_affix:
# Fix: this was "-ban", a copy of the default's affix, so the `banchi`
# alternative rendered exactly like `ban`. It now matches its own canonical
# form, as `banchi_no` does below.
affix: -banchi
upper_case: false
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
probability: 0.1
- alternative: &banchi_no
canonical: banchi-no
numeric_affix:
affix: -banchi-no
upper_case: false
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
probability: 0.05
numeric_probability: 1.0
# NOTE(review): presumably the chance of attaching a phrase to the number - confirm.
alphanumeric_phrase_probability: 0.4
# House-number phrase (Romaji): reuses the `go` anchor from the `numbers` section.
house_numbers:
alphanumeric:
default: *go
# NOTE(review): presumably the chance of attaching the phrase to the number - confirm.
alphanumeric_phrase_probability: 0.4
# Floor phrases (Romaji): "-kai" (default, 0.6) and "-gai" (alternative, 0.4),
# both lower-case suffixes on the right of the floor number and always rendered
# as numeric affixes.
levels:
kai: &kai
canonical: kai
numeric_affix:
affix: -kai
upper_case: false
direction: right
# Weights for how the digits are written; they sum to 1.0.
digits:
ascii_probability: 0.3
unicode_full_width_probability: 0.5
spellout_probability: 0.2
numeric_probability: 0.0
numeric_affix_probability: 1.0
gai: &gai
canonical: gai
numeric_affix:
affix: -gai
upper_case: false
direction: right
# Same digit weights as `kai`.
digits:
ascii_probability: 0.3
unicode_full_width_probability: 0.5
spellout_probability: 0.2
numeric_probability: 0.0
numeric_affix_probability: 1.0
# Floor numbering starts at 1.
numbering_starts_at: 1
alphanumeric:
default: *kai
probability: 0.6
alternatives:
- alternative: *gai
probability: 0.4
numeric_probability: 1.0
# PO box phrase (Romaji): "shishobako" as a plain phrase on the left of the box
# number. No numeric_affix is defined here.
po_boxes:
shishobako: &shishobako
canonical: shishobako
numeric:
direction: left
numeric_probability: 1.0
alphanumeric:
default: *shishobako
numeric_probability: 1.0
# Weight per box-number digit length; the weights sum to 1.0.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Metro-station phrase (Romaji): "eki" / "-eki" on the right, always attached
# (alphanumeric_phrase_probability: 1.0).
metro_stations:
alphanumeric:
default: &eki
canonical: eki
numeric:
direction: right
# NOTE(review): presumably stops the phrase from being title-cased - confirm.
title_case: false
numeric_affix:
affix: -eki
title_case: false
direction: right
numeric_affix_probability: 1.0
alphanumeric_phrase_probability: 1.0
# Postcode marker (Romaji): a symbol placed on the left of the postal code 90% of
# the time; otherwise the bare code is used.
postcodes:
alphanumeric:
# This should still be the default in Romaji
default:
# Fix: `canonical` and `affix` were empty (null) although the affix is used with
# probability 0.9. Restored to the postal mark 〒, which is not romanized.
# NOTE(review): the original character is not visible here - confirm it is 〒.
canonical: 〒
numeric_affix:
affix: 〒
direction: left
# null_probability means the chance of doing nothing e.g. just the postal code
null_probability: 0.1
numeric_probability: 0.0
numeric_affix_probability: 0.9
# Unit numbers (Romaji): no phrase is configured here, only numeric settings.
units:
alphanumeric:
numeric_probability: 1.0
# NOTE(review): presumably forces unit numbers to be positive - confirm.
use_positive_numbers_probability: 1.0
# If we have a floor number (from building:levels), use it
use_floor_probability: 0.8

122
resources/addresses/ko.yaml Normal file
View File

@@ -0,0 +1,122 @@
# ko.yaml
# -------
# Korean language specification
# NOTE(review): presumably disables whitespace between phrases and numbers - confirm.
whitespace: false
# Probabilities for the optional address components in Korean addresses, plus the
# single house_number + unit combination.
components:
level:
null_probability: 0.85 # Probability of doing nothing if no floor number is specified
alphanumeric_probability: 0.15
unit:
# If no unit number is specified
null_probability: 0.6
alphanumeric_probability: 0.4
# NOTE(review): this `numbers:` key has no value of its own and a second
# `numbers:` section follows `combinations`. Confirm the intended nesting; if
# both sit at the same level this is a duplicate key.
numbers:
combinations:
# Unit is just appended onto the house number
-
components:
- house_number
- unit
# The combined value is labelled as a house_number.
label: house_number
separators:
- separator: "-"
probability: 1.0
probability: 1.0
# Number phrases (Korean): a suffix on the right of the number, always rendered
# as a numeric affix. The default and the traditional alternative are weighted
# 0.9 / 0.1.
numbers:
default: &ho
# Fix: `canonical` and `affix` were empty (null), so the default suffix had
# nothing to render. Restored to 호 (ko_rm.yaml romanizes it as "ho").
canonical: 호
numeric_affix:
affix: 호
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
probability: 0.9
alternatives:
- alternative: &ho_traditional
# Fix: also empty in the original.
# NOTE(review): presumably the hanja form 號 - confirm before merging.
canonical: 號
numeric_affix:
affix: 號
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
probability: 0.1
# Floor phrase (Korean): a suffix on the right of the floor number, always
# rendered as a numeric affix (numeric_affix_probability: 1.0).
levels:
cheung: &cheung
# Fix: `canonical` and `affix` were empty (null) although this is the only floor
# phrase and it is always used as an affix. Restored to 층 (ko_rm.yaml romanizes
# it as "cheung").
canonical: 층
numeric_affix:
affix: 층
direction: right
# Weights for how the digits are written; they sum to 1.0.
digits:
ascii_probability: 0.3
unicode_full_width_probability: 0.5
spellout_probability: 0.2
numeric_probability: 0.0
numeric_affix_probability: 1.0
# Floor numbering starts at 1.
numbering_starts_at: 1
alphanumeric:
default: *cheung
numeric_probability: 1.0
# PO box phrase (Korean): 사서함 is placed on the left of the box number and is
# always used as a numeric affix.
po_boxes:
saseoham: &saseoham
canonical: 사서함
numeric_affix:
affix: 사서함
direction: left
# Weights for how the digits are written; they sum to 1.0.
digits:
ascii_probability: 0.7
unicode_full_width_probability: 0.1
spellout_probability: 0.2
numeric_probability: 0.0
numeric_affix_probability: 1.0
alphanumeric:
default: *saseoham
numeric_probability: 1.0
# Weight per box-number digit length; the weights sum to 1.0.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Postcode phrase (Korean): 우편번호 is placed on the left of the postal code 10%
# of the time; otherwise the bare code is used.
postcodes:
alphanumeric:
default: &upyeon_beonho
canonical: 우편번호
numeric_affix:
affix: 우편번호
direction: left
# null_probability means the chance of doing nothing e.g. just the postal code
null_probability: 0.9
numeric_probability: 0.0
numeric_affix_probability: 0.1
# Unit phrases (Korean): reuse the `ho` (0.9) and `ho_traditional` (0.1) anchors
# from the `numbers` section.
units:
alphanumeric:
default: *ho
probability: 0.9
alternatives:
- alternative: *ho_traditional
probability: 0.1
numeric_probability: 1.0
# NOTE(review): presumably forces unit numbers to be positive - confirm.
use_positive_numbers_probability: 1.0
# If we have a floor number (from building:levels), use it
use_floor_probability: 0.8

View File

@@ -0,0 +1,90 @@
# ko_rm.yaml
# ----------
# Romanized Korean language specification
# NOTE(review): presumably disables whitespace between phrases and numbers - confirm.
whitespace: false
# Probabilities for the optional address components in romanized Korean
# addresses, plus the single house_number + unit combination.
components:
level:
null_probability: 0.85 # Probability of doing nothing if no floor number is specified
alphanumeric_probability: 0.15
unit:
# If no unit number is specified
null_probability: 0.6
alphanumeric_probability: 0.4
# NOTE(review): this `numbers:` key has no value of its own and a second
# `numbers:` section follows `combinations`. Confirm the intended nesting; if
# both sit at the same level this is a duplicate key.
numbers:
combinations:
# Unit is just appended onto the house number
-
components:
- house_number
- unit
# The combined value is labelled as a house_number.
label: house_number
separators:
- separator: "-"
probability: 1.0
probability: 1.0
# Number phrase (romanized Korean): "-ho" is attached on the right of the number
# and is always rendered as a numeric affix.
numbers:
default: &ho
canonical: ho
numeric_affix:
affix: -ho
# NOTE(review): presumably keeps the affix lower-case in the output - confirm.
upper_case: false
direction: right
numeric_probability: 0.0
numeric_affix_probability: 1.0
# Floor phrase (romanized Korean): "-cheung" is attached on the right of the
# floor number and is always rendered as a numeric affix.
levels:
cheung: &cheung
canonical: cheung
numeric_affix:
affix: -cheung
upper_case: false
direction: right
# Weights for how the digits are written; they sum to 1.0.
digits:
ascii_probability: 0.3
unicode_full_width_probability: 0.5
spellout_probability: 0.2
numeric_probability: 0.0
numeric_affix_probability: 1.0
# Floor numbering starts at 1.
numbering_starts_at: 1
alphanumeric:
default: *cheung
numeric_probability: 1.0
# PO box phrase (romanized Korean): "saseoham" as a plain phrase on the left of
# the box number. No numeric_affix is defined here.
po_boxes:
saseoham: &saseoham
canonical: saseoham
numeric:
direction: left
alphanumeric:
default: *saseoham
numeric_probability: 1.0
# Weight per box-number digit length; the weights sum to 1.0.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrase (romanized Korean): reuses the `ho` anchor from the `numbers` section.
units:
alphanumeric:
default: *ho
numeric_probability: 1.0
# NOTE(review): presumably forces unit numbers to be positive - confirm.
use_positive_numbers_probability: 1.0
# If we have a floor number (from building:levels), use it
use_floor_probability: 0.8

391
resources/addresses/lt.yaml Normal file
View File

@@ -0,0 +1,391 @@
# lt.yaml
# -------
# Lithuanian language specification.
# Probabilities for the optional address components in Lithuanian addresses,
# plus the house_number + unit combination.
components:
# The three level weights sum to 1.0.
level:
null_probability: 0.97
alphanumeric_probability: 0.02
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
# House number and unit joined by "-" (0.95) or " - " (0.05), labelled as a
# house_number.
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "-"
probability: 0.95
- separator: " - "
probability: 0.05
probability: 0.8
# Number phrase (Lithuanian): "numeris" / "nr" placed on the left of the number,
# or "#" used as a prefix affix.
numbers:
default: &numeris
canonical: numeris
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
# NOTE(review): presumably keeps "#" out of the sampled variants, since it is
# already covered by the numeric_affix below - confirm.
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
# Plain phrase vs "#" affix; the weights sum to 1.0.
numeric_probability: 0.4
numeric_affix_probability: 0.6
# Conjunction (Lithuanian): "ir" or "&"; the three weights sum to 1.0.
and:
default: &ir
canonical: ir
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Cross-street phrases (Lithuanian): conjunction, corner phrase and "between".
cross_streets:
and: *ir
corner_of: &kampelis
canonical: kampelis
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersections use "ir" (0.7) or "kampelis" (0.3).
intersection:
default: *ir
probability: 0.7
alternatives:
- alternative: *kampelis
probability: 0.3
# NOTE(review): "nuo" usually translates as "from"; "between" is normally
# "tarp" in Lithuanian - confirm with a native speaker.
between:
canonical: nuo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# Floor phrases (Lithuanian): regular floors, ground floor and basement, plus
# aliases that map specific floor numbers to phrases.
levels:
# Regular floor: the phrase goes on the left of a plain number and on the right
# of an ordinal.
aukstas: &aukstas
canonical: aukštas
abbreviated: auk
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
numeric_probability: 0.2
ordinal_probability: 0.8
# Same settings as `aukstas` via a merge key, with a different canonical form.
aukste: &aukste
<<: *aukstas
canonical: aukšte
# Ground floor
pirmas_aukstas: &pirmas_aukstas
canonical: pirmas aukštas
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Basement as a standalone phrase (standalone_probability: 1.0).
rusys: &rusys
canonical: rūsys
sample: true
canonical_probability: 0.8
sample_probability: 0.2
standalone_probability: 1.0
number_abs_value: true
number_min_abs_value: 1
# Basement with a number; numeric / affix / ordinal weights are 0.5 / 0.1 / 0.4.
rusyje: &rusyje
canonical: rūsyje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. rūsyje 1
numeric:
direction: left
direction_probability: 0.8
# e.g. r1
numeric_affix:
affix: r
direction: left
# e.g. 1. rūsyje
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
# NOTE(review): presumably the absolute value of the (negative) floor is used,
# with a minimum of 1 - confirm.
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.5
numeric_affix_probability: 0.1
ordinal_probability: 0.4
# Floor-number aliases: below -1, exactly -1, and 0 / 1 (both ground floor).
aliases:
"<-1":
default: *rusyje
"-1":
default: *rusys
"0": &ground_floor
default: *pirmas_aukstas
probability: 0.6
alternatives:
- alternative: *aukste
probability: 0.3
- alternative: *aukstas
probability: 0.1
"1": *ground_floor
numbering_starts_at: 1
alphanumeric:
default: *aukstas
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Relative directions (Lithuanian): right / left, equally weighted, placed on the
# right of a number.
directions:
right: &desineje
canonical: dešinėje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &kaireje
canonical: kairėje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *desineje
probability: 0.5
- alternative: *kaireje
probability: 0.5
# Cardinal directions (Lithuanian): four equally weighted phrases, each placed on
# the right of a number.
cardinal_directions:
east: &rytai
canonical: rytai
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
west: &vakarai
canonical: vakarai
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
north: &siaure
canonical: šiaurė
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
south: &pietus
canonical: pietūs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *siaure
probability: 0.25
- alternative: *rytai
probability: 0.25
- alternative: *pietus
probability: 0.25
- alternative: *vakarai
probability: 0.25
# Entrance phrase (Lithuanian): "įėjimas" placed on the left of the identifier.
entrances:
# NOTE(review): the key name `wejscie` is Polish and looks copied from another
# language file; the rest of this section only uses the `iejimas` anchor. Confirm
# that nothing looks the key up by name before renaming it.
wejscie: &iejimas
canonical: įėjimas
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# įėjimas 1, įėjimas A, etc.
alphanumeric: &entrance_alphanumeric
default: *iejimas
numeric_probability: 0.1 # e.g. įėjimas 1
alpha_probability: 0.85 # e.g. įėjimas A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrase (Lithuanian): "laiptai" placed on the left of the identifier,
# with optional cardinal-direction modifiers.
staircases:
laiptai: &laiptai
canonical: laiptai
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# The four identifier-type weights sum to 1.0.
alphanumeric: &staircase_alphanumeric
default: *laiptai
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction modifiers; no per-alternative probabilities are given here.
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *siaure
- alternative: *rytai
- alternative: *pietus
- alternative: *vakarai
# PO box phrase (Lithuanian): "pašto dėžutė" / "p d" placed on the left of the
# box number.
po_boxes:
pasto_dezute: &pasto_dezute
canonical: pašto dėžutė
abbreviated: p d
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.4
sample_probability: 0.5
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # pašto dėžutė 1234
# The four identifier-type weights sum to 1.0.
alphanumeric:
default: *pasto_dezute
numeric_probability: 0.95 # P. d. 123
alpha_probability: 0.01 # pašto dėžutė A
numeric_plus_alpha_probability: 0.03 # P. d. 123G
alpha_plus_numeric_probability: 0.01 # P. d. A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Weight per box-number digit length; the weights sum to 1.0.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases (Lithuanian): "butas" by default, with "biuro" for commercial
# zones and "kambarys" for university zones. All go on the left of the number.
units:
butas: &butas
canonical: butas
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
biuro: &biuro
canonical: biuro
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
kambarys: &kambarys
canonical: kambarys
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Default unit settings; the four identifier-type weights sum to 1.0.
alphanumeric: &unit_alphanumeric
default: *butas
numeric_probability: 0.9 # e.g. butas 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. butas A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
# Zone-specific overrides of the default unit phrase.
zones:
commercial: &commercial_unit_types
default: *biuro
numeric_probability: 0.95 # e.g. biuro 1
numeric_plus_alpha_probability: 0.01 # e.g. biuro 1A
alpha_plus_numeric_probability: 0.01 # e.g. biuro A1
alpha_probability: 0.03 # e.g. biuro A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *kambarys
numeric_probability: 0.95 # e.g. kambarys 1
numeric_plus_alpha_probability: 0.01 # e.g. kambarys 1A
alpha_plus_numeric_probability: 0.01 # e.g. kambarys A1
alpha_probability: 0.03 # e.g. kambarys A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

403
resources/addresses/lv.yaml Normal file
View File

@@ -0,0 +1,403 @@
# lv.yaml
# -------
# Latvian language specification.
# Probabilities for the optional address components in Latvian addresses, plus
# the house_number + unit combination.
components:
# The three level weights sum to 1.0.
level:
null_probability: 0.97
alphanumeric_probability: 0.02
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
# House number and unit joined by "-" (0.95) or " - " (0.05), labelled as a
# house_number.
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "-"
probability: 0.95
- separator: " - "
probability: 0.05
probability: 0.2
# Number phrase (Latvian): "numurs" / "nr" placed on the left of the number, or
# "#" used as a prefix affix.
numbers:
default: &numurs
canonical: numurs
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
# NOTE(review): presumably keeps "#" out of the sampled variants, since it is
# already covered by the numeric_affix below - confirm.
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
# Plain phrase vs "#" affix; the weights sum to 1.0.
numeric_probability: 0.4
numeric_affix_probability: 0.6
# Conjunction (Latvian): "un" or "&"; the three weights sum to 1.0.
and:
default: &un
canonical: un
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Cross-street phrases (Latvian): conjunction, two corner phrases and "between".
cross_streets:
and: *un
corner_of: &sturis
canonical: stūris
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &sturi
canonical: stūrī
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersections use "un" (0.7), "stūrī" (0.2) or "stūris" (0.1).
intersection:
default: *un
probability: 0.7
alternatives:
- alternative: *sturi
probability: 0.2
- alternative: *sturis
probability: 0.1
between:
canonical: starp
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# Floor phrases (Latvian): regular floors, ground floor and basement, plus
# aliases that map specific floor numbers to phrases.
levels:
# Regular floor: the phrase goes on the left of a plain number and on the right
# of an ordinal.
stavs: &stavs
canonical: stāvs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
whitespace_probability: 0.5 # sometimes should be 2.stāvs
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
# Needs to be 1.0 so we don't get e.g. IIstāvs
ordinal_suffix_probability: 1.0
numeric_probability: 0.2
ordinal_probability: 0.8
# Ground floor
pirmais_stavs: &pirmais_stavs
canonical: pirmais stāvs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Basement as a standalone phrase (standalone_probability: 1.0).
pagrabs: &pagrabs
canonical: pagrabs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
standalone_probability: 1.0
number_abs_value: true
number_min_abs_value: 1
# Basement with a number; numeric / affix / ordinal weights are 0.5 / 0.1 / 0.4.
pagraba: &pagraba
canonical: pagraba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. pagraba 1
numeric:
direction: left
direction_probability: 0.8
# e.g. p1
numeric_affix:
affix: p
direction: left
# e.g. 1. pagraba
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
# NOTE(review): presumably the absolute value of the (negative) floor is used,
# with a minimum of 1 - confirm.
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.5
numeric_affix_probability: 0.1
ordinal_probability: 0.4
# Floor-number aliases: below -1, exactly -1, and 0 / 1 (both ground floor).
aliases:
"<-1":
default: *pagraba
"-1":
default: *pagrabs
"0": &ground_floor
default: *pirmais_stavs
probability: 0.6
alternatives:
- alternative: *stavs
probability: 0.4
"1": *ground_floor
numbering_starts_at: 1
alphanumeric:
default: *stavs
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Relative directions (Latvian): right / left, equally weighted, placed on the
# right of a number.
directions:
right: &pa_labi
canonical: pa labi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &pa_kreisi
canonical: pa kreisi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *pa_labi
probability: 0.5
- alternative: *pa_kreisi
probability: 0.5
# Cardinal directions (Latvian): four equally weighted phrases. Each can follow
# the number as a full phrase or as a one-letter suffix affix (0.5 / 0.5).
cardinal_directions:
east: &austrumu
canonical: austrumu
abbreviated: a
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.05
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: a
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &rietumu
canonical: rietumu
abbreviated: r
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.05
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: r
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &ziemelu
canonical: ziemeļu
abbreviated: z
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.05
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &dienvidu
canonical: dienvidu
abbreviated: d
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.05
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: d
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *ziemelu
probability: 0.25
- alternative: *dienvidu
probability: 0.25
- alternative: *austrumu
probability: 0.25
- alternative: *rietumu
probability: 0.25
# Entrance phrase (Latvian): "ieeja" placed on the left of the identifier.
entrances:
ieeja: &ieeja
canonical: ieeja
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# ieeja 1, ieeja A, etc.
alphanumeric: &entrance_alphanumeric
default: *ieeja
numeric_probability: 0.1 # e.g. ieeja 1
alpha_probability: 0.85 # e.g. ieeja A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrases (Latvian): "kāpņu" (0.6) or "kāpņu telpa" (0.4), placed on the
# left of the identifier, with optional direction modifiers.
staircases:
kapnu: &kapnu
canonical: kāpņu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
kapnu_telpa: &kapnu_telpa
canonical: kāpņu telpa
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# The four identifier-type weights sum to 1.0.
alphanumeric: &staircase_alphanumeric
default: *kapnu
probability: 0.6
alternatives:
- alternative: *kapnu_telpa
probability: 0.4
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction modifiers (relative and cardinal); no per-alternative probabilities
# are given here.
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *pa_labi
- alternative: *pa_kreisi
- alternative: *ziemelu
- alternative: *dienvidu
- alternative: *austrumu
- alternative: *rietumu
# Unit phrases (Latvian): "dzīvoklis" / "dz" by default, with "birojs" for
# commercial zones and "istaba" for university zones. All go on the left of the
# number.
units:
dzivoklis: &dzivoklis
canonical: dzīvoklis
abbreviated: dz
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.8
sample_probability: 0.1
numeric:
direction: left
birojs: &birojs
canonical: birojs
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
istaba: &istaba
canonical: istaba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Default unit settings; the four identifier-type weights sum to 1.0.
alphanumeric: &unit_alphanumeric
default: *dzivoklis
numeric_probability: 0.9 # e.g. dz 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. dz A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
# Zone-specific overrides of the default unit phrase.
zones:
commercial: &commercial_unit_types
default: *birojs
numeric_probability: 0.95 # e.g. birojs 1
numeric_plus_alpha_probability: 0.01 # e.g. birojs 1A
alpha_plus_numeric_probability: 0.01 # e.g. birojs A1
alpha_probability: 0.03 # e.g. birojs A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *istaba
numeric_probability: 0.95 # e.g. istaba 1
numeric_plus_alpha_probability: 0.01 # e.g. istaba 1A
alpha_plus_numeric_probability: 0.01 # e.g. istaba A1
alpha_probability: 0.03 # e.g. istaba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

563
resources/addresses/nb.yaml Normal file
View File

@@ -0,0 +1,563 @@
# nb.yaml
# -------
# Norwegian language specification.
# Probabilities for the optional address components in Norwegian addresses, plus
# the level + unit ("Bolignummer") combination.
components:
# The three level weights sum to 1.0.
level:
null_probability: 0.85
alphanumeric_probability: 0.1
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
# Bolignummer
# Level and unit are joined with an empty separator and labelled as a unit.
-
components:
- level
- unit
label: unit
# NOTE(review): presumably pads each numeric part to two digits - confirm.
zero_pad_digits: 2
separators:
- separator: ""
probability: 1.0
probability: 0.05
# Number phrase (Norwegian): "nummer" / "nr" placed on the left of the number, or
# "#" used as a prefix affix.
numbers:
default: &nummer
canonical: nummer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
# NOTE(review): presumably keeps "#" out of the sampled variants, since it is
# already covered by the numeric_affix below - confirm.
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
# Plain phrase vs "#" affix; the weights sum to 1.0.
numeric_probability: 0.4
numeric_affix_probability: 0.6
# House-number phrase (Norwegian): reuses the `nummer` anchor, with a very low
# phrase probability (0.0001).
house_numbers:
alphanumeric:
default: *nummer
# NOTE(review): presumably the chance of attaching the phrase to the number - confirm.
alphanumeric_phrase_probability: 0.0001
# Conjunction (Norwegian): "og" or "&"; the three weights sum to 1.0.
and:
default: &og
canonical: og
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Cross-street phrases (Norwegian): conjunction, two corner phrases and "between".
cross_streets:
and: *og
corner_of: &hjorne_av
canonical: hjørne av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &pa_hjornet_av
canonical: på hjørnet av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersections use "og" (0.7) or either corner phrase (0.15 each).
intersection:
default: *og
probability: 0.7
alternatives:
- alternative: *hjorne_av
probability: 0.15
- alternative: *pa_hjornet_av
probability: 0.15
between:
canonical: mellom
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# Floor phrases (Norwegian): regular floor, main floor, lower floor, attic and
# basement, plus aliases for basement and top floors. The one-letter affixes
# (h, u, l, k) are prefixes with two-digit zero padding.
levels:
# Regular floor: the phrase follows the number or ordinal.
floor: &etasje
canonical: etasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
direction_probability: 0.9
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.4
ordinal_probability: 0.6
# Main floor: mostly rendered as the "h" prefix affix (0.9).
hovedetasje: &hovedetasje
canonical: hovedetasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: h
direction: left
zero_pad: 2
numeric_probability: 0.1
numeric_affix_probability: 0.9
# Lower floor: mostly rendered as the "u" prefix affix (0.9).
underetasje: &underetasje
canonical: underetasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: u
direction: left
zero_pad: 2
# NOTE(review): presumably the absolute value of the (negative) floor is used,
# with a minimum of 1 - confirm.
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.1
numeric_affix_probability: 0.9
# Attic floor: mostly rendered as the "l" prefix affix (0.9).
loftsetasje: &loftsetasje
canonical: loftsetasje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: l
direction: left
zero_pad: 2
numeric_probability: 0.1
numeric_affix_probability: 0.9
loft: &loft
canonical: loft
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Basement: standalone 0.9, numeric 0.005, affix 0.09, ordinal 0.005 (sum 1.0).
kjeller: &kjeller
canonical: kjeller
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. 1 kjeller
numeric:
direction: right
direction_probability: 0.8
# e.g. k01
numeric_affix:
affix: k
direction: left
zero_pad: 2
# e.g. 1. k
ordinal:
direction: right
standalone_probability: 0.9
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.09
ordinal_probability: 0.005
# Floor aliases: below -1, exactly -1, and the top floor.
aliases:
"<-1":
default: *kjeller
"-1":
default: *kjeller
probability: 0.85
alternatives:
- alternative: *etasje
probability: 0.05
- alternative: *underetasje
probability: 0.1
"top":
default: *etasje
probability: 0.85
alternatives:
- alternative: *loftsetasje
probability: 0.1
- alternative: *loft
probability: 0.05
numbering_starts_at: 1
alphanumeric:
default: *etasje
probability: 0.95
alternatives:
- alternative: *hovedetasje
probability: 0.05
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Category query phrases (Norwegian): "near", "nearby", "near me" and "in", each
# with a default and optional alternatives.
categories:
near:
default:
canonical: i nærheten av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.9
alternatives:
- alternative:
canonical: nær
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
nearby:
default:
canonical: i nærheten
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: rundt her
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: nær
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: nær meg
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.6
alternatives:
- alternative:
canonical: i nærheten av meg
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.4
in:
default:
canonical: i
# Probabilities of each phrase
# (the four weights below sum to 1.0)
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Relative directions (Norwegian): "høyre" / "venstre", equally weighted. Each can
# follow the number as a phrase (0.8) or as a one-letter suffix "h" / "v" (0.2).
directions:
right: &hoyre
canonical: høyre
sample: true
canonical_probability: 0.1
sample_probability: 0.9
numeric:
direction: right
numeric_affix:
affix: h
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &venstre
canonical: venstre
sample: true
canonical_probability: 0.1
sample_probability: 0.9
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
alternatives:
- alternative: *hoyre
probability: 0.5
- alternative: *venstre
probability: 0.5
# Cardinal directions (Norwegian): four equally weighted phrases. Each can follow
# the number as a full phrase or as a one-letter suffix affix (0.5 / 0.5).
# Only `south` sets `sample: true` with a sample_probability.
cardinal_directions:
east: &ost
canonical: øst
abbreviated: ø
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: ø
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &vest
canonical: vest
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &syd
canonical: syd
abbreviated: s
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *ost
probability: 0.25
- alternative: *syd
probability: 0.25
- alternative: *vest
probability: 0.25
# Entrance phrase (Norwegian): "inngang" placed on the left of the identifier.
entrances:
inngang: &inngang
canonical: inngang
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# inngang 1, inngang A, etc.
alphanumeric: &entrance_alphanumeric
default: *inngang
numeric_probability: 0.1 # e.g. inngang 1
alpha_probability: 0.85 # e.g. inngang A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrases (Norwegian): "trapp" (0.8) or "stiege" (0.2), placed on the
# left of the identifier, with optional cardinal-direction modifiers.
staircases:
# NOTE(review): "stiege" / "stg" looks like a German/Austrian term carried over
# from another language file - confirm it is wanted for Norwegian.
stiege: &stiege
canonical: stiege
abbreviated: stg
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
numeric:
direction: left
trapp: &trapp
canonical: trapp
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
# The four identifier-type weights sum to 1.0.
alphanumeric: &staircase_alphanumeric
default: *trapp
probability: 0.8
alternatives:
- alternative: *stiege
probability: 0.2
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction modifiers; no per-alternative probabilities are given here.
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *nord
- alternative: *syd
- alternative: *ost
- alternative: *vest
# PO box phrases (Norwegian): "postboks" / "pb" (0.9) or "boks" (0.1), placed on
# the left of the box number.
po_boxes:
postboks: &postboks
canonical: postboks
abbreviated: pb
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # e.g. pb nr 1234
boks: &boks
canonical: boks
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # e.g. boks nr 1234
# The four identifier-type weights sum to 1.0.
alphanumeric:
sample: false
default: *postboks
probability: 0.9
alternatives:
- alternative: *boks
probability: 0.1
numeric_probability: 0.9 # Pb 123
alpha_probability: 0.05 # Pb A
numeric_plus_alpha_probability: 0.04 # Pb 123G
alpha_plus_numeric_probability: 0.01 # Pb A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Weight per box-number digit length; the weights sum to 1.0.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases (Norwegian): "leilighet" / "leil" (0.8), "hus" (0.1) or "værelse"
# (0.1), all placed on the left of the number.
units:
leilighet: &leilighet
canonical: leilighet
abbreviated: leil
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.1
sample_probability: 0.3
numeric:
direction: left
# NOTE(review): presumably the chance of emitting the number with no phrase - confirm.
null_phrase_probability: 0.3
# e.g. leilighet nummer 4
add_number_phrase: true
add_number_phrase_probability: 0.05
hus: &hus
canonical: hus
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
vaerelse: &vaerelse
canonical: værelse
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *leilighet
probability: 0.8
alternatives:
- alternative: *hus
probability: 0.1
- alternative: *vaerelse
probability: 0.1
numeric_probability: 0.95 # e.g. leilighet 1
alpha_probability: 0.05 # e.g. leil A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2H, 2V, etc.
add_direction: true
add_direction_probability: 0.005
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. leilighet venstre
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.2
# Use the actual floor phrase as long as the whole phrase is numeric
# Has the effect of creating Bolignummer-style units
use_floor_affix_unit_num_digits: 2

572
resources/addresses/nl.yaml Normal file
View File

@@ -0,0 +1,572 @@
# nl.yaml
# -------
# Note: base config covers Dutch as spoken in the Netherlands
# Belgium overrides go in country configs
components:
level:
null_probability: 0.85
alphanumeric_probability: 0.1
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.8
alphanumeric_probability: 0.2
combinations:
-
components:
- house_number
- unit
label: house_number
separators:
- separator: /
probability: 0.9
- separator: "-"
probability: 0.1
probability: 0.005
-
components:
- house_number
- level
label: house_number
separators:
- separator: "-"
probability: 0.9
- separator: /
probability: 0.1
probability: 0.01
and:
default: &en
canonical: en
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
numbers:
default: &nummer
canonical: nummer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_numbers:
alphanumeric:
default: *nummer
alphanumeric_phrase_probability: 0.01
levels:
verdieping: &verdieping
canonical: verdieping
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
digits:
ascii_probability: 0.8
roman_numeral_probability: 0.2
ordinal:
direction: right
digits:
ascii_probability: 0.5
roman_numeral_probability: 0.3
spellout_probability: 0.2
numeric_probability: 0.7
ordinal_probability: 0.3
etage: &etage
canonical: etage
abbreviated: et
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
numeric:
direction: left
digits:
ascii_probability: 0.8
roman_numeral_probability: 0.2
ordinal:
direction: right
digits:
ascii_probability: 0.5
roman_numeral_probability: 0.3
spellout_probability: 0.2
numeric_probability: 0.7
ordinal_probability: 0.3
begane_grond: &begane_grond
canonical: begane grond
abbreviated: bg
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
benedenverdieping: &benedenverdieping
canonical: benedenverdieping
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parterre: &parterre
canonical: parterre
sample: true
canonical_probability: 0.8
sample_probability: 0.2
gelijkvloers: &gelijkvloers
canonical: gelijkvloers
sample: true
canonical_probability: 0.8
sample_probability: 0.2
het_gelijkvloers: &het_gelijkvloers
canonical: het gelijkvloers
sample: true
canonical_probability: 0.8
sample_probability: 0.2
aliases:
"0":
default: *begane_grond
probability: 0.6
alternatives:
- alternative: *benedenverdieping
probability: 0.35
- alternative: *parterre
probability: 0.04
- alternative: *het_gelijkvloers
probability: 0.005
- alternative: *gelijkvloers
probability: 0.005
alphanumeric:
default: *verdieping
probability: 0.99
alternatives:
- alternative: *etage
probability: 0.01
numeric_probability: 0.79 # With this probability, pick an integer
roman_numeral_probability: 0.2 # Pick a Roman numeral for the actual value
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: in de buurt van
probability: 0.8
alternatives:
- alternative:
canonical: bij
probability: 0.1
- alternative:
canonical: nabij
probability: 0.1
nearby:
default:
canonical: in de buurt
near_me:
default:
canonical: in de buurt van me
in:
default:
canonical: in
probability: 0.6
alternatives:
- alternative:
canonical: te
probability: 0.4
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
cross_streets:
and: *en
corner_of: &hoek_van
canonical: hoek van
at_the_corner_of: &op_de_hoek_van
canonical: op de hoek van
intersection:
default: *en
probability: 0.7
alternatives:
- alternative: *hoek_van
probability: 0.15
- alternative: *op_de_hoek_van
probability: 0.15
between:
canonical: tussen
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# Entrance phrases; the only phrase defined here is "ingang"
entrances:
ingang: &ingang
canonical: ingang
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Ingang 1, Ingang A, etc.
alphanumeric: &entrance_alphanumeric
default: *ingang
numeric_probability: 0.1 # e.g. Ingang 1
alpha_probability: 0.85 # e.g. Ingang A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
po_boxes:
postbus: &postbus
canonical: postbus
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
antwoordnummer: &antwoordnummer
canonical: antwoordnummer
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
alphanumeric:
sample: false
default: *postbus
probability: 0.8
alternatives:
- alternative: *antwoordnummer
probability: 0.2
numeric_probability: 0.9 # 123
alpha_probability: 0.05 # A
numeric_plus_alpha_probability: 0.04 # 123G
alpha_plus_numeric_probability: 0.01 # A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Relative directions (rechts / links), each usable as a full phrase or as a
# one-letter affix; the two are offered as alternatives with equal probability
directions:
right: &rechts
canonical: rechts
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: r
direction: right
whitespace_probability: 0.1
# NOTE(review): "right" uses 0.8/0.2 here while "left" below uses 0.4/0.6
# for the same pair of keys — confirm the asymmetry is intended
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &links
canonical: links
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
numeric_affix:
affix: l
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
alternatives:
- alternative: *rechts
probability: 0.5
- alternative: *links
probability: 0.5
cardinal_directions:
east: &oost
canonical: oost
abbreviated: o
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: o
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
oosten: &oosten
<<: *oost
canonical: oosten
oostelijke: &oostelijke
canonical: oostelijke
sample: true
canonical_probability: 0.8
sample_probability: 0.2
west: &west
canonical: west
abbreviated: w
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: w
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
westen: &westen
<<: *west
canonical: westen
westelijke: &westelijke
canonical: westelijke
sample: true
canonical_probability: 0.8
sample_probability: 0.2
north: &noord
canonical: noord
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
noorden: &noorden
<<: *noord
canonical: noorden
noordelijke: &noordelijke
canonical: noordelijke
sample: true
canonical_probability: 0.8
sample_probability: 0.2
south: &zuid
canonical: zuid
abbreviated: z
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
zuiden: &zuiden
<<: *zuid
canonical: zuiden
zuidelijke: &zuidelijke
canonical: zuidelijke
sample: true
canonical_probability: 0.8
sample_probability: 0.2
alternatives:
- alternative: *noord
probability: 0.25
- alternative: *oost
probability: 0.25
- alternative: *zuid
probability: 0.25
- alternative: *west
probability: 0.25
staircases:
stiege: &stiege
canonical: stiege
abbreviated: stg
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
numeric:
direction: left
trap: &trap
canonical: trap
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *trap
probability: 0.6
alternatives:
- alternative: *stiege
probability: 0.4
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
units:
appartement: &appartement
canonical: appartement
abbreviated: apt
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
kamer: &kamer
canonical: kamer
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *appartement
probability: 0.6
alternatives:
- alternative: *kamer
probability: 0.4
numeric_probability: 0.9 # e.g. Apt 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. Apt A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2R, 2L, etc.
add_direction: true
add_direction_probability: 0.1
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only e.g. Apt Rechts
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.1
countries:
be:
components:
unit:
null_probability: 0.65
alphanumeric_probability: 0.35
levels:
verdieping: &verdieping_flemish
canonical: verdieping
abbreviated: verdiep
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.7
ordinal_probability: 0.3
aliases:
"0":
default: *het_gelijkvloers
probability: 0.5
alternatives:
- alternative: *gelijkvloers
probability: 0.5
alphanumeric:
default: *verdieping_flemish
probability: 0.9
alternatives:
- alternative: *etage
probability: 0.1
units:
bus: &bus
canonical: bus
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric:
default: *appartement
probability: 0.1
alternatives:
- alternative: *bus
probability: 0.7
- alternative: *kamer
probability: 0.2

509
resources/addresses/pl.yaml Normal file
View File

@@ -0,0 +1,509 @@
# pl.yaml
# -------
# Polish language specification.
components:
level:
null_probability: 0.95
alphanumeric_probability: 0.04
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
combinations:
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.9
- separator: "-"
probability: 0.05
- separator: " - "
probability: 0.05
probability: 0.01
numbers:
default: &numer
canonical: numer
abbreviated: nr
sample: true
# Probabilities
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
house_numbers:
dom: &dom
canonical: dom
abbreviated: d
sample: true
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
alphanumeric:
default: *numer
probability: 0.6
alternatives:
- alternative: *dom
probability: 0.4
alphanumeric_phrase_probability: 0.0001
and:
default: &i
canonical: i
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
cross_streets:
and: *i
at: &w
canonical: w
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &rogu
canonical: rogu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &na_rogu
canonical: na rogu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *i
probability: 0.7
alternatives:
- alternative: *w
probability: 0.1
- alternative: *rogu
probability: 0.1
- alternative: *na_rogu
probability: 0.1
between:
canonical: pomiędzy
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
levels:
floor: &pietro
canonical: piętro
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
ordinal_suffix_probability: 0.6
numeric_probability: 0.4
ordinal_probability: 0.6
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.9
sample_probability: 0.1
suterena: &suterena
canonical: suterena
# e.g. suterena 1
numeric:
direction: left
direction_probability: 0.8
# e.g. s1
numeric_affix:
affix: s
direction: left
# e.g. 1. suterena
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *suterena
"-1":
default: *suterena
"0":
default: *parter
probability: 0.9
alternatives:
- alternative: *pietro
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *pietro
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
categories:
near:
default:
canonical: w pobliżu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: blisko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: koło
sample: true
canonical_probability: 0.7
sample_probability: 0.3
probability: 0.05
- alternative:
canonical: niedaleko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: obok
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: przy
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
nearby:
default:
canonical: w pobliżu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.6
alternatives:
- alternative:
canonical: w pobliżu tutaj
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: wokół tutaj
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: blisko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: w pobliżu mnie
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Don't worry about agreement
in:
default:
canonical: w
probability: 0.7
alternatives:
- alternative:
canonical: we
probability: 0.3
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &prawo
canonical: prawo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &lewo
canonical: lewo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *prawo
probability: 0.5
- alternative: *lewo
probability: 0.5
cardinal_directions:
east: &wschod
canonical: wschód
abbreviated: w
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: w
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zachod
canonical: zachód
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &polnoc
canonical: północ
abbreviated: pn
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: pn
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &poludnie
canonical: południe
abbreviated: pd
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: pd
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *polnoc
probability: 0.25
- alternative: *wschod
probability: 0.25
- alternative: *poludnie
probability: 0.25
- alternative: *zachod
probability: 0.25
entrances:
wejscie: &wejscie
canonical: wejście
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Wejście 1, Wejście A, etc.
alphanumeric: &entrance_alphanumeric
default: *wejscie
numeric_probability: 0.1 # e.g. Wejście 1
alpha_probability: 0.85 # e.g. Wejście A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
schody: &schody
canonical: schody
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *schody
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
modifier:
alternatives:
- alternative: *polnoc
- alternative: *poludnie
- alternative: *wschod
- alternative: *zachod
po_boxes:
# PO box phrase: canonical "skrytka pocztowa", abbreviated "skr poczt"
skrytka_pocztowa: &skrytka_pocztowa
canonical: skrytka pocztowa
abbreviated: skr poczt
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # e.g. Skr Poczt nr 1234
alphanumeric:
default: *skrytka_pocztowa
numeric_probability: 0.9 # Skr Poczt 123
alpha_probability: 0.05 # Skr Poczt A
numeric_plus_alpha_probability: 0.04 # Skr Poczt 123G
alpha_plus_numeric_probability: 0.01 # Skr Poczt A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
mieszkanie: &mieszkanie
canonical: mieszkanie
abbreviated: m
sample: true
canonical_probability: 0.05
abbreviated_probability: 0.9
sample_probability: 0.05
numeric:
direction: left
pokoj: &pokoj
canonical: pokój
abbreviated: pok
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
numeric:
direction: left
alphanumeric: &unit_alphanumeric
default: *mieszkanie
probability: 0.9
alternatives:
- alternative: *pokoj
probability: 0.1
numeric_probability: 0.9 # e.g. m. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. m. A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
zones:
commercial: &commercial_unit_types
default: *pokoj
numeric_probability: 0.95 # e.g. pokój 1
numeric_plus_alpha_probability: 0.01 # e.g. pokój 1A
alpha_plus_numeric_probability: 0.01 # e.g. pokój A1
alpha_probability: 0.03 # e.g. pokój A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university: *commercial_unit_types

1054
resources/addresses/pt.yaml Normal file

File diff suppressed because it is too large Load Diff

504
resources/addresses/ro.yaml Normal file
View File

@@ -0,0 +1,504 @@
# ro.yaml
# -------
# Romanian language specification
components:
level:
# If no floor number is specified
null_probability: 0.6
alphanumeric_probability: 0.35
standalone_probability: 0.05
staircase:
null_probability: 0.95
alphanumeric_probability: 0.05
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
# If no unit number is specified
null_probability: 0.3
alphanumeric_probability: 0.65
standalone_probability: 0.05
# Number phrase: canonical "număr", abbreviated "nr", or a "#" affix
numbers:
default: &numar
canonical: număr
abbreviated: nr
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#" # e.g. #3, #2F, etc.
# NOTE(review): only one alternative is listed below, it has no affix or
# probability of its own, and 0.5 alone does not sum to 1 — confirm intent
probability: 0.5
alternatives:
- alternative:
direction: left # affix goes on the number's left
# Probabilities for numbers
numeric_probability: 0.9
numeric_affix_probability: 0.1
and:
default: &si
canonical: și
abbreviated: "&"
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.4
sample_probability: 0.1
# Phrases used when describing an intersection of two streets
cross_streets:
and: *si
corner_of: &colt
canonical: colț
sample: true
canonical_probability: 0.7
sample_probability: 0.3
at_the_corner_of: &la_coltul_de_pe
canonical: la colțul de pe
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# Intersection phrase: "și" by default, corner phrases as alternatives
intersection:
default: *si
probability: 0.7
alternatives:
- alternative: *colt
probability: 0.2
- alternative: *la_coltul_de_pe
probability: 0.1
between:
canonical: între
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# Key spelling corrected (was "parentheses_probabililty") so it matches the
# key used by the other language files
parentheses_probability: 0.5
house_numbers:
# fara numar (FN) addresses
no_number:
default:
canonical: fără număr
abbreviated: fn
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
alphanumeric:
default: *numar
alphanumeric_phrase_probability: 0.7
no_number_probability: 0.1 # With this probability, use fara numar if no house_number is specified
levels:
floor: &etaj
canonical: etaj
abbreviated: et
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.4
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true # Occasionally add variation of "number", e.g. et. nr 2
add_number_phrase_probability: 0.05
digits:
ascii_probability: 0.8
roman_numeral_probability: 0.2
# Ground floor
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.8
sample_probability: 0.2
aliases:
"0":
default: *parter
probability: 0.9
alternatives:
- alternative: *etaj
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *etaj
add_number_phrase: true
add_number_phrase_probability: 0.05
numeric_probability: 0.99
alpha_probability: 0.01
blocks:
alphanumeric:
default:
canonical: bloc
abbreviated: bl
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
categories:
near:
default:
canonical: in apropiere de
nearby:
default:
canonical: în apropiere
probability: 0.5
alternatives:
- alternative:
canonical: in apropiere
probability: 0.2
- alternative:
canonical: aproape de aici
probability: 0.1
- alternative:
canonical: aici
probability: 0.1
- alternative:
canonical: în jurul aici
probability: 0.05
- alternative:
canonical: in jurul aici
probability: 0.05
near_me:
default:
canonical: lângă mine
probability: 0.7
alternatives:
- alternative:
canonical: langa mine
probability: 0.3
in:
default:
canonical: din
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
directions:
right: &dreapta
canonical: dreapta
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: d
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
left: &stanga
canonical: stânga
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: s
direction: right
whitespace_probability: 0.1
numeric_probability: 0.4
numeric_affix_probability: 0.6
alternatives:
- alternative: *dreapta
probability: 0.5
- alternative: *stanga
probability: 0.5
cardinal_directions:
east: &est
canonical: est
abbreviated: e
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: e
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &vest
canonical: vest
abbreviated: v
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &sud
canonical: sud
abbreviated: s
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *est
probability: 0.25
- alternative: *sud
probability: 0.25
- alternative: *vest
probability: 0.25
# Entrance phrases; the only phrase defined here is "intrare"
entrances:
entrada: &intrare
canonical: intrare
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
# Intrare 1, Intrare A, etc.
alphanumeric:
default: *intrare
numeric_probability: 0.1 # e.g. Intrare 1
alpha_probability: 0.85 # e.g. Intrare A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *vest
- alternative: *dreapta
- alternative: *stanga
staircases:
scara: &scara
canonical: scara
abbreviated: sc
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Scara A, Scara 1, etc.
default: *scara
numeric_probability: 0.35 # e.g. Scara 1
alpha_probability: 0.6 # e.g. Scara A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right # e.g. Scara Nord
direction_probability: 0.8
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *vest
- alternative: *dreapta
- alternative: *stanga
# PO box phrase: canonical "căsuță poștală", abbreviated "cp"
po_boxes:
casuta_postala: &casuta_postala
canonical: căsuță poștală
abbreviated: cp
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.3
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.4 # e.g. CP nr 1234
numeric_probability: 1.0
alphanumeric:
sample: false
default: *casuta_postala
numeric_probability: 0.9 # e.g. CP 123
alpha_probability: 0.05 # e.g. CP A
numeric_plus_alpha_probability: 0.04 # e.g. CP 123G
alpha_plus_numeric_probability: 0.01 # e.g. CP A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Distribution of box-number lengths; the probabilities sum to 1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
apartament: &apartament
canonical: apartament
abbreviated: ap
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
numeric:
direction: left
sala: &sala
canonical: sală
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
birou: &birou
canonical: birou
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
lotul: &lotul
canonical: lotul
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
# Unit phrase selection: apartament by default, sală as the alternative
alphanumeric: &unit_alphanumeric
default: *apartament
probability: 0.9
sample: true
alternatives:
- alternative: *sala
probability: 0.1
# Separate random probability for adding directions like 2 Stânga, 2 Dreapta, etc.
add_direction: true
add_direction_probability: 0.1
add_direction_numeric: true # Only for numbers
numeric_probability: 0.9 # e.g. ap 1
numeric_plus_alpha_probability: 0.01 # e.g. ap 1A
alpha_plus_numeric_probability: 0.01 # e.g. ap A1
alpha_probability: 0.08 # e.g. ap A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
zones:
residential: *unit_alphanumeric
commercial:
default: *birou
numeric_probability: 0.9 # e.g. Birou 1
numeric_plus_alpha_probability: 0.01 # e.g. Birou 1A
alpha_plus_numeric_probability: 0.01 # e.g. Birou A1
alpha_probability: 0.08 # e.g. Birou A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
industrial:
default: *lotul
probability: 0.5
alternatives:
- alternative: *birou
probability: 0.3
- alternative: *sala
probability: 0.2
numeric_probability: 0.9 # e.g. Lotul 1
numeric_plus_alpha_probability: 0.01 # e.g. Lotul 1A
alpha_plus_numeric_probability: 0.01 # e.g. Lotul A1
alpha_probability: 0.08 # e.g. Lotul A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *sala
probability: 0.9
alternatives:
- alternative: *birou
probability: 0.1
numeric_probability: 0.9 # e.g. Sala 1
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
alpha_probability: 0.08 # e.g. Sala A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

1171
resources/addresses/ru.yaml Normal file

File diff suppressed because it is too large Load Diff

603
resources/addresses/sk.yaml Normal file
View File

@@ -0,0 +1,603 @@
# sk.yaml
# -------
# Slovak language specification
# Per-component probabilities. Within each component the listed values sum
# to 1.0 (level: 0.95 + 0.04 + 0.01; staircase: 0.99 + 0.01;
# entrance: 0.999 + 0.001; unit: 0.9 + 0.1).
# NOTE(review): null_probability is presumably the chance the component is
# omitted — confirm against the consumer.
components:
level:
null_probability: 0.95
alphanumeric_probability: 0.04
standalone_probability: 0.01
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.9
alphanumeric_probability: 0.1
# Note: no combinations because of the house numbering scheme
# House-number phrase "číslo" / "č" (anchor &cislo).
numbers:
default: &cislo
canonical: číslo
abbreviated: č
sample: true
# Probabilities
# Form weights: 0.3 + 0.6 + 0.1 = 1.0.
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "č."
direction: left
# Rendering weights: 0.4 + 0.6 = 1.0.
numeric_probability: 0.4
numeric_affix_probability: 0.6
# Conjunction "a" / "&" (anchor &a), reused by cross_streets.
# Form weights: 0.2 + 0.75 + 0.05 = 1.0.
and:
default: &a
canonical: a
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Conscription-number phrase "súpisné číslo" / "s.č.".
# Form weights: 0.05 + 0.85 + 0.1 = 1.0, so the abbreviation dominates.
conscription_numbers:
alphanumeric:
default:
canonical: súpisné číslo
abbreviated: s.č.
canonical_probability: 0.05
abbreviated_probability: 0.85
sample: true
sample_probability: 0.1
numeric:
direction: left
# Phrases joining two streets at an intersection, plus the "between" phrase.
# Every phrase here uses canonical 0.8 + sample 0.2 = 1.0, except the
# aliased conjunction *a, which carries its own weights.
cross_streets:
and: *a
at: &na
canonical: na
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner_of: &rohu
canonical: rohu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &roh
canonical: roh
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &na_rohu
canonical: na rohu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersection phrase weights: 0.6 + 4 x 0.1 = 1.0.
intersection:
default: *a
probability: 0.6
alternatives:
- alternative: *na
probability: 0.1
- alternative: *roh
probability: 0.1
- alternative: *rohu
probability: 0.1
- alternative: *na_rohu
probability: 0.1
between:
canonical: medzi
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# Floor/level phrases. Each named phrase defines an anchor that the
# `aliases` and `alphanumeric` entries at the end of this section reference.
levels:
# "poschodie": numeric 0.4 + ordinal 0.6 = 1.0.
floor: &poschodie
canonical: poschodie
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# "podlažie": same settings as "poschodie" above.
podlazie: &podlazie
canonical: podlažie
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# "nadzemné podlažie" / "np": form weights 0.1 + 0.8 + 0.1 = 1.0.
nadzemne_podlazie: &nadzemne_podlazie
canonical: nadzemné podlažie
abbreviated: np
sample: true
canonical_probability: 0.1
abbreviated_probability: 0.8
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
numeric_probability: 0.4
ordinal_probability: 0.6
# "etáž": form weights 0.9 + 0.1 = 1.0.
etaz: &etaz
canonical: etáž
sample: true
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
numeric_probability: 0.4
ordinal_probability: 0.6
# "prízemie": no numeric/ordinal settings; used for level "0" in aliases.
prizemie: &prizemie
canonical: prízemie
sample: true
canonical_probability: 0.9
sample_probability: 0.1
# "podzemné podlažie" / "pp": used for levels "-1" and "<-1" in aliases.
# Form weights: 0.5 + 0.2 + 0.3 = 1.0.
podzemne_podlazie: &podzemne_podlazie
canonical: podzemné podlažie
abbreviated: pp
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
# e.g. podzemné podlažie 1
numeric:
direction: left
direction_probability: 0.8
# e.g. pp1
numeric_affix:
affix: pp
direction: left
# e.g. 1. podzemné podlažie
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
# Rendering weights: 0.985 + 3 x 0.005 = 1.0, so the phrase almost always
# appears without a number.
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
# Phrases used for specific level values (keys are quoted strings).
aliases:
"<-1":
default: *podzemne_podlazie
"-1":
default: *podzemne_podlazie
# Level "0" weights: 0.9 + 0.05 + 0.05 = 1.0.
"0":
default: *prizemie
probability: 0.9
alternatives:
- alternative: *poschodie
probability: 0.05
- alternative: *podlazie
probability: 0.05
numbering_starts_at: 0
# Phrase weights: 0.45 + 0.35 + 0.19 + 0.01 = 1.0.
alphanumeric:
default: *poschodie
probability: 0.45
alternatives:
- alternative: *podlazie
probability: 0.35
- alternative: *nadzemne_podlazie
probability: 0.19
- alternative: *etaz
probability: 0.01
# Number-format weights: 0.99 + 0.0098 + 0.0001 + 0.0001 = 1.0.
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Phrases for category queries (near / nearby / near me / in). The four
# *_probability keys at the end of the section sum to 1.0
# (0.35 + 0.2 + 0.1 + 0.35).
categories:
# "near" phrase weights: 0.7 + 0.2 + 0.05 + 0.05 = 1.0.
near:
default:
canonical: v blízkosti
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.7
alternatives:
- alternative:
canonical: u
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: v okolí
sample: true
canonical_probability: 0.7
sample_probability: 0.3
probability: 0.05
- alternative:
canonical: okolo
sample: true
canonical_probability: 0.7
sample_probability: 0.3
probability: 0.05
# "nearby" phrase weights: 0.4 + 0.2 + 0.1 + 0.1 + 4 x 0.05 = 1.0.
nearby:
default:
canonical: blízkosti
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.4
alternatives:
- alternative:
canonical: blízko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: v blízkosti
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# NOTE(review): "tady" in the next two alternatives looks Czech; the Slovak
# word for "here" is "tu" (already listed below) — confirm with a speaker.
- alternative:
canonical: tady blízkosti
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: tady
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: tu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: v blízkosti tu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
- alternative:
canonical: v okolí
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.05
near_me:
default:
canonical: v blízkosti mne
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Don't worry about agreement
# "in" phrase weights: "v" 0.7 + "vo" 0.3 = 1.0.
in:
default:
canonical: v
probability: 0.7
alternatives:
- alternative:
canonical: vo
probability: 0.3
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Relative direction phrases "pravá" / "ľavá" (anchors &prava, &lava),
# chosen with equal weight (0.5 + 0.5 = 1.0).
directions:
right: &prava
canonical: pravá
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &lava
canonical: ľavá
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *prava
probability: 0.5
- alternative: *lava
probability: 0.5
# Cardinal direction phrases (anchors &vychod, &zapad, &sever, &juh); the
# staircases modifier list references these anchors.
# Only "juh" sets `sample: true`; the other three use canonical 0.95 +
# abbreviated 0.05.
cardinal_directions:
east: &vychod
canonical: východ
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zapad
canonical: západ
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &sever
canonical: sever
abbreviated: s
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Form weights: 0.75 + 0.1 + 0.15 = 1.0.
south: &juh
canonical: juh
abbreviated: j
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: j
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Selection weights: 4 x 0.25 = 1.0.
alternatives:
- alternative: *sever
probability: 0.25
- alternative: *vychod
probability: 0.25
- alternative: *juh
probability: 0.25
- alternative: *zapad
probability: 0.25
# Entrance phrase "vchod" (anchor &vchod).
entrances:
vchod: &vchod
canonical: vchod
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Vchod 1, Vchod A, etc.
# Number-format weights: 0.1 + 0.85 + 0.025 + 0.025 = 1.0, so letters dominate.
alphanumeric: &entrance_alphanumeric
default: *vchod
numeric_probability: 0.1 # e.g. Vchod 1
alpha_probability: 0.85 # e.g. Vchod A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrase "schodisko" (anchor &schodisko).
staircases:
schodisko: &schodisko
canonical: schodisko
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Number-format weights: 0.75 + 0.2 + 0.025 + 0.025 = 1.0.
alphanumeric: &staircase_alphanumeric
default: *schodisko
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
# NOTE(review): these alternatives carry no `probability` values, unlike the
# other `alternatives` lists in this file — confirm the consumer accepts that.
modifier:
alternatives:
- alternative: *sever
- alternative: *juh
- alternative: *vychod
- alternative: *zapad
# PO box phrase "poštová priehradka" (anchor &postova_priehradka).
po_boxes:
postova_priehradka: &postova_priehradka
canonical: poštová priehradka
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # poštová priehradka 1234
# Number-format weights: 0.9 + 0.05 + 0.04 + 0.01 = 1.0.
alphanumeric:
default: *postova_priehradka
numeric_probability: 0.9 # poštová priehradka 123
alpha_probability: 0.05 # poštová priehradka A
numeric_plus_alpha_probability: 0.04 # poštová priehradka 123G
alpha_plus_numeric_probability: 0.01 # poštová priehradka A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Digit-length weights for lengths 1-6:
# 0.05 + 0.1 + 0.2 + 0.5 + 0.1 + 0.05 = 1.0 (4 digits most likely).
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases "apartmán" / "izba" / "kancelária" and the rules for choosing
# between them, by default and per zone.
units:
# Form weights: 0.2 + 0.5 + 0.3 = 1.0.
apartaman: &apartaman
canonical: apartmán
abbreviated: apt
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
izba: &izba
canonical: izba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
kancelaria: &kancelaria
canonical: kancelária
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
# Default selection: phrase weights 0.9 + 0.1 = 1.0; number-format weights
# 0.9 + 0.03 + 0.03 + 0.04 = 1.0.
alphanumeric: &unit_alphanumeric
default: *apartaman
probability: 0.9
alternatives:
- alternative: *izba
probability: 0.1
numeric_probability: 0.9 # e.g. apt. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. apt. A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
zones:
# Commercial phrase weights: 0.6 + 0.4 = 1.0; number-format weights
# 0.95 + 0.01 + 0.01 + 0.03 = 1.0.
commercial: &commercial_unit_types
default: *izba
probability: 0.6
alternatives:
- alternative: *kancelaria
probability: 0.4
numeric_probability: 0.95 # e.g. izba 1
numeric_plus_alpha_probability: 0.01 # e.g. izba 1A
alpha_plus_numeric_probability: 0.01 # e.g. izba A1
alpha_probability: 0.03 # e.g. izba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *izba
numeric_probability: 0.95 # e.g. izba 1
numeric_plus_alpha_probability: 0.01 # e.g. izba 1A
alpha_plus_numeric_probability: 0.01 # e.g. izba A1
alpha_probability: 0.03 # e.g. izba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

539
resources/addresses/sl.yaml Normal file
View File

@@ -0,0 +1,539 @@
# sl.yaml
# -------
# Slovenian language specification
# Per-component probabilities and house-number combinations. Within each
# component the two values sum to 1.0 (e.g. unit: 0.7 + 0.3).
components:
level:
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.7
alphanumeric_probability: 0.3
# Combinations that join components into a single house_number label.
# Every entry uses the same separator weights: "/" 0.95 + "-" 0.05 = 1.0.
# The four entry probabilities total 0.115 (0.005 + 0.005 + 0.1 + 0.005).
# NOTE(review): the remainder is presumably "no combination" — confirm.
combinations:
-
components:
- house_number
- staircase
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# For unit types like 2/34
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
# House-number phrases: the "no number" phrase and the default "številke" /
# "št" phrase (anchor &stevilke).
numbers:
# Form weights: 0.5 + 0.3 + 0.2 = 1.0.
no_number:
default:
canonical: brez številke
abbreviated: brez št
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
# Form weights: 0.3 + 0.6 + 0.1 = 1.0.
default: &stevilke
canonical: številke
abbreviated: št
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "št."
whitespace_probability: 0.6
direction: left
# Rendering weights: 0.6 + 0.4 = 1.0.
numeric_probability: 0.6
numeric_affix_probability: 0.4
alphanumeric_phrase_probability: 0.05
no_number_probability: 0.05
# Conjunction "in" (anchor &in), reused by cross_streets.
# Form weights: 0.8 + 0.2 = 1.0.
and:
default: &in
canonical: in
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Phrases joining two streets at an intersection, plus the "between" phrase.
cross_streets:
# The key is "i" but the value is the Slovenian conjunction anchor *in.
i: *in
at: &na
canonical: na
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &vogalu
canonical: vogalu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
na_vogalu: &na_vogalu
canonical: na vogalu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersection phrase weights: 0.7 + 0.1 + 0.15 + 0.05 = 1.0.
intersection:
default: *in
probability: 0.7
alternatives:
- alternative: *na
probability: 0.1
- alternative: *vogalu
probability: 0.15
- alternative: *na_vogalu
probability: 0.05
med: &med
canonical: med
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
between:
default: *med
# Floor/level phrases. Each named phrase defines an anchor that the
# `aliases` and `alphanumeric` entries at the end of this section reference.
levels:
# "nadstropje": numeric 0.4 + ordinal 0.6 = 1.0.
nadstropje: &nadstropje
canonical: nadstropje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# The two phrases used for level "0" in aliases; neither has numeric settings.
pritlicje: &pritlicje
canonical: pritličje
sample: true
canonical_probability: 0.9
sample_probability: 0.1
parter: &parter
canonical: parter
sample: true
canonical_probability: 0.9
sample_probability: 0.1
# "kleti": used for levels "-1" and "<-1" in aliases.
kleti: &kleti
canonical: kleti
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. kleti 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. kleti
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
# Rendering weights: 0.99 + 0.005 + 0.005 = 1.0, so the phrase almost always
# appears without a number.
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
# Phrases used for specific level values (keys are quoted strings).
aliases:
"<-1":
default: *kleti
"-1":
default: *kleti
# Level "0" weights: 0.5 + 0.4 + 0.1 = 1.0.
"0":
default: *pritlicje
probability: 0.5
alternatives:
- alternative: *parter
probability: 0.4
- alternative: *nadstropje
probability: 0.1
numbering_starts_at: 0
# Number-format weights: 0.69 + 0.3 + 0.0098 + 0.0001 + 0.0001 = 1.0.
alphanumeric:
default: *nadstropje
numeric_probability: 0.69 # With this probability, pick an integer
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Phrases for category queries (near / nearby / near me / in). The four
# *_probability keys at the end of the section sum to 1.0
# (0.35 + 0.2 + 0.1 + 0.35).
categories:
# "near" phrase weights: 0.6 + 0.4 = 1.0.
near:
default:
canonical: v bližini
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.6
alternatives:
- alternative:
canonical: pri
probability: 0.4
# "nearby" phrase weights: 0.5 + 0.3 + 0.1 + 0.1 = 1.0.
nearby:
default:
canonical: v bližini
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.5
alternatives:
- alternative:
canonical: v bližini tukaj
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.3
- alternative:
canonical: okoli tukaj
probability: 0.1
- alternative:
canonical: tukaj
probability: 0.1
near_me:
default:
canonical: blizu mene
# Don't worry about agreement
in:
default:
canonical: v
# Probabilities of each phrase
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Relative direction phrases (anchors &prav, &levo), chosen with equal
# weight (0.5 + 0.5 = 1.0). The staircases modifier list also uses them.
directions:
# NOTE(review): the usual Slovenian word for "right" (opposite of "levo")
# is "desno"; "prav" may be a mistake — confirm with a speaker.
right: &prav
canonical: prav
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &levo
canonical: levo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *prav
probability: 0.5
- alternative: *levo
probability: 0.5
# Cardinal direction phrases (anchors &vzhod, &zahod, &sever, &jug); the
# staircases modifier list references these anchors.
# Only "jug" sets `sample: true`; the other three use canonical 0.95 +
# abbreviated 0.05.
cardinal_directions:
east: &vzhod
canonical: vzhod
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zahod
canonical: zahod
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &sever
canonical: sever
abbreviated: s
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Form weights: 0.75 + 0.1 + 0.15 = 1.0.
south: &jug
canonical: jug
abbreviated: j
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: j
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Selection weights for the four directions. These previously read
# 0.25 / 0.23 / 0.23 / 0.23, which sums to 0.94; they are now 0.25 each so
# the list sums to 1.0 like the other weighted lists in this file.
alternatives:
- alternative: *sever
probability: 0.25
- alternative: *vzhod
probability: 0.25
- alternative: *jug
probability: 0.25
- alternative: *zahod
probability: 0.25
# Entrance phrase "vhod" (anchor &vhod).
entrances:
vhod: &vhod
canonical: vhod
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Vhod 1, Vhod A, etc.
# Number-format weights: 0.1 + 0.85 + 0.025 + 0.025 = 1.0, so letters dominate.
alphanumeric: &entrance_alphanumeric
default: *vhod
numeric_probability: 0.1 # e.g. Vhod 1
alpha_probability: 0.85 # e.g. Vhod A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrase "stopnišče" (anchor &stopnisce).
staircases:
stopnisce: &stopnisce
canonical: stopnišče
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Number-format weights: 0.75 + 0.2 + 0.025 + 0.025 = 1.0.
alphanumeric: &staircase_alphanumeric
default: *stopnisce
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right
direction_probability: 0.85
# Modifier weights: 2 x 0.2 (relative) + 4 x 0.15 (cardinal) = 1.0.
modifier:
alternatives:
- alternative: *prav
probability: 0.2
- alternative: *levo
probability: 0.2
- alternative: *sever
probability: 0.15
- alternative: *jug
probability: 0.15
- alternative: *vzhod
probability: 0.15
- alternative: *zahod
probability: 0.15
# PO box phrase "poštni predal" / "p.p" (anchor &postni_predal).
po_boxes:
# Form weights: 0.2 + 0.4 + 0.4 = 1.0.
postni_predal: &postni_predal
canonical: poštni predal
abbreviated: p.p
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.4
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
# Number-format weights: 0.9 + 0.05 + 0.04 + 0.01 = 1.0.
alphanumeric:
default: *postni_predal
numeric_probability: 0.9 # pp 123
alpha_probability: 0.05 # p.p A
numeric_plus_alpha_probability: 0.04 # pp 123G
alpha_plus_numeric_probability: 0.01 # pp A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Digit-length weights for lengths 1-6:
# 0.05 + 0.1 + 0.2 + 0.5 + 0.1 + 0.05 = 1.0 (4 digits most likely).
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases "stanovanje" / "soba" / "urad" and the rules for choosing
# between them, by default and per zone.
units:
stanovanje: &stanovanje
canonical: stanovanje
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
soba: &soba
canonical: soba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
urad: &urad
canonical: urad
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
# Default selection: phrase weights 0.9 + 0.1 = 1.0; number-format weights
# 0.9 + 0.03 + 0.03 + 0.04 = 1.0.
alphanumeric: &unit_alphanumeric
default: *stanovanje
probability: 0.9
alternatives:
- alternative: *soba
probability: 0.1
numeric_probability: 0.9 # e.g. stanovanje 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. stanovanje A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05
zones:
# Commercial phrase weights: 0.6 + 0.4 = 1.0; number-format weights
# 0.95 + 0.01 + 0.01 + 0.03 = 1.0.
commercial: &commercial_unit_types
default: *soba
probability: 0.6
alternatives:
- alternative: *urad
probability: 0.4
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *soba
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

953
resources/addresses/sr.yaml Normal file
View File

@@ -0,0 +1,953 @@
# sr.yaml
# -------
# Serbian language specification
# The 30 letters of the Serbian Cyrillic alphabet, in order.
# NOTE(review): presumably the letter pool for "alpha" values, with
# alphanumeric_probability the chance of using it — confirm against the consumer.
alphabet: абвгдђежзијклљмнњопрстћуфхцчџш
alphanumeric_probability: 0.7
# Per-component probabilities and house-number combinations. Within each
# component the two values sum to 1.0 (e.g. level: 0.8 + 0.2).
components:
level:
null_probability: 0.8
alphanumeric_probability: 0.2
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.7
alphanumeric_probability: 0.3
# Combinations that join components into a single house_number label.
# Every entry uses the same separator weights: "/" 0.95 + "-" 0.05 = 1.0.
# The four entry probabilities total 0.115 (0.005 + 0.005 + 0.1 + 0.005).
# NOTE(review): the remainder is presumably "no combination" — confirm.
combinations:
-
components:
- house_number
- staircase
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# For unit types like 2/34
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
# House-number phrase in Cyrillic ("број" / "бр", anchor &broj) with a
# Latin-script alternative ("broj" / "br", anchor &broj_latin). Both
# variants use identical weights.
numbers:
# Form weights: 0.3 + 0.6 + 0.1 = 1.0; rendering weights: 0.4 + 0.6 = 1.0.
default: &broj
canonical: број
abbreviated: бр
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "бр."
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
# NOTE(review): no `probability` is given for the default or this
# alternative, unlike the `and` section below — confirm how the consumer
# weighs them.
alternatives:
- alternative: &broj_latin
canonical: broj
abbreviated: br
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "br."
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
# Conjunction in Cyrillic ("и", anchor &i) and Latin ("i", anchor &i_latin).
# Script weights: 0.9 + 0.1 = 1.0.
and:
default: &i
canonical: и
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.9
alternatives:
- alternative: &i_latin
canonical: i
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
# Phrases joining two streets at an intersection, plus the "between" phrase.
# Each Cyrillic phrase is followed by its Latin-script counterpart
# (the *_latin keys).
cross_streets:
i: *i
i_latin: *i_latin
at: &na
canonical: на
sample: true
canonical_probability: 0.8
sample_probability: 0.2
na_latin: &na_latin
canonical: na
sample: true
canonical_probability: 0.8
sample_probability: 0.2
corner: &ugao
canonical: угао
sample: true
canonical_probability: 0.8
sample_probability: 0.2
ugao_latin: &ugao_latin
canonical: ugao
sample: true
canonical_probability: 0.8
sample_probability: 0.2
na_uglu: &na_uglu
canonical: на углу
sample: true
canonical_probability: 0.8
sample_probability: 0.2
na_uglu_latin: &na_uglu_latin
canonical: na uglu
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersection phrase weights:
# 0.65 + 0.05 + 0.075 + 0.025 + 0.1 + 0.05 + 0.025 + 0.025 = 1.0.
intersection:
default: *i
probability: 0.65
alternatives:
- alternative: *i_latin
probability: 0.05
- alternative: *na
probability: 0.075
- alternative: *na_latin
probability: 0.025
- alternative: *ugao
probability: 0.1
- alternative: *ugao_latin
probability: 0.05
- alternative: *na_uglu
probability: 0.025
- alternative: *na_uglu_latin
probability: 0.025
izmedu: &izmedu
canonical: између
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
izmedu_latin: &izmedu_latin
canonical: između
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# "between" script weights: 0.9 + 0.1 = 1.0.
between:
default: *izmedu
probability: 0.9
alternatives:
- alternative: *izmedu_latin
probability: 0.1
# Floor/level phrases. Each Cyrillic phrase is followed by a Latin-script
# counterpart (*_latin) with the same weights. The anchors are referenced by
# the `aliases` and `alphanumeric` entries at the end of this section.
levels:
# "спрат" / "sprat": numeric 0.4 + ordinal 0.6 = 1.0.
sprat: &sprat
canonical: спрат
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
sprat_latin: &sprat_latin
canonical: sprat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# "кат" / "kat": same settings as "спрат" above.
kat: &kat
canonical: кат
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
kat_latin: &kat_latin
canonical: kat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# "етажа" / "etaža" with abbreviation "ет" / "et":
# form weights 0.4 + 0.4 + 0.2 = 1.0.
etaza: &etaza
canonical: етажа
abbreviated: ет
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
etaza_latin: &etaza_latin
canonical: etaža
abbreviated: et
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
# Phrases used for level "0" in aliases; none has numeric settings.
prizemlje: &prizemlje
canonical: приземље
sample: true
canonical_probability: 0.9
sample_probability: 0.1
prizemlje_latin: &prizemlje_latin
canonical: prizemlje
sample: true
canonical_probability: 0.9
sample_probability: 0.1
parter: &parter
canonical: партер
sample: true
canonical_probability: 0.9
sample_probability: 0.1
parter_latin: &parter_latin
canonical: parter
sample: true
canonical_probability: 0.9
sample_probability: 0.1
# "подрум" / "podrum": used for levels "-1" and "<-1" in aliases.
# Rendering weights: 0.99 + 0.005 + 0.005 = 1.0, so the phrase almost always
# appears without a number.
podrum: &podrum
canonical: подрум
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. подрум 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. подрум
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
podrum_latin: &podrum_latin
canonical: podrum
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. podrum 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. podrum
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
# Phrases used for specific level values (keys are quoted strings).
# Levels "<-1" and "-1": Cyrillic 0.8 + Latin 0.2 = 1.0.
aliases:
"<-1":
default: *podrum
probability: 0.8
alternatives:
- alternative: *podrum_latin
probability: 0.2
"-1":
default: *podrum
probability: 0.8
alternatives:
- alternative: *podrum_latin
probability: 0.2
# Level "0" weights:
# 0.45 + 0.05 + 0.35 + 0.05 + 0.04 + 0.01 + 0.04 + 0.01 = 1.0.
"0":
default: *prizemlje
probability: 0.45
alternatives:
- alternative: *prizemlje_latin
probability: 0.05
- alternative: *parter
probability: 0.35
- alternative: *parter_latin
probability: 0.05
- alternative: *sprat
probability: 0.04
- alternative: *sprat_latin
probability: 0.01
- alternative: *kat
probability: 0.04
- alternative: *kat_latin
probability: 0.01
numbering_starts_at: 0
# Phrase weights: 0.65 + 0.1 + 0.15 + 0.05 + 0.04 + 0.01 = 1.0.
alphanumeric:
default: *sprat
probability: 0.65
alternatives:
- alternative: *sprat_latin
probability: 0.1
- alternative: *kat
probability: 0.15
- alternative: *kat_latin
probability: 0.05
- alternative: *etaza
probability: 0.04
- alternative: *etaza_latin
probability: 0.01
# Number-format weights: 0.69 + 0.3 + 0.0098 + 0.0001 + 0.0001 = 1.0.
numeric_probability: 0.69 # With this probability, pick an integer
roman_numeral_probability: 0.3 # Pick a Roman numeral for the actual value
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Relative direction phrases in Cyrillic ("десно" / "лево") with Latin
# counterparts ("desno" / "levo").
directions:
right: &desno
canonical: десно
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
desno_latin: &desno_latin
canonical: desno
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &levo
canonical: лево
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
levo_latin: &levo_latin
canonical: levo
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Selection weights: 0.45 + 0.05 + 0.45 + 0.05 = 1.0.
alternatives:
- alternative: *desno
probability: 0.45
- alternative: *desno_latin
probability: 0.05
- alternative: *levo
probability: 0.45
- alternative: *levo_latin
probability: 0.05
# Cardinal direction phrases in Cyrillic, each followed by its Latin-script
# counterpart (*_latin). Only the south entries set `sample: true`; the
# others use canonical 0.95 + abbreviated 0.05.
cardinal_directions:
east: &istok
canonical: исток
abbreviated: и
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: и
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
istok_latin: &istok_latin
canonical: istok
abbreviated: i
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: i
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &zapad
canonical: запад
abbreviated: з
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: з
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
zapad_latin: &zapad_latin
canonical: zapad
abbreviated: z
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: z
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &sever
canonical: север
abbreviated: с
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: с
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
sever_latin: &sever_latin
canonical: sever
abbreviated: s
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Form weights for both south entries: 0.75 + 0.1 + 0.15 = 1.0.
south: &jug
canonical: југ
abbreviated: ј
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: ј
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
jug_latin: &jug_latin
canonical: jug
abbreviated: j
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: j
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Selection weights: 4 x 0.23 (Cyrillic) + 4 x 0.02 (Latin) = 1.0.
alternatives:
- alternative: *sever
probability: 0.23
- alternative: *sever_latin
probability: 0.02
- alternative: *istok
probability: 0.23
- alternative: *istok_latin
probability: 0.02
- alternative: *jug
probability: 0.23
- alternative: *jug_latin
probability: 0.02
- alternative: *zapad
probability: 0.23
- alternative: *zapad_latin
probability: 0.02
# Entrance phrases (Cyrillic and Latin script) plus the settings for
# alphanumeric entrance identifiers.
entrances:
ulaz: &ulaz
canonical: улаз
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
ulaz_latin: &ulaz_latin
canonical: ulaz
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Ulaz 1, Ulaz A, etc.
# Phrase weights: 0.8 (Cyrillic default) + 0.2 (Latin) = 1.0
alphanumeric: &entrance_alphanumeric
default: *ulaz
probability: 0.8
alternatives:
- alternative: *ulaz_latin
probability: 0.2
# Identifier-shape weights: 0.1 + 0.85 + 0.025 + 0.025 = 1.0
numeric_probability: 0.1 # e.g. Ulaz 1
alpha_probability: 0.85 # e.g. Ulaz A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrases (Cyrillic and Latin script), alphanumeric identifier
# settings, and a directional modifier built from the direction anchors
# (*desno, *levo, *sever, ...) declared in the sections above.
staircases:
stepeniste: &stepeniste
canonical: степениште
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
stepeniste_latin: &stepeniste_latin
canonical: stepenište
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Phrase weights: 0.8 + 0.2 = 1.0
alphanumeric: &staircase_alphanumeric
default: *stepeniste
probability: 0.8
alternatives:
- alternative: *stepeniste_latin
probability: 0.2
# Identifier-shape weights: 0.75 + 0.2 + 0.025 + 0.025 = 1.0
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right
direction_probability: 0.85
# Modifier weights: 2 x (0.19 + 0.01) + 4 x (0.14 + 0.01) = 1.0
modifier:
alternatives:
- alternative: *desno
probability: 0.19
- alternative: *desno_latin
probability: 0.01
- alternative: *levo
probability: 0.19
- alternative: *levo_latin
probability: 0.01
- alternative: *sever
probability: 0.14
- alternative: *sever_latin
probability: 0.01
- alternative: *jug
probability: 0.14
- alternative: *jug_latin
probability: 0.01
- alternative: *istok
probability: 0.14
- alternative: *istok_latin
probability: 0.01
- alternative: *zapad
probability: 0.14
- alternative: *zapad_latin
probability: 0.01
# Post-office-box phrases. Three phrase families (fah, pretinac, pregradak),
# each declared in Cyrillic and in Latin script with matching probabilities.
# Within one phrase, canonical/abbreviated/sample probabilities sum to 1.0.
po_boxes:
postanski_fah: &postanski_fah
canonical: поштански фах
abbreviated: пф
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # поштански фах бр. 1234
postanski_fah_latin: &postanski_fah_latin
canonical: poštanski fah
abbreviated: pf
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # poštanski fah br. 1234
postanski_pretinac: &postanski_pretinac
canonical: поштански претинац
sample: true
canonical_probability: 0.6
# Was 0.5, which made this phrase total 1.1; aligned with the Latin twin.
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
postanski_pretinac_latin: &postanski_pretinac_latin
canonical: poštanski pretinac
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
postanski_pregradak: &postanski_pregradak
canonical: поштански преградак
sample: true
canonical_probability: 0.6
# Was 0.5, which made this phrase total 1.1; aligned with the Latin twin.
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
postanski_pregradak_latin: &postanski_pregradak_latin
canonical: poštanski pregradak
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
# Phrase weights: 0.7 + 0.05 + 0.1 + 0.05 + 0.075 + 0.025 = 1.0
alphanumeric:
default: *postanski_fah
probability: 0.7
alternatives:
- alternative: *postanski_fah_latin
probability: 0.05
- alternative: *postanski_pretinac
probability: 0.1
- alternative: *postanski_pretinac_latin
probability: 0.05
- alternative: *postanski_pregradak
probability: 0.075
- alternative: *postanski_pregradak_latin
probability: 0.025
# Identifier-shape weights: 0.9 + 0.05 + 0.04 + 0.01 = 1.0
numeric_probability: 0.9 # pf 123
alpha_probability: 0.05 # pf A
numeric_plus_alpha_probability: 0.04 # pf 123G
alpha_plus_numeric_probability: 0.01 # pf A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Digit-count weights for the box number: sum to 1.0, 4 digits most likely.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit (apartment/room/office) phrases in Cyrillic and Latin script, the
# default alphanumeric unit settings, and per-zone overrides.
units:
stan: &stan
canonical: стан
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
stan_latin: &stan_latin
canonical: stan
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
apartman: &apartman
canonical: апартман
abbreviated: ап
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
apartman_latin: &apartman_latin
canonical: apartman
abbreviated: ap
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
soba: &soba
canonical: соба
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
soba_latin: &soba_latin
canonical: soba
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
kancelarija: &kancelarija
canonical: канцеларија
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
kancelarija_latin: &kancelarija_latin
canonical: kancelarija
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
# Default phrase weights: 0.5 + 0.1 + 0.2 + 0.05 + 0.1 + 0.05 = 1.0.
# The kancelarija phrases are only referenced from the commercial zone below.
alphanumeric: &unit_alphanumeric
default: *stan
probability: 0.5
alternatives:
- alternative: *stan_latin
probability: 0.1
- alternative: *apartman
probability: 0.2
- alternative: *apartman_latin
probability: 0.05
- alternative: *soba
probability: 0.1
- alternative: *soba_latin
probability: 0.05
# Identifier-shape weights: 0.9 + 0.03 + 0.03 + 0.04 = 1.0
numeric_probability: 0.9 # e.g. stan 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. stan A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.01
zones:
# Commercial phrase weights: 0.55 + 0.05 + 0.35 + 0.05 = 1.0
commercial: &commercial_unit_types
default: *soba
probability: 0.55
alternatives:
- alternative: *soba_latin
probability: 0.05
- alternative: *kancelarija
probability: 0.35
- alternative: *kancelarija_latin
probability: 0.05
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# University phrase weights: 0.9 + 0.1 = 1.0
university:
default: *soba
probability: 0.9
alternatives:
- alternative: *soba_latin
probability: 0.1
numeric_probability: 0.95 # e.g. soba 1
numeric_plus_alpha_probability: 0.01 # e.g. soba 1A
alpha_plus_numeric_probability: 0.01 # e.g. soba A1
alpha_probability: 0.03 # e.g. soba A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

795
resources/addresses/sv.yaml Normal file
View File

@@ -0,0 +1,795 @@
# sv.yaml
# -------
# Swedish language specification.
# Per-component outcome weights. Each component's probabilities sum to 1.0;
# `level` is the only one that also has a standalone outcome.
components:
level:
null_probability: 0.85
alphanumeric_probability: 0.1
standalone_probability: 0.05
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.75
alphanumeric_probability: 0.25
# The generic "number" phrase (nummer / nr), anchored as &nummer, with "#" as
# its numeric affix.
numbers:
default: &nummer
canonical: nummer
abbreviated: nr
sample: true
# Probabilities (0.3 + 0.5 + 0.2 = 1.0)
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
# "#" is excluded from sampling here; it is listed as the numeric affix below.
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#"
direction: left
# 0.4 + 0.6 = 1.0
numeric_probability: 0.4
numeric_affix_probability: 0.6
# House numbers reuse the generic &nummer phrase; a phrase is attached only
# rarely (0.0001).
house_numbers:
alphanumeric:
default: *nummer
alphanumeric_phrase_probability: 0.0001
# Conjunction phrase, anchored as &och and reused by cross_streets.
# Weights: 0.2 (och) + 0.75 ("&") + 0.05 (sample) = 1.0
and:
default: &och
canonical: och
abbreviated: "&"
canonical_probability: 0.2
abbreviated_probability: 0.75
sample: true
sample_probability: 0.05
# Phrases used to join cross streets: the conjunction, two "corner of"
# variants, and a "between" phrase.
cross_streets:
and: *och
corner_of: &hornet_av
canonical: hörnet av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
at_the_corner_of: &i_hornet_av
canonical: i hörnet av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersection phrase weights: 0.7 + 0.15 + 0.15 = 1.0
intersection:
default: *och
probability: 0.7
alternatives:
- alternative: *hornet_av
probability: 0.15
- alternative: *i_hornet_av
probability: 0.15
between:
canonical: mellan
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# Floor/level phrases, special-floor aliases, and the default alphanumeric
# level settings. Floor numbering starts at 1 (see numbering_starts_at).
levels:
# Ordinal-only phrase (numeric 0.0 / ordinal 1.0).
vaningen: &vaningen
canonical: våningen
abbreviated: vån
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
ordinal:
direction: right
numeric_probability: 0.0
ordinal_probability: 1.0
vaning: &vaning
canonical: våning
abbreviated: vån
sample: true
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.8
spellout_probability: 0.2
ordinal:
direction: left
digits:
ascii_probability: 0.8
spellout_probability: 0.2
numeric_probability: 0.8
ordinal_probability: 0.2
plan: &plan
canonical: plan
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# No numeric/ordinal settings: phrase only.
entreplan: &entreplan
canonical: entréplan
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Singular "trappa" forms are pinned to absolute value 2 (min = max = 2) with
# 1 subtracted; the plural "trappor" forms start at 3 with no maximum.
# NOTE(review): presumably this yields "1 trappa upp" for floor 2 and
# "2 trappor upp" for floor 3 - confirm against the consumer.
trappa_upp: &trappa_upp
canonical: trappa upp
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
number_min_abs_value: 2
number_max_abs_value: 2
number_subtract_abs_value: 1
numeric_probability: 0.8
ordinal_probability: 0.2
trappor_upp: &trappor_upp
canonical: trappor upp
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
number_min_abs_value: 3
number_subtract_abs_value: 1
numeric_probability: 0.8
ordinal_probability: 0.2
trappa: &trappa
canonical: trappa
abbreviated: tr
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
numeric:
direction: left
digits:
ascii_probability: 0.8
spellout_probability: 0.2
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
number_min_abs_value: 2
number_max_abs_value: 2
number_subtract_abs_value: 1
numeric_probability: 0.8
ordinal_probability: 0.2
trappor: &trappor
canonical: trappor
abbreviated: tr
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
numeric:
direction: left
digits:
ascii_probability: 0.8
spellout_probability: 0.2
ordinal:
direction: right
digits:
ascii_probability: 0.8
spellout_probability: 0.2
number_min_abs_value: 3
number_subtract_abs_value: 1
numeric_probability: 0.8
ordinal_probability: 0.2
bottenvaning: &bottenvaning
canonical: bottenvåning
abbreviated: bv
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
# Always standalone (standalone_probability 1.0).
vindsvaningen: &vindsvaningen
canonical: vindsvåningen
sample: true
canonical_probability: 0.8
sample_probability: 0.2
standalone_probability: 1.0
vinds: &vinds
canonical: vinds
sample: true
canonical_probability: 0.8
sample_probability: 0.2
standalone_probability: 1.0
kallare: &kallare
canonical: källare
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# e.g. 1 källare
numeric:
direction: right
direction_probability: 0.8
# e.g. k1
numeric_affix:
affix: k
direction: left
# e.g. 1:a k
ordinal:
direction: right
# Form weights: 0.9 + 0.005 + 0.09 + 0.005 = 1.0
standalone_probability: 0.9
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.09
ordinal_probability: 0.005
# Phrase choices keyed by floor value ("<-1", "-1", "0", "1") or by the
# special key "top". Each alias's weights sum to 1.0.
aliases:
"<-1":
default: *kallare
probability: 0.95
alternatives:
- alternative: *vaning
probability: 0.025
- alternative: *vaningen
probability: 0.025
"-1":
default: *kallare
probability: 0.9
alternatives:
- alternative: *vaning
probability: 0.05
- alternative: *vaningen
probability: 0.05
"0":
default: *bottenvaning
probability: 0.6
alternatives:
- alternative: *entreplan
probability: 0.2
- alternative: *vaningen
probability: 0.1
- alternative: *vaning
probability: 0.1
# "1" carries the same ground-floor choices as "0".
"1":
default: *bottenvaning
probability: 0.6
alternatives:
- alternative: *entreplan
probability: 0.2
- alternative: *vaningen
probability: 0.1
- alternative: *vaning
probability: 0.1
"top":
default: *vaningen
probability: 0.35
alternatives:
- alternative: *vaning
probability: 0.35
- alternative: *trappor_upp
probability: 0.1
- alternative: *trappor
probability: 0.1
- alternative: *vindsvaningen
probability: 0.05
- alternative: *vinds
probability: 0.05
numbering_starts_at: 1
# Default phrase weights: 0.25 + 0.2 + 0.05 + 4 x 0.125 = 1.0
alphanumeric:
default: *vaningen
probability: 0.25
alternatives:
- alternative: *vaning
probability: 0.2
- alternative: *plan
probability: 0.05
- alternative: *trappa_upp
probability: 0.125
- alternative: *trappa
probability: 0.125
- alternative: *trappor_upp
probability: 0.125
- alternative: *trappor
probability: 0.125
# Identifier-shape weights: 0.99 + 0.0098 + 0.0001 + 0.0001 = 1.0
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Phrases for category queries ("near", "nearby", "near me", "in").
# Within each group, the default plus its alternatives sum to 1.0.
categories:
near:
default:
canonical: i närheten av
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.8
alternatives:
- alternative:
canonical: nära
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
# Weights: 0.4 + 0.2 + 0.2 + 0.1 + 0.1 = 1.0
nearby:
default:
canonical: i närheten
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.4
alternatives:
- alternative:
canonical: runt här
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
# "nära här" was listed twice at 0.1 each; merged into one entry that
# carries the combined weight, so the phrase distribution is unchanged.
- alternative:
canonical: nära här
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
- alternative:
canonical: nära
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
- alternative:
canonical: omkring här
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.1
near_me:
default:
canonical: nära mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.8
alternatives:
- alternative:
canonical: nära mig
sample: true
canonical_probability: 0.8
sample_probability: 0.2
probability: 0.2
in:
default:
canonical: i
probability: 0.8
alternatives:
# NOTE(review): this canonical is empty, which YAML loads as null. Either the
# phrase text was lost or an empty phrase is intended - confirm, and write
# "" or null explicitly if it is deliberate.
- alternative:
canonical:
probability: 0.2
# Probabilities of each phrase (0.35 + 0.2 + 0.1 + 0.35 = 1.0)
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Relative directions (right/left) with single-letter numeric affixes (h / v).
directions:
# NOTE(review): canonical 0.1 / sample 0.9 is the reverse of the 0.8 / 0.2
# split used by most other phrases in this file - confirm it is intentional.
right: &hoger
canonical: höger
sample: true
canonical_probability: 0.1
sample_probability: 0.9
numeric:
direction: right
numeric_affix:
affix: h
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
left: &vanster
canonical: vänster
sample: true
canonical_probability: 0.1
sample_probability: 0.9
numeric:
direction: right
numeric_affix:
affix: v
direction: right
whitespace_probability: 0.1
numeric_probability: 0.8
numeric_affix_probability: 0.2
# Selection weights: 0.5 + 0.5 = 1.0
alternatives:
- alternative: *hoger
probability: 0.5
- alternative: *vanster
probability: 0.5
# Compass directions. The short forms (öst, väst, norr, syd) carry numeric
# affixes and are the ones listed under `alternatives`; the adjectival forms
# (östra, västra, norra, södra) are declared and anchored only.
cardinal_directions:
east: &ost
canonical: öst
abbreviated: ö
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: ö
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
eastern: &ostra
canonical: östra
abbreviated: ö:a
canonical_probability: 0.9
abbreviated_probability: 0.1
numeric:
direction: right
west: &vast
canonical: väst
abbreviated: v
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: v
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
western: &vastra
canonical: västra
abbreviated: v:a
canonical_probability: 0.9
abbreviated_probability: 0.1
numeric:
direction: right
north: &norr
canonical: norr
abbreviated: n
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
northern: &norra
canonical: norra
abbreviated: n:a
canonical_probability: 0.9
abbreviated_probability: 0.1
# The canonical was "söd", which is not a Swedish word; "syd" is the form
# parallel to öst/väst. The anchor name &sod is kept so aliases still resolve.
south: &sod
canonical: syd
abbreviated: s
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
southern: &sodra
canonical: södra
abbreviated: s:a
canonical_probability: 0.9
abbreviated_probability: 0.1
# Selection weights: 4 x 0.25 = 1.0
alternatives:
- alternative: *norr
probability: 0.25
- alternative: *ost
probability: 0.25
- alternative: *sod
probability: 0.25
- alternative: *vast
probability: 0.25
# Entrance phrases (ingång, entré) and the settings for alphanumeric entrance
# identifiers.
entrances:
ingang: &ingang
canonical: ingång
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
entre: &entre
canonical: entré
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# Ingång 1, Ingång A, etc.
# Phrase weights: 0.6 + 0.4 = 1.0
alphanumeric: &entrance_alphanumeric
default: *ingang
probability: 0.6
alternatives:
- alternative: *entre
probability: 0.4
# Identifier-shape weights: 0.1 + 0.85 + 0.025 + 0.025 = 1.0
numeric_probability: 0.1 # e.g. Ingång 1
alpha_probability: 0.85 # e.g. Ingång A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrases (uppgång, plus right/left variants), alphanumeric
# identifier settings, and a directional modifier built from the compass
# anchors declared under cardinal_directions.
staircases:
uppgang: &uppgang
canonical: uppgång
abbreviated: u
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
numeric:
direction: left
uppgang_hoger: &uppgang_hoger
canonical: uppgång höger
abbreviated: uh
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
numeric:
direction: left
uppgang_vanster: &uppgang_vanster
canonical: uppgång vänster
abbreviated: uv
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
numeric:
direction: left
# Phrase weights: 0.6 + 0.2 + 0.2 = 1.0
alphanumeric: &staircase_alphanumeric
default: *uppgang
probability: 0.6
alternatives:
- alternative: *uppgang_hoger
probability: 0.2
- alternative: *uppgang_vanster
probability: 0.2
# Identifier-shape weights: 0.75 + 0.2 + 0.025 + 0.025 = 1.0
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: left
direction_probability: 0.85
# These alternatives had no `probability`, unlike every other alternatives
# list in the file; explicit uniform weights are given (4 x 0.25 = 1.0).
# NOTE(review): uniform is assumed to match the intended distribution.
modifier:
alternatives:
- alternative: *norr
probability: 0.25
- alternative: *sod
probability: 0.25
- alternative: *ost
probability: 0.25
- alternative: *vast
probability: 0.25
# Post-office-box phrases (box, postlåda) and the alphanumeric box-number
# settings.
po_boxes:
box: &box
canonical: box
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # e.g. Box nr 1234
postlada: &postlada
canonical: postlåda
abbreviated: pl
sample: true
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2 # e.g. Pl nr 1234
# Phrase weights: 0.9 + 0.1 = 1.0
alphanumeric:
sample: false
default: *box
probability: 0.9
alternatives:
- alternative: *postlada
probability: 0.1
# Identifier-shape weights: 0.9 + 0.05 + 0.04 + 0.01 = 1.0
numeric_probability: 0.9 # Box 123
alpha_probability: 0.05 # Box A
numeric_plus_alpha_probability: 0.04 # Box 123G
alpha_plus_numeric_probability: 0.01 # Box A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Digit-count weights for the box number: sum to 1.0, 5 digits most likely.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.1
- length: 5
probability: 0.5
- length: 6
probability: 0.05
# Unit phrases (lägenhet, bostad, lägenhetsnummer, hus, rum) and the default
# alphanumeric unit settings, including floor-derived unit numbers.
units:
lagenhet: &lagenhet
canonical: lägenhet
abbreviated: lgh
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.5
sample_probability: 0.3
numeric:
direction: left
null_phrase_probability: 0.1
# e.g. Lägenhet nummer 4
add_number_phrase: true
add_number_phrase_probability: 0.05
# Not listed in the default alternatives below; used by the `fi` override
# under `countries`.
bostad: &bostad
canonical: bostad
abbreviated: bst
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.5
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.05
lagenhetsnummer: &lagenhetsnummer
canonical: lägenhetsnummer
abbreviated: lgh nr
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
hus: &hus
canonical: hus
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
rum: &rum
canonical: rum
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
direction: left
# Phrase weights: 0.75 + 0.05 + 0.1 + 0.1 = 1.0
alphanumeric: &unit_alphanumeric
default: *lagenhet
probability: 0.75
alternatives:
- alternative: *lagenhetsnummer
probability: 0.05
- alternative: *hus
probability: 0.1
- alternative: *rum
probability: 0.1
# Only two identifier shapes are weighted here (0.95 + 0.05 = 1.0); the
# whitespace settings for mixed shapes are still declared below.
numeric_probability: 0.95 # e.g. Lägenhet 1
alpha_probability: 0.05 # e.g. Lgh A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate random probability for adding directions like 2H, 2V, etc.
add_direction: true
add_direction_probability: 0.005
# Add directions for plain numbers
add_direction_numeric: true
# Add direction only, i.e. the unit phrase followed by just a direction
add_direction_standalone: true
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.2
# Use the actual floor phrase as long as the whole phrase is numeric.
# Has the effect of creating floor-prefixed unit numbers
# (floor digits followed by unit digits).
use_floor_affix_unit_num_digits: 2
# In Swedish addresses, the ground level is 10, floors are 11, 12, ... basements are 9, 8, ...
use_floor_ground_starts_at: 10
# For single digit floors, use 09, 08, etc.
use_floor_floor_num_digits: 2
# Country-specific overrides of the defaults above.
countries:
# Swedish addresses in Finland: "bostad" is the only unit phrase, direction
# suffixes are switched off, and the floor-derived numbering parameters differ
# from the defaults (ground starts at 1, no fixed unit digit count).
fi:
units:
alphanumeric:
default: *bostad
probability: 1.0
alternatives: []
add_direction: false
add_direction_numeric: false
add_direction_standalone: false
use_floor_probability: 0.1
use_floor_affix_unit_num_digits: 0
use_floor_ground_starts_at: 1
use_floor_floor_num_digits: 2

503
resources/addresses/tr.yaml Normal file
View File

@@ -0,0 +1,503 @@
# tr.yaml
# -------
# Turkish language specification
# Per-component outcome weights (each pair sums to 1.0) and the compound
# house-number forms in which sub-components are joined by a separator.
components:
level:
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.7
alphanumeric_probability: 0.3
# Every combination is labelled house_number and uses the same separator
# weights ("/" 0.95, "-" 0.05). The per-combination probabilities total 0.115.
# NOTE(review): presumably the remainder means "no combination" - confirm.
combinations:
-
components:
- house_number
- staircase
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# For unit types like 2/34
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
# The generic "number" phrase (numara / "no:"), anchored as &numara.
# "no:" is quoted because it ends in a colon.
numbers:
default: &numara
canonical: numara
abbreviated: "no:"
sample: true
# 0.3 + 0.6 + 0.1 = 1.0
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "no:"
whitespace_probability: 0.6
direction: left
# 0.4 + 0.6 = 1.0
numeric_probability: 0.4
numeric_affix_probability: 0.6
alphanumeric_phrase_probability: 0.05
no_number_probability: 0.05
# Conjunction phrase, anchored as &ve and reused by cross_streets.
and:
default: &ve
canonical: ve
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Phrases used to join cross streets: the conjunction, two "corner" variants,
# and a "between" phrase.
cross_streets:
ve: *ve
corner_of: &kose
canonical: köşe
sample: true
canonical_probability: 0.8
sample_probability: 0.2
kosesinde: &kosesinde
canonical: köşesinde
sample: true
canonical_probability: 0.8
sample_probability: 0.2
# Intersection phrase weights: 0.8 + 0.1 + 0.1 = 1.0
intersection:
default: *ve
probability: 0.8
alternatives:
- alternative: *kose
probability: 0.1
- alternative: *kosesinde
probability: 0.1
arasinda: &arasinda
canonical: arasında
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
# `between` has a single choice, so no probability is given.
between:
default: *arasinda
# Floor/level phrases, special-floor aliases, and the default alphanumeric
# level settings. Floor numbering starts at 0 (see numbering_starts_at).
levels:
kat: &kat
canonical: kat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
# 0.4 + 0.6 = 1.0
numeric_probability: 0.4
ordinal_probability: 0.6
zemin_kat: &zemin_kat
canonical: zemin kat
abbreviated: zk
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
asma_kat: &asma_kat
canonical: asma kat
half_floors: true
canonical_probability: 0.8
sample_probability: 0.2
sample: true
# e.g. asma kat 2
numeric:
direction: left
# e.g. 2. asma kat
ordinal:
direction: right
# These three weights summed to 0.9 (0.1 + 0.2 + 0.6). The missing 0.1 is
# given to the standalone form so they sum to 1.0 and the numeric:ordinal
# ratio is unchanged.
# NOTE(review): confirm standalone is where the missing mass belongs.
numeric_probability: 0.1
ordinal_probability: 0.2
standalone_probability: 0.7
bodrum: &bodrum
canonical: bodrum
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. bodrum 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. bodrum
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
# Form weights: 0.99 + 0.005 + 0.005 = 1.0
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
# Phrase choices keyed by floor value or by the special half_floors token.
aliases:
"<-1":
default: *bodrum
"-1":
default: *bodrum
# Special token for half-floors
half_floors:
default: *asma_kat
# Ground-floor weights: 0.9 + 0.1 = 1.0
"0":
default: *zemin_kat
probability: 0.9
alternatives:
- alternative: *kat
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *kat
# Identifier-shape weights: 0.99 + 0.0098 + 0.0001 + 0.0001 = 1.0
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
# Relative directions (sağ = right, sol = left), anchored for reuse by the
# staircase directional modifier.
directions:
right: &sag
canonical: sağ
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &sol
canonical: sol
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Selection weights: 0.5 + 0.5 = 1.0
alternatives:
- alternative: *sag
probability: 0.5
- alternative: *sol
probability: 0.5
# Compass directions; the single-letter abbreviation doubles as the numeric
# affix.
cardinal_directions:
east: &dogu
canonical: doğu
abbreviated: d
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: d
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &bati
canonical: batı
abbreviated: b
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: b
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &kuzey
canonical: kuzey
abbreviated: k
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: k
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &guney
canonical: güney
abbreviated: g
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: g
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Selection weights were 0.25 + 3 x 0.23 = 0.94. With only four entries
# (no second-script variants to absorb the remainder), each direction gets
# 0.25 so the weights sum to 1.0.
alternatives:
- alternative: *kuzey
probability: 0.25
- alternative: *dogu
probability: 0.25
- alternative: *guney
probability: 0.25
- alternative: *bati
probability: 0.25
# Entrance phrase (giriş) and the settings for alphanumeric entrance
# identifiers.
entrances:
giris: &giris
canonical: giriş
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# giriş 1, giriş A, etc.
alphanumeric: &entrance_alphanumeric
default: *giris
# Identifier-shape weights: 0.1 + 0.85 + 0.025 + 0.025 = 1.0
numeric_probability: 0.1 # e.g. giriş 1
alpha_probability: 0.85 # e.g. giriş A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Staircase phrase (merdiven), alphanumeric identifier settings, and a
# directional modifier built from the relative and compass direction anchors.
staircases:
merdiven: &merdiven
canonical: merdiven
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *merdiven
# Identifier-shape weights: 0.75 + 0.2 + 0.025 + 0.025 = 1.0
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right
direction_probability: 0.85
# Modifier weights: 2 x 0.2 + 4 x 0.15 = 1.0
modifier:
alternatives:
- alternative: *sag
probability: 0.2
- alternative: *sol
probability: 0.2
- alternative: *kuzey
probability: 0.15
- alternative: *guney
probability: 0.15
- alternative: *dogu
probability: 0.15
- alternative: *bati
probability: 0.15
# Post-office-box phrase (posta kutusu / pk) and the alphanumeric box-number
# settings.
po_boxes:
posta_kutusu: &posta_kutusu
canonical: posta kutusu
abbreviated: pk
sample: true
# 0.2 + 0.4 + 0.4 = 1.0
canonical_probability: 0.2
abbreviated_probability: 0.4
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
alphanumeric:
default: *posta_kutusu
# Identifier-shape weights: 0.9 + 0.05 + 0.04 + 0.01 = 1.0
numeric_probability: 0.9 # pk 123
alpha_probability: 0.05 # pk A
numeric_plus_alpha_probability: 0.04 # pk 123G
alpha_plus_numeric_probability: 0.01 # pk A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Digit-count weights for the box number: sum to 1.0, 4 digits most likely.
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Unit phrases (daire, apartman, oda, ofis), the default alphanumeric unit
# settings, and per-zone overrides.
units:
daire: &daire
canonical: daire
abbreviated: d
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
apartman: &apartman
canonical: apartman
abbreviated: apt
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
oda: &oda
canonical: oda
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
# Only referenced from the commercial zone below.
ofis: &ofis
canonical: ofis
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
# Default phrase weights: 0.6 + 0.3 + 0.1 = 1.0
alphanumeric: &unit_alphanumeric
default: *daire
probability: 0.6
alternatives:
- alternative: *apartman
probability: 0.3
- alternative: *oda
probability: 0.1
# Identifier-shape weights: 0.9 + 0.03 + 0.03 + 0.04 = 1.0
numeric_probability: 0.9 # e.g. d 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. daire A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05
zones:
# Commercial phrase weights: 0.6 + 0.4 = 1.0
commercial: &commercial_unit_types
default: *oda
probability: 0.6
alternatives:
- alternative: *ofis
probability: 0.4
numeric_probability: 0.95 # e.g. oda 1
numeric_plus_alpha_probability: 0.01 # e.g. oda 1A
alpha_plus_numeric_probability: 0.01 # e.g. oda A1
alpha_probability: 0.03 # e.g. oda A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# University has a single phrase choice, so no probability is given.
university:
default: *oda
numeric_probability: 0.95 # e.g. oda 1
numeric_plus_alpha_probability: 0.01 # e.g. oda 1A
alpha_plus_numeric_probability: 0.01 # e.g. oda A1
alpha_probability: 0.03 # e.g. oda A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1

1001
resources/addresses/uk.yaml Normal file

File diff suppressed because it is too large Load Diff

292
resources/addresses/zh.yaml Normal file
View File

@@ -0,0 +1,292 @@
# zh.yaml
# -------
# Chinese language specification (default is mainland China; Hong Kong, Macau and Taiwan overrides below)
whitespace: false
# How often optional components are emitted when the source data has none.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 (which duplicated `null_probability` at one level) — confirm
# against upstream.
components:
  level:
    null_probability: 0.85  # Probability of doing nothing if no floor number is specified
    alphanumeric_probability: 0.15
  unit:
    # If no unit number is specified
    null_probability: 0.6
    alphanumeric_probability: 0.4
# Number suffix "hao": simplified form by default, traditional as alternative.
# NOTE(review): the scraped copy had empty `canonical:` / `affix:` values
# (parsed as null) and no indentation. The characters are restored from the
# anchor names (hao / hao_traditional) and the nesting from key semantics —
# confirm both against upstream.
numbers:
  default: &hao
    canonical: 号
    numeric_affix:
      affix: 号
      direction: right
    # Always rendered as number + affix, never as a separate phrase.
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  probability: 0.8
  alternatives:
    - alternative: &hao_traditional
        canonical: 號
        numeric_affix:
          affix: 號
          direction: right
        numeric_probability: 0.0
        numeric_affix_probability: 1.0
      probability: 0.2
# House-number phrase distribution; reuses the &hao / &hao_traditional anchors.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
house_numbers:
  alphanumeric:
    default: *hao
    probability: 0.8
    alternatives:
      - alternative: *hao_traditional
        probability: 0.2
  alphanumeric_phrase_probability: 0.6
# Floor suffixes: "lou" and "ceng", each in simplified and traditional form.
# NOTE(review): the scraped copy had empty `canonical:` / `affix:` values
# (parsed as null) and no indentation. The characters are restored from the
# anchor names and the nesting from key semantics — confirm both against
# upstream.
levels:
  lou: &lou
    canonical: 楼
    numeric_affix:
      affix: 楼
      direction: right
      add_number_phrase: true
      add_number_phrase_probability: 0.5
      # Digit rendering styles; the three probabilities sum to 1.0.
      digits:
        ascii_probability: 0.6
        unicode_full_width_probability: 0.1
        spellout_probability: 0.3
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  lou_traditional: &lou_traditional
    canonical: 樓
    numeric_affix:
      affix: 樓
      direction: right
      add_number_phrase: true
      add_number_phrase_probability: 0.5
      digits:
        ascii_probability: 0.6
        unicode_full_width_probability: 0.1
        spellout_probability: 0.3
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  ceng: &ceng
    canonical: 层
    numeric_affix:
      affix: 层
      direction: right
      add_number_phrase: true
      add_number_phrase_probability: 0.5
      digits:
        ascii_probability: 0.6
        unicode_full_width_probability: 0.1
        spellout_probability: 0.3
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  ceng_traditional: &ceng_traditional
    canonical: 層
    numeric_affix:
      affix: 層
      direction: right
      add_number_phrase: true
      add_number_phrase_probability: 0.5
      digits:
        ascii_probability: 0.6
        unicode_full_width_probability: 0.1
        spellout_probability: 0.3
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  numbering_starts_at: 1
  alphanumeric:
    # Phrase choice: default + alternatives sum to 1.0.
    default: *lou
    probability: 0.85
    alternatives:
      - alternative: *lou_traditional
        probability: 0.05
      - alternative: *ceng
        probability: 0.08
      - alternative: *ceng_traditional
        probability: 0.02
    numeric_probability: 1.0
# PO box prefix in simplified and traditional form, plus box-number lengths.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
po_boxes:
  youzheng_xinxiang: &youzheng_xinxiang
    canonical: 邮政信箱
    numeric_affix:
      affix: 邮政信箱
      direction: left
      digits:
        ascii_probability: 0.3
        unicode_full_width_probability: 0.5
        spellout_probability: 0.2
      # NOTE(review): the level and unit phrases in this file spell this key
      # `add_number_phrase`; confirm the consumer actually reads
      # `use_number_phrase` here.
      use_number_phrase: true
      use_number_phrase_probability: 0.8
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  youzheng_xinxiang_traditional: &youzheng_xinxiang_traditional
    canonical: 郵政信箱
    numeric_affix:
      affix: 郵政信箱
      direction: left
      digits:
        ascii_probability: 0.3
        unicode_full_width_probability: 0.5
        spellout_probability: 0.2
      use_number_phrase: true
      use_number_phrase_probability: 0.8
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  alphanumeric:
    default: *youzheng_xinxiang
    probability: 0.9
    alternatives:
      - alternative: *youzheng_xinxiang_traditional
        probability: 0.1
    numeric_probability: 1.0
  # Distribution over the number of digits in a box number; sums to 1.0.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Optional postcode prefix in simplified and traditional form.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
postcodes:
  alphanumeric:
    default: &youbian
      canonical: 邮编
      numeric_affix:
        affix: 邮编
        direction: left
      # null_probability means the chance of doing nothing e.g. just the postal code
      null_probability: 0.9
      numeric_probability: 0.0
      numeric_affix_probability: 0.1
    probability: 0.9
    alternatives:
      - alternative: &youbian_traditional
          canonical: 郵編
          numeric_affix:
            affix: 郵編
            direction: left
          # null_probability means the chance of doing nothing e.g. just the postal code
          null_probability: 0.9
          numeric_probability: 0.0
          numeric_affix_probability: 0.1
        probability: 0.1
# Unit (room) suffix "shi".
# NOTE(review): the scraped copy had empty `canonical:` / `affix:` values
# (parsed as null) and no indentation. The character is restored from the
# anchor name (shi) and the nesting from key semantics — confirm both against
# upstream.
units:
  shi: &shi
    canonical: 室
    numeric_affix:
      affix: 室
      direction: right
      add_number_phrase: true
      add_number_phrase_probability: 0.5
      digits:
        ascii_probability: 0.6
        unicode_full_width_probability: 0.1
        spellout_probability: 0.3
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  alphanumeric:
    default: *shi
    numeric_probability: 1.0
    # NOTE(review): placement of the two keys below inside `alphanumeric` is
    # an assumption — confirm against the consumer.
    use_positive_numbers_probability: 1.0
    # If we have a floor number (from building:levels), use it
    use_floor_probability: 0.8
# Country-level overrides that prefer the traditional-character phrases.
# Hong Kong defines the anchors; Macau and Taiwan reuse them via aliases.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
countries:
  # Hong Kong
  hk:
    components:
      # Floor number a little more common in Hong Kong than mainland China
      level:
        null_probability: 0.75
        alphanumeric_probability: 0.25
    numbers: &numbers_prefer_traditional
      default: *hao_traditional
      probability: 0.7
      alternatives:
        - alternative: *hao
          probability: 0.3
    house_numbers: &house_number_prefer_traditional
      alphanumeric:
        default: *hao_traditional
        probability: 0.7
        alternatives:
          - alternative: *hao
            probability: 0.3
      alphanumeric_phrase_probability: 0.6
    levels: &levels_prefer_traditional
      alphanumeric:
        # Phrase choice: default + alternatives sum to 1.0.
        default: *lou_traditional
        probability: 0.75
        alternatives:
          - alternative: *lou
            probability: 0.15
          - alternative: *ceng_traditional
            probability: 0.06
          - alternative: *ceng
            probability: 0.04
        numeric_probability: 1.0
    po_boxes: &po_boxes_prefer_traditional
      alphanumeric:
        default: *youzheng_xinxiang_traditional
        probability: 0.75
        alternatives:
          - alternative: *youzheng_xinxiang
            probability: 0.25
        numeric_probability: 1.0
    postcodes: &postcodes_prefer_traditional
      alphanumeric:
        default: *youbian_traditional
        probability: 0.75
        alternatives:
          - alternative: *youbian
            probability: 0.25
  # Macau
  mo:
    numbers: *numbers_prefer_traditional
    house_numbers: *house_number_prefer_traditional
    levels: *levels_prefer_traditional
    po_boxes: *po_boxes_prefer_traditional
    postcodes: *postcodes_prefer_traditional
    units:
      # NOTE(review): this key is named like a probability but has no scalar
      # value; it is treated here as the parent of the two probabilities
      # below. Other sections call the equivalent mapping `alphanumeric` —
      # confirm which key the consumer reads.
      alphanumeric_probability:
        numeric_probability: 0.9
        alpha_probability: 0.1
  # Taiwan
  tw:
    numbers: *numbers_prefer_traditional
    house_numbers: *house_number_prefer_traditional
    levels: *levels_prefer_traditional
    po_boxes: *po_boxes_prefer_traditional
    postcodes: *postcodes_prefer_traditional
    units:
      # NOTE(review): same caveat as the Macau `units` entry — confirm the
      # intended key name and nesting.
      alphanumeric_probability:
        numeric_probability: 0.9
        alpha_probability: 0.1

View File

@@ -0,0 +1,153 @@
# zh_pinyin.yaml
# --------------
# Chinese (Pinyin)
whitespace: false
# How often optional components are emitted when the source data has none.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 (which duplicated `null_probability` at one level) — confirm
# against upstream.
components:
  level:
    null_probability: 0.85  # Probability of doing nothing if no floor number is specified
    alphanumeric_probability: 0.15
  unit:
    # If no unit number is specified
    null_probability: 0.6
    alphanumeric_probability: 0.4
# Romanized number suffix "-hao", always attached to the number as an affix.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
numbers:
  default: &hao
    canonical: hao
    numeric_affix:
      # Quoted because the value starts with "-"; the string is unchanged.
      affix: '-hao'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
# House numbers reuse the &hao phrase; no alternatives are defined.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
house_numbers:
  alphanumeric:
    default: *hao
  alphanumeric_phrase_probability: 0.6
# Romanized floor suffixes, each with and without tone marks.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
levels:
  lou: &lou
    canonical: lóu
    numeric_affix:
      # Affixes are quoted because they start with "-"; values are unchanged.
      affix: '-lóu'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  lou_no_accent: &lou_no_accent
    canonical: lou
    numeric_affix:
      affix: '-lou'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  ceng: &ceng
    canonical: céng
    numeric_affix:
      affix: '-céng'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  ceng_no_accent: &ceng_no_accent
    canonical: ceng
    numeric_affix:
      affix: '-ceng'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  numbering_starts_at: 1
  alphanumeric:
    # Phrase choice: default + alternatives sum to 1.0.
    default: *lou
    probability: 0.85
    alternatives:
      - alternative: *lou_no_accent
        probability: 0.05
      - alternative: *ceng
        probability: 0.08
      - alternative: *ceng_no_accent
        probability: 0.02
    numeric_probability: 1.0
# Romanized PO box phrase plus the distribution of box-number lengths.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
po_boxes:
  youzheng_xinxiang: &youzheng_xinxiang
    canonical: youzheng xinxiang
    numeric:
      direction: left
    numeric_probability: 1.0
  alphanumeric:
    default: *youzheng_xinxiang
    numeric_probability: 1.0
  # Distribution over the number of digits in a box number; sums to 1.0.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Optional romanized postcode phrase, with and without tone marks.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
postcodes:
  alphanumeric:
    default: &youbian
      canonical: yóubiān
      numeric:
        direction: left
      # null_probability means the chance of doing nothing e.g. just the postal code
      null_probability: 0.9
      numeric_probability: 0.1
    probability: 0.9
    alternatives:
      - alternative: &youbian_no_accent
          canonical: youbian
          numeric:
            direction: left
          # null_probability means the chance of doing nothing e.g. just the postal code
          null_probability: 0.9
          numeric_probability: 0.1
        probability: 0.1
# Romanized unit (room) suffix, with and without tone marks.
# NOTE(review): nesting reconstructed after the scrape flattened all keys to
# column 0 — confirm against upstream.
units:
  shi: &shi
    canonical: shì
    numeric_affix:
      # Affixes are quoted because they start with "-"; values are unchanged.
      affix: '-shì'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  shi_no_accent: &shi_no_accent
    canonical: shi
    numeric_affix:
      affix: '-shi'
      upper_case: false
      direction: right
    numeric_probability: 0.0
    numeric_affix_probability: 1.0
  alphanumeric:
    default: *shi
    probability: 0.8
    alternatives:
      - alternative: *shi_no_accent
        probability: 0.2
    numeric_probability: 1.0
    # NOTE(review): placement of the two keys below inside `alphanumeric` is
    # an assumption — confirm against the consumer.
    use_positive_numbers_probability: 1.0
    # If we have a floor number (from building:levels), use it
    use_floor_probability: 0.8

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: state

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# admin2 is a mix of state_district and city, need to list specifically

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# admin2 is a mix of state_district and city, need to list specifically

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# unclear what admin2 is, maybe city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,4 @@
admin_codes:
# The GeoNames admin1 boundaries are admin_level=5 or 6 in OSM
# However, they do appear to be states, might need to update Czech OSM config
admin1: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# admin2 is a mix of city and island, need to list specifically

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,5 @@
admin_codes:
# The GeoNames admin1 boundaries are admin_level=6 in OSM
# However, they do appear to be states, might need to update Finnish OSM config
admin1: state_district
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# admin2 is a mix of city and city_district, need to list specifically

View File

@@ -0,0 +1,4 @@
admin_codes:
# The GeoNames admin1 boundaries are admin_level=6 in OSM
# However, they do appear to be states, might need to update Hungary OSM config
admin1: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: state

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# admin2 is a mix of state_district and city, need to list specifically

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
# admin2 is a mix of state_district and city, need to list specifically

View File

@@ -0,0 +1,4 @@
admin_codes:
# The admin1 names don't appear to exist in OSM, but would be states otherwise
admin1: state
admin2: state_district

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: state_district

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: state_district

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: country_region
# admin2 is a mix of state_district and city, need to list specifically

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,6 @@
admin_codes:
admin1: state_district
# The notion of a "barrio" in the official sense in PR is not quite a
# municipality, and has no current official purpose, but might be useful
# to have the name + "barrio" version available in libpostal
admin2: city

View File

@@ -0,0 +1,8 @@
# Default component label for each admin level code.
admin_codes:
admin1: state_district
admin2: city
# Per-id exceptions to the defaults above; the two ids below are labelled
# "state" instead. Ids are quoted so they load as strings, not integers.
overrides:
id:
"2593105": "state" # Madeira
"3411865": "state" # Azores

View File

@@ -0,0 +1,4 @@
admin_codes:
admin1: state
# These are mostly admin_level=6, which maps to city in OSM
admin2: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: city

View File

@@ -0,0 +1,22 @@
# Default component label for each admin level code: admin1 entries are cities.
admin_codes:
admin1: city
# Per-id exceptions to the default above; every id listed is labelled
# "suburb" instead. Ids are quoted so they load as strings, not integers.
overrides:
id:
# Districts of Ljubljana (suburbs in OSM)
"3196350": "suburb" # Opština Ljubljana-Vič-Rudnik
"3196352": "suburb" # Opština [historical] Ljubljana-Šiška
"3196355": "suburb" # Opština Ljubljana-Moste-Polje
"3196356": "suburb" # Opština Ljubljana-Center
"3196357": "suburb" # Opčina Ljubljana-Bežigrad
"9794374": "suburb" # Črnuče District
"9794375": "suburb" # Dravlje District
"9794376": "suburb" # Golovec District
"9794377": "suburb" # Jarše District
"9794378": "suburb" # Posavje District
"9794379": "suburb" # Rožnik District
"9794380": "suburb" # Sostro District
"9794381": "suburb" # Šentvid District
"9794382": "suburb" # Šmarna Gora District
"9794384": "suburb" # Trnovo District
"9794386": "suburb" # Vič District

View File

@@ -0,0 +1,17 @@
# Default component label for each admin level code.
admin_codes:
admin1: state
# admin2 defaults to state_district; the Bratislava and Košice districts are
# relabelled city_district through the per-id overrides below
admin2: state_district
# Ids are quoted so they load as strings, not integers.
overrides:
id:
# Districts of Bratislava
"8986283": "city_district" # Okres Bratislava I
"8986339": "city_district" # Okres Bratislava II
"8986340": "city_district" # Okres Bratislava III
"8986341": "city_district" # Okres Bratislava IV
"8986342": "city_district" # Okres Bratislava V
# Districts of Košice
"8986335": "city_district" # Košice I
"8986336": "city_district" # Košice II
"8986337": "city_district" # Košice III
"8986338": "city_district" # Košice IV

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: city

View File

@@ -0,0 +1,8 @@
# Default component label for each admin level code.
admin_codes:
admin1: state
# Per-id exceptions to the default above. The id is quoted so it loads as a
# string, not an integer.
overrides:
id:
# Bangkok the state is treated as a city
# Note: we do this in OSM to get the boundary, so duplicate in GeoNames
"1609348": "city"

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,16 @@
# Default component label for each admin level code.
admin_codes:
admin1: state
admin2: state_district
# Per-id exceptions to the defaults above: the five ids below are labelled
# city_district instead. Ids are quoted so they load as strings, not integers.
overrides:
id:
# Manhattan (Island)
"8479493": "city_district"
# Brooklyn
"5110300": "city_district"
# Bronx
"5110266": "city_district"
# Queens
"5133266": "city_district"
# Staten Island
"5139568": "city_district"

View File

@@ -0,0 +1,2 @@
admin_codes:
admin1: city

View File

@@ -0,0 +1,3 @@
admin_codes:
admin1: state
admin2: state_district

View File

@@ -0,0 +1,120 @@
# Which name tag to use for a place, how often, and per-place exceptions.
# NOTE(review): the scraped copy had every key flattened to column 0, leaving
# many duplicate `default` / `probability` / `alternatives` keys at one level.
# The nesting below (everything under `names`) is reconstructed from key
# semantics — confirm against upstream.
names:
  # Global distribution over name tags; sums to 1.0.
  keys:
    default: name
    probability: 0.75
    alternatives:
      - alternative: short_name  # e.g. NYC
        probability: 0.12
      - alternative: alt_name  # e.g. New York (instead of New York City)
        probability: 0.12
      - alternative: official_name  # e.g. United Kingdom of Great Britain and Northern Ireland
        probability: 0.01
  # Per-component overrides of the global distribution.
  components:
    country:
      # Sums to 1.0. Tag names containing ":" are quoted for safety; the
      # string values are unchanged.
      keys:
        default: name
        probability: 0.87
        alternatives:
          - alternative: 'ISO3166-1:alpha2'
            probability: 0.02
          - alternative: 'ISO3166-1:alpha3'
            probability: 0.01
          - alternative: short_name
            probability: 0.04
          - alternative: alt_name
            probability: 0.04
          - alternative: int_name
            probability: 0.01
          - alternative: official_name  # e.g. United Kingdom of Great Britain and Northern Ireland
            probability: 0.01
  regex_replacements:
    # Half the time, keep only capture group 1 ("<N>e arrondissement"),
    # dropping the leading city name.
    - country: fr
      pattern: "(?:lyon|paris|marseilles?) ([\\d]+(?:e|er|ème|eme) arrondissement)"
      replace_with_group: 1
      replace_probability: 0.5
  prefixes:
    language:
      ru:
        city:
          # These probabilities sum to 0.5; presumably the remainder means
          # "no prefix" — TODO confirm against the consumer.
          default:
            prefix: г.
          probability: 0.35
          alternatives:
            - alternative:
                prefix: г
              probability: 0.1
            - alternative:
                prefix: город
              probability: 0.05
  # This section overrides place names
  # NOTE(review): ids are presumably OSM element ids, matching the
  # `type: relation` / `type: node` values — confirm.
  exceptions:
    # Boroughs of New York City
    - id: 2552485  # New York County (don't use Manhattan)
      type: relation
      default: name  # New York County
      probability: 1.0
    - id: 369518  # Kings County (don't use Brooklyn)
      type: relation
      default: name  # Kings County
      probability: 1.0
    - id: 369519  # Queens County (don't use Queens)
      type: relation
      default: name  # Queens County
      probability: 1.0
    - id: 2552450  # Bronx County (don't use The Bronx)
      type: relation
      default: name  # Bronx County
      probability: 1.0
    - id: 962876  # Richmond County (don't use Staten Island)
      type: relation
      default: name  # Richmond County
      probability: 1.0
    - id: 6577227  # Kingston Parish (always use Kingston)
      type: relation
      default: name  # Kingston
      probability: 1.0
    - id: 30674098  # Sao Paulo
      type: node
      default: name
      probability: 0.9
      alternatives:
        - alternative: alt_name
          probability: 0.09
        - alternative: official_name
          probability: 0.01
    - id: 298285  # Sao Paulo (relation)
      type: relation
      default: name
      probability: 0.9
      alternatives:
        - alternative: alt_name
          probability: 0.09
        - alternative: official_name
          probability: 0.01
    # Same shape as the country distribution, but defaulting to the English
    # name; sums to 1.0.
    - id: 556706  # New Zealand
      type: relation
      default: 'name:en'
      probability: 0.77
      alternatives:
        - alternative: name
          probability: 0.1
        - alternative: 'ISO3166-1:alpha2'
          probability: 0.02
        - alternative: 'ISO3166-1:alpha3'
          probability: 0.01
        - alternative: short_name
          probability: 0.04
        - alternative: alt_name
          probability: 0.04
        - alternative: int_name
          probability: 0.01
        - alternative: official_name
          probability: 0.01
    - id: 2383266  # Melbourne (city center)
      type: relation
      default: alt_name  # Melbourne
      probability: 1.0

View File

@@ -0,0 +1,11 @@
# Leading words that may be removed from a place name when normalizing it.
prefixes:
  - stadtteil
  - stadtbezirk
  - gemeinde
  - landkreis
  - kreis
  - grenze
  - freistaat
  - regierungsbezirk
  - gemeindefreies gebiet

Some files were not shown because too many files have changed in this diff Show More