[addresses] Serbian address config

This commit is contained in:
Al
2016-07-05 15:48:27 -04:00
parent 631136e98f
commit 79e1d7639b

899
resources/addresses/sr.yaml Normal file
View File

@@ -0,0 +1,899 @@
# sr.yaml
# -------
# Serbian language specification
# Letters of the Serbian Cyrillic alphabet (30 characters).
alphabet: "абвгдђежзијклљмнњопрстћуфхцчџш"
# Language-wide alphanumeric weight.
alphanumeric_probability: 0.7
# Per-component weights. Within each component the listed
# probabilities sum to 1.0.
components:
  level:
    null_probability: 0.95
    alphanumeric_probability: 0.04
    standalone_probability: 0.01
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.7
    alphanumeric_probability: 0.3
  # Note: no combinations because of the house numbering scheme
# Number phrases: "број" / "broj" (number), abbreviated "бр" / "br".
# NOTE(review): unlike the `and` section, neither `default` nor the
# alternative below carries a `probability` weight -- confirm that the
# consumer tolerates this, or add explicit weights.
numbers:
  # Cyrillic form.
  default: &broj
    canonical: број
    abbreviated: бр
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
    numeric_affix:
      affix: "бр."
      direction: left
    # numeric + numeric_affix = 1.0
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  alternatives:
    # Latin-script form, same weights as the Cyrillic one.
    - alternative: &broj_latin
        canonical: broj
        abbreviated: br
        sample: true
        canonical_probability: 0.3
        abbreviated_probability: 0.6
        sample_probability: 0.1
        numeric:
          direction: left
        numeric_affix:
          affix: "br."
          direction: left
        numeric_probability: 0.4
        numeric_affix_probability: 0.6
# Conjunction "и" / "i" (and). Cyrillic is the default (0.9); the
# Latin-script form is the only alternative (0.1).
and:
  default: &i
    canonical: и
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  probability: 0.9
  alternatives:
    - alternative: &i_latin
        canonical: i
        sample: true
        canonical_probability: 0.8
        sample_probability: 0.2
      probability: 0.1
# Phrases for intersections and "between" constructions.
# Each phrase is given in Cyrillic and in Latin script.
cross_streets:
  # Reuse the conjunction phrases anchored in the `and` section.
  i: *i
  i_latin: *i_latin

  # "на" / "na" (at)
  at: &na
    canonical: на
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_latin: &na_latin
    canonical: na
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2

  # "угао" / "ugao" (corner)
  corner: &ugao
    canonical: угао
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  ugao_latin: &ugao_latin
    canonical: ugao
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2

  # "на углу" / "na uglu" (at the corner)
  na_uglu: &na_uglu
    canonical: на углу
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu_latin: &na_uglu_latin
    canonical: na uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2

  # Distribution over intersection phrases; default plus alternatives
  # sum to 1.0.
  intersection:
    default: *i
    probability: 0.65
    alternatives:
      - alternative: *i_latin
        probability: 0.05
      - alternative: *na
        probability: 0.075
      - alternative: *na_latin
        probability: 0.025
      - alternative: *ugao
        probability: 0.1
      - alternative: *ugao_latin
        probability: 0.05
      - alternative: *na_uglu
        probability: 0.025
      - alternative: *na_uglu_latin
        probability: 0.025

  # "између" / "između" (between)
  izmedu: &izmedu
    canonical: између
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5
  izmedu_latin: &izmedu_latin
    canonical: između
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5

  # Distribution over "between" phrases (0.9 Cyrillic, 0.1 Latin).
  between:
    default: *izmedu
    probability: 0.9
    alternatives:
      - alternative: *izmedu_latin
        probability: 0.1
# Floor/level phrases. Each phrase is defined once in Cyrillic and once
# in Latin script with identical weights, then referenced by alias from
# `aliases` and `alphanumeric` below.
levels:
  # "спрат" / "sprat" (floor)
  sprat: &sprat
    canonical: спрат
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    # numeric + ordinal = 1.0
    numeric_probability: 0.4
    ordinal_probability: 0.6
  sprat_latin: &sprat_latin
    canonical: sprat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6

  # "кат" / "kat" (floor, storey)
  kat: &kat
    canonical: кат
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  kat_latin: &kat_latin
    canonical: kat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6

  # "етажа" / "etaža" (storey), abbreviated "ет" / "et"
  etaza: &etaza
    canonical: етажа
    abbreviated: ет
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  etaza_latin: &etaza_latin
    canonical: etaža
    abbreviated: et
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6

  # "приземље" / "prizemlje" (ground floor); no number attached.
  prizemlje: &prizemlje
    canonical: приземље
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  prizemlje_latin: &prizemlje_latin
    canonical: prizemlje
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1

  # "партер" / "parter" (ground floor); no number attached.
  parter: &parter
    canonical: партер
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  parter_latin: &parter_latin
    canonical: parter
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1

  # "подрум" / "podrum" (basement); almost always used without a number.
  podrum: &podrum
    canonical: подрум
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. подрум 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. подрум
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    # standalone + numeric + ordinal = 1.0
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005
  podrum_latin: &podrum_latin
    canonical: podrum
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. podrum 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. podrum
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005

  # Phrase distributions keyed by floor number ("<-1", "-1", "0").
  aliases:
    "<-1":
      default: *podrum
      probability: 0.8
      alternatives:
        - alternative: *podrum_latin
          probability: 0.2
    "-1":
      default: *podrum
      probability: 0.8
      alternatives:
        - alternative: *podrum_latin
          probability: 0.2
    # Default plus alternatives sum to 1.0.
    "0":
      default: *prizemlje
      probability: 0.45
      alternatives:
        - alternative: *prizemlje_latin
          probability: 0.05
        - alternative: *parter
          probability: 0.35
        - alternative: *parter_latin
          probability: 0.05
        - alternative: *sprat
          probability: 0.04
        - alternative: *sprat_latin
          probability: 0.01
        - alternative: *kat
          probability: 0.04
        - alternative: *kat_latin
          probability: 0.01

  numbering_starts_at: 0

  # Distribution over the numbered floor phrases; default plus
  # alternatives sum to 1.0.
  alphanumeric:
    default: *sprat
    probability: 0.65
    alternatives:
      - alternative: *sprat_latin
        probability: 0.1
      - alternative: *kat
        probability: 0.15
      - alternative: *kat_latin
        probability: 0.05
      - alternative: *etaza
        probability: 0.04
      - alternative: *etaza_latin
        probability: 0.01
    numeric_probability: 0.99  # With this probability, pick an integer
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
# Relative directions: "десно" / "desno" (right) and "лево" / "levo"
# (left), each in Cyrillic and Latin script.
directions:
  right: &desno
    canonical: десно
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  desno_latin: &desno_latin
    canonical: desno
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  left: &levo
    canonical: лево
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  levo_latin: &levo_latin
    # Latin-script spelling. This previously repeated the Cyrillic
    # "лево", so the Latin variant of "left" was never produced.
    canonical: levo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  # Distribution over the four phrases; weights sum to 1.0.
  alternatives:
    - alternative: *desno
      probability: 0.45
    - alternative: *desno_latin
      probability: 0.05
    - alternative: *levo
      probability: 0.45
    - alternative: *levo_latin
      probability: 0.05
# Compass directions in Cyrillic and Latin script. Each can be used as
# a full word or as a single-letter affix.
cardinal_directions:
  # "исток" / "istok" (east)
  east: &istok
    canonical: исток
    abbreviated: и
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: и
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  istok_latin: &istok_latin
    canonical: istok
    abbreviated: i
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: i
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  # "запад" / "zapad" (west)
  west: &zapad
    canonical: запад
    abbreviated: з
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: з
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  zapad_latin: &zapad_latin
    canonical: zapad
    abbreviated: z
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: z
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  # "север" / "sever" (north)
  north: &sever
    canonical: север
    abbreviated: с
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: с
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  sever_latin: &sever_latin
    canonical: sever
    abbreviated: s
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  # "југ" / "jug" (south). Unlike the other three directions, these
  # two entries also set `sample` with its own probability.
  south: &jug
    canonical: југ
    abbreviated: ј
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: ј
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  jug_latin: &jug_latin
    canonical: jug
    abbreviated: j
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: j
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  # Distribution over all eight phrases; weights sum to 1.0
  # (0.23 Cyrillic + 0.02 Latin per direction).
  alternatives:
    - alternative: *sever
      probability: 0.23
    - alternative: *sever_latin
      probability: 0.02
    - alternative: *istok
      probability: 0.23
    - alternative: *istok_latin
      probability: 0.02
    - alternative: *jug
      probability: 0.23
    - alternative: *jug_latin
      probability: 0.02
    - alternative: *zapad
      probability: 0.23
    - alternative: *zapad_latin
      probability: 0.02
# Entrance phrases: "улаз" / "ulaz" (entrance).
entrances:
  ulaz: &ulaz
    canonical: улаз
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  ulaz_latin: &ulaz_latin
    canonical: ulaz
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Ulaz 1, Ulaz A, etc.
  alphanumeric: &entrance_alphanumeric
    default: *ulaz
    probability: 0.8
    alternatives:
      - alternative: *ulaz_latin
        probability: 0.2
    # The four identifier-type weights sum to 1.0.
    numeric_probability: 0.1  # e.g. Ulaz 1
    alpha_probability: 0.85  # e.g. Ulaz A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
# Staircase phrases: "степениште" / "stepenište" (staircase).
staircases:
  stepeniste: &stepeniste
    canonical: степениште
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  stepeniste_latin: &stepeniste_latin
    canonical: stepenište
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  alphanumeric: &staircase_alphanumeric
    default: *stepeniste
    probability: 0.8
    alternatives:
      - alternative: *stepeniste_latin
        probability: 0.2
    # The four identifier-type weights sum to 1.0.
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # NOTE(review): `directional` is placed as a sibling of
  # `alphanumeric`; the original indentation was lost, so confirm it
  # does not belong inside `alphanumeric` instead.
  directional:
    direction: right
    direction_probability: 0.85
    # Direction words that may modify a staircase; weights sum to 1.0.
    # The Latin-script desno/levo variants are not listed here.
    modifier:
      alternatives:
        - alternative: *desno
          probability: 0.2
        - alternative: *levo
          probability: 0.2
        - alternative: *sever
          probability: 0.14
        - alternative: *sever_latin
          probability: 0.01
        - alternative: *jug
          probability: 0.14
        - alternative: *jug_latin
          probability: 0.01
        - alternative: *istok
          probability: 0.14
        - alternative: *istok_latin
          probability: 0.01
        - alternative: *zapad
          probability: 0.14
        - alternative: *zapad_latin
          probability: 0.01
# Post office box phrases, each in Cyrillic and Latin script:
# "поштански фах" (abbreviated "пф"), "поштански претинац" and
# "поштански преградак".
po_boxes:
  postanski_fah: &postanski_fah
    canonical: поштански фах
    abbreviated: пф
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # поштански фах бр. 1234
  postanski_fah_latin: &postanski_fah_latin
    canonical: poštanski fah
    abbreviated: pf
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # poštanski fah br. 1234

  postanski_pretinac: &postanski_pretinac
    canonical: поштански претинац
    sample: true
    canonical_probability: 0.6
    # Was 0.5, which made canonical + sample = 1.1. Now matches the
    # Latin-script twin so the two weights sum to 1.0.
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pretinac_latin: &postanski_pretinac_latin
    canonical: poštanski pretinac
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2

  postanski_pregradak: &postanski_pregradak
    canonical: поштански преградак
    sample: true
    canonical_probability: 0.6
    # Was 0.5, which made canonical + sample = 1.1. Now matches the
    # Latin-script twin so the two weights sum to 1.0.
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pregradak_latin: &postanski_pregradak_latin
    canonical: poštanski pregradak
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2

  # Distribution over the six phrases; default plus alternatives sum
  # to 1.0.
  alphanumeric:
    default: *postanski_fah
    probability: 0.7
    alternatives:
      - alternative: *postanski_fah_latin
        probability: 0.05
      - alternative: *postanski_pretinac
        probability: 0.1
      - alternative: *postanski_pretinac_latin
        probability: 0.05
      - alternative: *postanski_pregradak
        probability: 0.075
      - alternative: *postanski_pregradak_latin
        probability: 0.025
    numeric_probability: 0.9  # pf 123
    alpha_probability: 0.05  # pf A
    numeric_plus_alpha_probability: 0.04  # pf 123G
    alpha_plus_numeric_probability: 0.01  # pf A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

  # Distribution over box-number lengths in digits; weights sum to 1.0.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Unit phrases, each in Cyrillic and Latin script: "стан" (flat),
# "апартман" (apartment), "соба" (room), "канцеларија" (office).
units:
  stan: &stan
    canonical: стан
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  stan_latin: &stan_latin
    canonical: stan
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  apartman: &apartman
    canonical: апартман
    abbreviated: ап
    sample: true
    # canonical + abbreviated + sample = 1.0
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  apartman_latin: &apartman_latin
    canonical: apartman
    abbreviated: ap
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  soba: &soba
    canonical: соба
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  soba_latin: &soba_latin
    canonical: soba
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  kancelarija: &kancelarija
    canonical: канцеларија
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  kancelarija_latin: &kancelarija_latin
    canonical: kancelarija
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1

  # Default unit-phrase distribution; default plus alternatives sum
  # to 1.0. The office phrases appear only in the commercial zone.
  alphanumeric: &unit_alphanumeric
    default: *stan
    probability: 0.5
    alternatives:
      - alternative: *stan_latin
        probability: 0.1
      - alternative: *apartman
        probability: 0.2
      - alternative: *apartman_latin
        probability: 0.05
      - alternative: *soba
        probability: 0.1
      - alternative: *soba_latin
        probability: 0.05
    numeric_probability: 0.9  # e.g. stan 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. stan A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    # NOTE(review): placed inside `alphanumeric`; the original
    # indentation was lost -- confirm the nesting level.
    use_floor_probability: 0.01

  # Zone-specific phrase distributions.
  zones:
    commercial: &commercial_unit_types
      default: *soba
      probability: 0.55
      alternatives:
        - alternative: *soba_latin
          probability: 0.05
        - alternative: *kancelarija
          probability: 0.35
        - alternative: *kancelarija_latin
          probability: 0.05
      numeric_probability: 0.95  # e.g. soba 1
      numeric_plus_alpha_probability: 0.01  # e.g. soba 1A
      alpha_plus_numeric_probability: 0.01  # e.g. soba A1
      alpha_probability: 0.03  # e.g. soba A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *soba
      probability: 0.9
      alternatives:
        - alternative: *soba_latin
          probability: 0.1
      numeric_probability: 0.95  # e.g. soba 1
      numeric_plus_alpha_probability: 0.01  # e.g. soba 1A
      alpha_plus_numeric_probability: 0.01  # e.g. soba A1
      alpha_probability: 0.03  # e.g. soba A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1