# bg.yaml
# -------
# Bulgarian language specification
|
||
|
||
alphabet: абвгдежзийклмнопрстуфхцчшщъьюя
|
||
alphanumeric_probability: 0.7
|
||
|
||
components:
|
||
level:
|
||
null_probability: 0.8
|
||
alphanumeric_probability: 0.2
|
||
|
||
staircase:
|
||
null_probability: 0.99
|
||
alphanumeric_probability: 0.01
|
||
|
||
# Entrance more common in Bulgarian addresses
|
||
entrance:
|
||
null_probability: 0.9
|
||
alphanumeric_probability: 0.1
|
||
|
||
unit:
|
||
null_probability: 0.7
|
||
alphanumeric_probability: 0.3
|
||
|
||
|
||
combinations:
|
||
-
|
||
components:
|
||
- house_number
|
||
- staircase
|
||
- level
|
||
- unit
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.005
|
||
-
|
||
components:
|
||
- house_number
|
||
- level
|
||
- unit
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.005
|
||
-
|
||
components:
|
||
- house_number
|
||
- level
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.1
|
||
# For unit types like 2/34
|
||
-
|
||
components:
|
||
- house_number
|
||
- unit
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.005
|
||
|
||
|
||
numbers:
|
||
default: &nomer
|
||
canonical: номер
|
||
abbreviated: №
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
probability: 0.95
|
||
alternatives:
|
||
- alternative: &nomer_latin
|
||
canonical: nomer
|
||
abbreviated: "no"
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
probability: 0.05
|
||
|
||
|
||
house_number:
|
||
alphanumeric:
|
||
default: *nomer
|
||
probability: 0.95
|
||
alternatives:
|
||
- alternative: *nomer_latin
|
||
probability: 0.05
|
||
|
||
alphanumeric_phrase_probability: 0.2
|
||
|
||
and:
|
||
default: &i
|
||
canonical: и
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: &i_latin
|
||
canonical: i
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.1
|
||
|
||
|
||
cross_streets:
|
||
i: *i
|
||
i_latin: *i_latin
|
||
corner: &ugul
|
||
canonical: ъгъл
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
ugul_latin: &ugul_latin
|
||
canonical: ŭgŭl
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
ugul_na: &ugul_na
|
||
canonical: ъгъл на
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
ugul_na_latin: &ugul_na_latin
|
||
canonical: ŭgŭl na
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
na_ugula_na: &na_ugula_na
|
||
canonical: на ъгъла на
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
na_ugula_na_latin: &na_ugula_na_latin
|
||
canonical: na ŭgŭla na
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
intersection:
|
||
default: *i
|
||
probability: 0.65
|
||
alternatives:
|
||
- alternative: *i_latin
|
||
probability: 0.05
|
||
- alternative: *ugul_na
|
||
probability: 0.075
|
||
- alternative: *ugul_na_latin
|
||
probability: 0.075
|
||
- alternative: *ugul
|
||
probability: 0.05
|
||
- alternative: *ugul_latin
|
||
probability: 0.05
|
||
- alternative: *na_ugula_na
|
||
probability: 0.025
|
||
- alternative: *na_ugula_na_latin
|
||
probability: 0.025
|
||
mezhdu: &mezhdu
|
||
canonical: между
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
parentheses_probability: 0.5
|
||
mezhdu_latin: &mezhdu_latin
|
||
canonical: mezhdu
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
parentheses_probability: 0.5
|
||
between:
|
||
default: *mezhdu
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: *mezhdu_latin
|
||
probability: 0.1
|
||
|
||
levels:
|
||
# Generic Cyrillic phrase for a numbered floor ("етаж", abbreviated "ет").
# Spelled with the Bulgarian letter "е": the Russian "э" is not part of the
# `alphabet` declared at the top of this file, and the ground-floor phrase
# `prizemen_etazh` in this same section already uses "етаж".
#
# canonical/abbreviated/sample probabilities sum to 1.0 (0.4 + 0.3 + 0.3),
# as do numeric_probability + ordinal_probability (0.4 + 0.6) and each
# `digits` group.
#
# NOTE(review): indentation was reconstructed from a flattened copy; `digits`
# is assumed to nest under `numeric` / `ordinal` because each of them is
# followed by its own `digits` group — confirm against a sibling language file.
etazh: &etazh
    canonical: етаж
    abbreviated: ет
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.3
    sample_probability: 0.3
    # Cardinal form, e.g. a phrase combined with a plain number.
    numeric:
        direction: left
        direction_probability: 0.9
        digits:
            ascii_probability: 0.8
            roman_numeral_probability: 0.1
            spellout_probability: 0.1
    # Ordinal form, e.g. a phrase combined with an ordinal number.
    ordinal:
        direction: right
        digits:
            ascii_probability: 0.5
            roman_numeral_probability: 0.3
            spellout_probability: 0.2
    numeric_probability: 0.4
    ordinal_probability: 0.6
|
||
etazh_latin: &etazh_latin
|
||
canonical: etazh
|
||
abbreviated: et
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.3
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
kat: &kat
|
||
canonical: кат
|
||
sample: true
|
||
canonical_probability: 0.7
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
kat_latin: &kat_latin
|
||
canonical: kat
|
||
sample: true
|
||
canonical_probability: 0.7
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
nivo: &nivo
|
||
canonical: ниво
|
||
sample: true
|
||
canonical_probability: 0.7
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
nivo_latin: &nivo_latin
|
||
canonical: nivo
|
||
sample: true
|
||
canonical_probability: 0.7
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
prizemen_etazh: &prizemen_etazh
|
||
canonical: приземен етаж
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
prizemen_etazh_latin: &prizemen_etazh_latin
|
||
canonical: prizemen etazh
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
parter: &parter
|
||
canonical: партер
|
||
sample: true
|
||
canonical_probability: 0.9
|
||
sample_probability: 0.1
|
||
parter_latin: &parter_latin
|
||
canonical: parter
|
||
sample: true
|
||
canonical_probability: 0.9
|
||
sample_probability: 0.1
|
||
suteren: &suteren
|
||
canonical: сутерен
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
ordinal:
|
||
direction: right
|
||
number_abs_value: true
|
||
number_min_abs_value: 1
|
||
standalone_probability: 0.985
|
||
numeric_probability: 0.01
|
||
ordinal_probability: 0.005
|
||
suteren_latin: &suteren_latin
|
||
canonical: suteren
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
ordinal:
|
||
direction: right
|
||
number_abs_value: true
|
||
number_min_abs_value: 1
|
||
standalone_probability: 0.985
|
||
numeric_probability: 0.01
|
||
ordinal_probability: 0.005
|
||
|
||
aliases:
|
||
"<-1":
|
||
default: *suteren
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: *suteren_latin
|
||
probability: 0.1
|
||
"-1":
|
||
default: *suteren
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: *suteren_latin
|
||
probability: 0.1
|
||
"0":
|
||
default: *prizemen_etazh
|
||
probability: 0.7
|
||
alternatives:
|
||
- alternative: *prizemen_etazh_latin
|
||
probability: 0.05
|
||
- alternative: *parter
|
||
probability: 0.2
|
||
- alternative: *parter_latin
|
||
probability: 0.05
|
||
|
||
numbering_starts_at: 0
|
||
|
||
alphanumeric:
|
||
default: *etazh
|
||
probability: 0.8
|
||
alternatives:
|
||
- alternative: *etazh_latin
|
||
probability: 0.1
|
||
- alternative: *nivo
|
||
probability: 0.09
|
||
- alternative: *nivo_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.79 # With this probability, pick an integer
|
||
roman_numeral_probability: 0.2
|
||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||
|
||
blocks:
|
||
alphanumeric:
|
||
default: &blok
|
||
canonical: блок
|
||
abbreviated: бл
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
probability: 0.95
|
||
alternatives:
|
||
- alternative: &blok_latin
|
||
canonical: blok
|
||
abbreviated: bl
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
probability: 0.05
|
||
|
||
categories:
|
||
# Phrases meaning "near <place>", each in Cyrillic and in Latin transliteration.
#
# The default's probability plus all alternatives' probabilities must form a
# distribution: 0.69 + 0.01 + 0.09 + 0.01 + 0.09 + 0.01 + 0.09 + 0.01 = 1.0.
# The default was previously 0.79, which made the group sum to 1.10 — unlike
# every other default/alternatives group in this file, which sums to 1.0.
#
# NOTE(review): indentation was reconstructed from a flattened copy; the
# default / probability / alternatives layout mirrors the other groups in this
# file — confirm against a sibling language file.
near:
    default:
        canonical: в близост до
        sample: true
        canonical_probability: 0.8
        sample_probability: 0.2
    probability: 0.69
    alternatives:
        - alternative:
              canonical: v blizost do
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.01
        - alternative:
              canonical: близо до
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.09
        - alternative:
              canonical: blizo do
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.01
        - alternative:
              canonical: около
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.09
        - alternative:
              canonical: okolo
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.01
        - alternative:
              canonical: в района на
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.09
        - alternative:
              canonical: v raĭona na
              sample: true
              canonical_probability: 0.8
              sample_probability: 0.2
          probability: 0.01
|
||
|
||
nearby:
|
||
default:
|
||
canonical: наблизо
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.64
|
||
alternatives:
|
||
- alternative:
|
||
canonical: nablizo
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: в близост до тук
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.19
|
||
- alternative:
|
||
canonical: v blizost do tuk
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: тук
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.09
|
||
- alternative:
|
||
canonical: tuk
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: по целия тук
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: po tseliya tuk
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
near_me:
|
||
default:
|
||
canonical: близо до мен
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative:
|
||
canonical: blizo do men
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
in:
|
||
default:
|
||
canonical: в
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative:
|
||
canonical: v
|
||
probability: 0.01
|
||
|
||
# Probabilities of each phrase
|
||
near_probability: 0.35
|
||
nearby_probability: 0.2
|
||
near_me_probability: 0.1
|
||
in_probability: 0.35
|
||
|
||
directions:
|
||
dyasno: &dyasno
|
||
canonical: дясно
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
dyasno_latin: &dyasno_latin
|
||
canonical: dyasno
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
lyavo: &lyavo
|
||
canonical: ляво
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
lyavo_latin: &lyavo_latin
|
||
canonical: lyavo
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
alternatives:
|
||
- alternative: *dyasno
|
||
probability: 0.49
|
||
- alternative: *dyasno_latin
|
||
probability: 0.01
|
||
- alternative: *lyavo
|
||
probability: 0.49
|
||
- alternative: *lyavo_latin
|
||
probability: 0.01
|
||
|
||
cardinal_directions:
|
||
istok: &istok
|
||
canonical: изток
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: и
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
istok_latin: &istok_latin
|
||
canonical: istok
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: i
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
zapad: &zapad
|
||
canonical: запад
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: з
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
zapad_latin: &zapad_latin
|
||
canonical: zapad
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: z
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
sever: &sever
|
||
canonical: север
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: с
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
sever_latin: &sever_latin
|
||
canonical: sever
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: s
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
yug: &yug
|
||
canonical: юг
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: ю
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
yug_latin: &yug_latin
|
||
canonical: yug
|
||
abbreviated: y
|
||
sample: true
|
||
canonical_probability: 0.75
|
||
abbreviated_probability: 0.1
|
||
sample_probability: 0.15
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: y
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
alternatives:
|
||
- alternative: *sever
|
||
probability: 0.24
|
||
- alternative: *sever_latin
|
||
probability: 0.01
|
||
- alternative: *istok
|
||
probability: 0.24
|
||
- alternative: *istok_latin
|
||
probability: 0.01
|
||
- alternative: *yug
|
||
probability: 0.24
|
||
- alternative: *yug_latin
|
||
probability: 0.01
|
||
- alternative: *zapad
|
||
probability: 0.24
|
||
- alternative: *zapad_latin
|
||
probability: 0.01
|
||
|
||
entrances:
|
||
vkhod: &vkhod
|
||
canonical: вход
|
||
abbreviated: вх
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
vkhod_latin: &vkhod_latin
|
||
canonical: vkhod
|
||
abbreviated: vkh
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
|
||
# вход 1, вход A, etc.
|
||
alphanumeric:
|
||
default: *vkhod
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative: *vkhod_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.1 # e.g. вх 1
|
||
alpha_probability: 0.85 # e.g. вх A
|
||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
|
||
|
||
staircases:
|
||
stulbishte: &stulbishte
|
||
canonical: стълбище
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
stulbishte_latin: &stulbishte_latin
|
||
canonical: stŭlbishte
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
|
||
alphanumeric: &staircase_alphanumeric
|
||
default: *stulbishte
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative: *stulbishte_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.75
|
||
alpha_probability: 0.2
|
||
numeric_plus_alpha_probability: 0.025
|
||
alpha_plus_numeric_probability: 0.025
|
||
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
directional:
|
||
direction: right
|
||
direction_probability: 0.85
|
||
modifier:
|
||
alternatives:
|
||
- alternative: *dyasno
|
||
probability: 0.19
|
||
- alternative: *dyasno_latin
|
||
probability: 0.01
|
||
- alternative: *lyavo
|
||
probability: 0.19
|
||
- alternative: *lyavo_latin
|
||
probability: 0.01
|
||
- alternative: *sever
|
||
probability: 0.14
|
||
- alternative: *sever_latin
|
||
probability: 0.01
|
||
- alternative: *yug
|
||
probability: 0.14
|
||
- alternative: *yug_latin
|
||
probability: 0.01
|
||
- alternative: *istok
|
||
probability: 0.14
|
||
- alternative: *istok_latin
|
||
probability: 0.01
|
||
- alternative: *zapad
|
||
probability: 0.14
|
||
- alternative: *zapad_latin
|
||
probability: 0.01
|
||
|
||
po_boxes:
|
||
poshtenska_kutiya: &poshtenska_kutiya
|
||
canonical: пощенска кутия
|
||
abbreviated: пк
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.5
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
poshtenska_kutiya_latin: &poshtenska_kutiya_latin
|
||
canonical: poshtenska kutiya
|
||
abbreviated: pk
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.5
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
|
||
alphanumeric:
|
||
default: *poshtenska_kutiya
|
||
probability: 0.8
|
||
alternatives:
|
||
- alternative: *poshtenska_kutiya_latin
|
||
probability: 0.2
|
||
numeric_probability: 0.9 # p.k 123
|
||
alpha_probability: 0.05 # p.k А
|
||
numeric_plus_alpha_probability: 0.04 # p.k 123А
|
||
alpha_plus_numeric_probability: 0.01 # p.k А123
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
digits:
|
||
- length: 1
|
||
probability: 0.05
|
||
- length: 2
|
||
probability: 0.1
|
||
- length: 3
|
||
probability: 0.2
|
||
- length: 4
|
||
probability: 0.5
|
||
- length: 5
|
||
probability: 0.1
|
||
- length: 6
|
||
probability: 0.05
|
||
|
||
units:
|
||
apartament: &apartament
|
||
canonical: апартамент
|
||
abbreviated: ап
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.6
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
apartament_latin: &apartament_latin
|
||
canonical: apartament
|
||
abbreviated: ap
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.6
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
staya: &staya
|
||
canonical: стая
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
staya_latin: &staya_latin
|
||
canonical: staya
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
|
||
ofis: &ofis
|
||
canonical: офис
|
||
abbreviated: оф
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.5
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
ofis_latin: &ofis_latin
|
||
canonical: ofis
|
||
abbreviated: of
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.5
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
|
||
alphanumeric: &unit_alphanumeric
|
||
default: *apartament
|
||
probability: 0.65
|
||
alternatives:
|
||
- alternative: *apartament_latin
|
||
probability: 0.05
|
||
- alternative: *staya
|
||
probability: 0.25
|
||
- alternative: *staya_latin
|
||
probability: 0.05
|
||
|
||
numeric_probability: 0.9 # e.g. ап 1
|
||
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||
alpha_plus_numeric_probability: 0.03 # e.g. А1
|
||
alpha_probability: 0.04 # e.g. ап А
|
||
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||
use_floor_probability: 0.1
|
||
|
||
zones:
|
||
commercial:
|
||
default: *ofis
|
||
probability: 0.75
|
||
alternatives:
|
||
- alternative: *ofis_latin
|
||
probability: 0.05
|
||
- alternative: *staya
|
||
probability: 0.15
|
||
- alternative: *staya_latin
|
||
probability: 0.05
|
||
numeric_probability: 0.95 # e.g. ofis 1
|
||
numeric_plus_alpha_probability: 0.01 # e.g. ofis 1A
|
||
alpha_plus_numeric_probability: 0.01 # e.g. of A1
|
||
alpha_probability: 0.03 # e.g. of A
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
university:
|
||
default: *staya
|
||
probability: 0.95
|
||
alternatives:
|
||
- alternative: *staya_latin
|
||
probability: 0.05
|
||
numeric_probability: 0.95 # e.g. staya 1
|
||
numeric_plus_alpha_probability: 0.01 # e.g. staya 1A
|
||
alpha_plus_numeric_probability: 0.01 # e.g. staya A1
|
||
alpha_probability: 0.03 # e.g. staya A
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1 |