1115 lines
32 KiB
YAML
1115 lines
32 KiB
YAML
# ru.yaml
|
||
# -------
|
||
# Russian language specification
|
||
|
||
alphabet: абвгдежзийклмнопрстуфхцчшщъыьэюя
|
||
alphabet_probability: 0.7
|
||
|
||
components:
|
||
level:
|
||
null_probability: 0.95
|
||
alphanumeric_probability: 0.04
|
||
standalone_probability: 0.01
|
||
|
||
staircase:
|
||
null_probability: 0.99
|
||
alphanumeric_probability: 0.01
|
||
|
||
entrance:
|
||
null_probability: 0.999
|
||
alphanumeric_probability: 0.001
|
||
|
||
unit:
|
||
null_probability: 0.6
|
||
alphanumeric_probability: 0.4
|
||
|
||
|
||
combinations:
|
||
-
|
||
components:
|
||
- house_number
|
||
- staircase
|
||
- level
|
||
- unit
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.005
|
||
-
|
||
components:
|
||
- house_number
|
||
- level
|
||
- unit
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.005
|
||
-
|
||
components:
|
||
- house_number
|
||
- level
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.01
|
||
# For unit types like 2/34
|
||
-
|
||
components:
|
||
- house_number
|
||
- unit
|
||
label: house_number
|
||
separators:
|
||
- separator: "/"
|
||
probability: 0.95
|
||
- separator: "-"
|
||
probability: 0.05
|
||
probability: 0.005
|
||
|
||
|
||
numbers:
|
||
default: &nomer
|
||
canonical: номер
|
||
abbreviated: №
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
probability: 0.95
|
||
alternatives:
|
||
- alternative: &nomer_latin
|
||
canonical: nomer
|
||
abbreviated: "no"
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
probability: 0.05
|
||
|
||
|
||
house_number:
|
||
dom: &dom
|
||
canonical: дом
|
||
abbreviated: д
|
||
sample: true
|
||
canonical_probability: 0.6
|
||
abbreviated_probability: 0.3
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
dom_latin: &dom_latin
|
||
canonical: dom
|
||
abbreviated: d
|
||
sample: true
|
||
canonical_probability: 0.6
|
||
abbreviated_probability: 0.3
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
alphanumeric:
|
||
default: *dom
|
||
probability: 0.95
|
||
alternatives:
|
||
- alternative: *dom_latin
|
||
probability: 0.05
|
||
|
||
# It is very common in Russian addresses to write дом (or its abbreviation д) before the house number
|
||
alphanumeric_phrase_probability: 0.6
|
||
|
||
and:
|
||
default: &i
|
||
canonical: и
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: &i_latin
|
||
canonical: i
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.1
|
||
|
||
|
||
|
||
cross_streets:
|
||
i: *i
|
||
i_latin: *i_latin
|
||
corner: &ugol
|
||
canonical: угол
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
ugol_latin: &ugol_latin
|
||
canonical: ugol
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
uglu: &uglu
|
||
canonical: углу
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
uglu_latin: &uglu_latin
|
||
canonical: uglu
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
na_uglu: &na_uglu
|
||
canonical: на углу
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
na_uglu_latin: &na_uglu_latin
|
||
canonical: na uglu
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
intersection:
|
||
default: *i
|
||
probability: 0.65
|
||
alternatives:
|
||
- alternative: *i_latin
|
||
probability: 0.05
|
||
- alternative: *ugol
|
||
probability: 0.075
|
||
- alternative: *ugol_latin
|
||
probability: 0.075
|
||
- alternative: *uglu
|
||
probability: 0.05
|
||
- alternative: *uglu_latin
|
||
probability: 0.05
|
||
- alternative: *na_uglu
|
||
probability: 0.025
|
||
- alternative: *na_uglu_latin
|
||
probability: 0.025
|
||
mezhdu: &mezhdu
|
||
canonical: между
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
parentheses_probability: 0.5
|
||
mezhdu_latin: &mezhdu_latin
|
||
canonical: mezhdu
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
parentheses_probability: 0.5
|
||
between:
|
||
default: *mezhdu
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: *mezhdu_latin
|
||
probability: 0.1
|
||
|
||
levels:
|
||
etazh: &etazh
|
||
canonical: этаж
|
||
abbreviated: эт
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.3
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
etazh_latin: &etazh_latin
|
||
canonical: etazh
|
||
abbreviated: et
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.3
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
uroven: &uroven
|
||
canonical: уровень
|
||
sample: true
|
||
canonical_probability: 0.7
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
uroven_latin: &uroven_latin
|
||
canonical: uroven'
|
||
sample: true
|
||
canonical_probability: 0.7
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
digits:
|
||
ascii_probability: 0.8
|
||
roman_numeral_probability: 0.1
|
||
spellout_probability: 0.1
|
||
ordinal:
|
||
direction: right
|
||
digits:
|
||
ascii_probability: 0.5
|
||
roman_numeral_probability: 0.3
|
||
spellout_probability: 0.2
|
||
numeric_probability: 0.4
|
||
ordinal_probability: 0.6
|
||
pervyy_etazh: &pervyy_etazh
|
||
canonical: первый этаж
|
||
sample: true
|
||
canonical_probability: 0.9
|
||
sample_probability: 0.1
|
||
pervyy_etazh_latin: &pervyy_etazh_latin
|
||
canonical: pervyy etazh
|
||
sample: true
|
||
canonical_probability: 0.9
|
||
sample_probability: 0.1
|
||
nizhniy_etazh: &nizhniy_etazh
|
||
canonical: нижний этаж
|
||
sample: true
|
||
canonical_probability: 0.9
|
||
sample_probability: 0.1
|
||
nizhniy_etazh_latin: &nizhniy_etazh_latin
|
||
canonical: nizhniy etazh
|
||
sample: true
|
||
canonical_probability: 0.9
|
||
sample_probability: 0.1
|
||
tsokolnyy_etazh: &tsokolnyy_etazh
|
||
canonical: цокольный этаж
|
||
abbreviated: цок эт
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
tsokolnyy_etazh_latin: &tsokolnyy_etazh_latin
|
||
canonical: tsokol'nyy etazh
|
||
abbreviated: tsok et
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.2
|
||
podval: &podval
|
||
canonical: подвал
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
numeric_affix:
|
||
affix: п
|
||
direction: left
|
||
ordinal:
|
||
direction: right
|
||
number_abs_value: true
|
||
number_min_abs_value: 2
|
||
# Basement 2 == Sub-basement 1
|
||
number_subtract_abs_value: 1
|
||
standalone_probability: 0.985
|
||
numeric_probability: 0.005
|
||
numeric_affix_probability: 0.005
|
||
ordinal_probability: 0.005
|
||
podval_latin: &podval_latin
|
||
canonical: podval
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
direction_probability: 0.9
|
||
numeric_affix:
|
||
affix: p
|
||
direction: left
|
||
ordinal:
|
||
direction: right
|
||
number_abs_value: true
|
||
number_min_abs_value: 2
|
||
# Basement 2 == Sub-basement 1
|
||
number_subtract_abs_value: 1
|
||
standalone_probability: 0.985
|
||
numeric_probability: 0.005
|
||
numeric_affix_probability: 0.005
|
||
ordinal_probability: 0.005
|
||
|
||
aliases:
|
||
"<-1":
|
||
default: *podval
|
||
probability: 0.9
|
||
alternatives:
|
||
- alternative: *podval_latin
|
||
probability: 0.1
|
||
"-1": &ground_floor
|
||
default: *tsokolnyy_etazh
|
||
probability: 0.89
|
||
alternatives:
|
||
- alternative: *tsokolnyy_etazh_latin
|
||
probability: 0.01
|
||
- alternative: *etazh
|
||
probability: 0.09
|
||
- alternative: *etazh_latin
|
||
probability: 0.01
|
||
"0":
|
||
default: *pervyy_etazh
|
||
probability: 0.6
|
||
alternatives:
|
||
- alternative: *pervyy_etazh_latin
|
||
probability: 0.05
|
||
- alternative: *nizhniy_etazh
|
||
probability: 0.2
|
||
- alternative: *nizhniy_etazh_latin
|
||
probability: 0.05
|
||
- alternative: *tsokolnyy_etazh
|
||
probability: 0.075
|
||
- alternative: *tsokolnyy_etazh_latin
|
||
probability: 0.025
|
||
|
||
numbering_starts_at: 0
|
||
|
||
alphanumeric:
|
||
default: *etazh
|
||
probability: 0.8
|
||
alternatives:
|
||
- alternative: *etazh_latin
|
||
probability: 0.1
|
||
- alternative: *uroven
|
||
probability: 0.09
|
||
- alternative: *uroven_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.79 # With this probability, pick an integer
|
||
roman_numeral_probability: 0.2
|
||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||
|
||
categories:
|
||
near:
|
||
default:
|
||
canonical: вблизи
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.69
|
||
alternatives:
|
||
- alternative:
|
||
canonical: vblizi
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: близ
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: bliz
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: около
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: под
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: pod
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: okolo
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: у
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: u
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: возле
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: vozle
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: рядом с
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: ryadom s
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
|
||
nearby:
|
||
default:
|
||
canonical: поблизости
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.64
|
||
alternatives:
|
||
- alternative:
|
||
canonical: poblizosti
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: рядом здесь
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.19
|
||
- alternative:
|
||
canonical: ryadom zdes'
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: здесь
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.09
|
||
- alternative:
|
||
canonical: zdes'
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
- alternative:
|
||
canonical: рядом
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.04
|
||
- alternative:
|
||
canonical: ryadom
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
near_me:
|
||
default:
|
||
canonical: рядом со мной
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative:
|
||
canonical: ryadom so mnoy
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
probability: 0.01
|
||
in:
|
||
default:
|
||
canonical: в
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative:
|
||
canonical: v
|
||
probability: 0.01
|
||
|
||
# Probabilities of each phrase
|
||
near_probability: 0.35
|
||
nearby_probability: 0.2
|
||
near_me_probability: 0.1
|
||
in_probability: 0.35
|
||
|
||
directions:
|
||
pravo: &pravo
|
||
canonical: право
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
pravo_latin: &pravo_latin
|
||
canonical: pravo
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
levo: &levo
|
||
canonical: лево
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
levo_latin: &levo_latin
|
||
canonical: levo
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: right
|
||
alternatives:
|
||
- alternative: *pravo
|
||
probability: 0.49
|
||
- alternative: *pravo_latin
|
||
probability: 0.01
|
||
- alternative: *levo
|
||
probability: 0.49
|
||
- alternative: *levo_latin
|
||
probability: 0.01
|
||
|
||
|
||
|
||
cardinal_directions:
|
||
vostok: &vostok
|
||
canonical: восток
|
||
abbreviated: в
|
||
canonical_probability: 0.95
|
||
abbreviated_probability: 0.05
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: в
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
vostok_latin: &vostok_latin
|
||
canonical: vostok
|
||
abbreviated: v
|
||
canonical_probability: 0.95
|
||
abbreviated_probability: 0.05
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: v
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
zapad: &zapad
|
||
canonical: запад
|
||
abbreviated: з
|
||
canonical_probability: 0.95
|
||
abbreviated_probability: 0.05
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: з
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
zapad_latin: &zapad_latin
|
||
canonical: zapad
|
||
abbreviated: z
|
||
canonical_probability: 0.95
|
||
abbreviated_probability: 0.05
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: z
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
sever: &sever
|
||
canonical: север
|
||
abbreviated: с
|
||
canonical_probability: 0.95
|
||
abbreviated_probability: 0.05
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: с
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
sever_latin: &sever_latin
|
||
canonical: sever
|
||
abbreviated: s
|
||
canonical_probability: 0.95
|
||
abbreviated_probability: 0.05
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: s
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
yug: &yug
|
||
canonical: Юг
|
||
abbreviated: Ю
|
||
sample: true
|
||
canonical_probability: 0.75
|
||
abbreviated_probability: 0.1
|
||
sample_probability: 0.15
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: Ю
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
yug_latin: &yug_latin
|
||
canonical: yug
|
||
abbreviated: y
|
||
sample: true
|
||
canonical_probability: 0.75
|
||
abbreviated_probability: 0.1
|
||
sample_probability: 0.15
|
||
numeric:
|
||
direction: right
|
||
numeric_affix:
|
||
affix: y
|
||
direction: right
|
||
numeric_probability: 0.5
|
||
numeric_affix_probability: 0.5
|
||
|
||
alternatives:
|
||
- alternative: *sever
|
||
probability: 0.24
|
||
- alternative: *sever_latin
|
||
probability: 0.01
|
||
- alternative: *vostok
|
||
probability: 0.24
|
||
- alternative: *vostok_latin
|
||
probability: 0.01
|
||
- alternative: *yug
|
||
probability: 0.24
|
||
- alternative: *yug_latin
|
||
probability: 0.01
|
||
- alternative: *zapad
|
||
probability: 0.24
|
||
- alternative: *zapad_latin
|
||
probability: 0.01
|
||
|
||
entrances:
|
||
vkhod: &vkhod
|
||
canonical: вход
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
vkhod_latin: &vkhod_latin
|
||
canonical: vkhod
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
|
||
# вход 1, вход A, etc.
|
||
alphanumeric:
|
||
default: *vkhod
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative: *vkhod_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.1 # e.g. вход 1
|
||
alpha_probability: 0.85 # e.g. вход А
|
||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
staircases:
|
||
lestnitsa: &lestnitsa
|
||
canonical: лестница
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
lestnitsa_latin: &lestnitsa_latin
|
||
canonical: lestnitsa
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
|
||
alphanumeric: &staircase_alphanumeric
|
||
default: *lestnitsa
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative: *lestnitsa_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.75
|
||
alpha_probability: 0.2
|
||
numeric_plus_alpha_probability: 0.025
|
||
alpha_plus_numeric_probability: 0.025
|
||
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
directional:
|
||
direction: left
|
||
direction_probability: 0.85
|
||
modifier:
|
||
alternatives:
|
||
- alternative: *sever
|
||
- alternative: *vostok
|
||
- alternative: *yug
|
||
- alternative: *zapad
|
||
|
||
po_boxes:
|
||
abonementnyy_pochtovyy_yashchik: &abonementnyy_pochtovyy_yashchik
|
||
canonical: абонементный почтовый ящик
|
||
abbreviated: а/я
|
||
sample: true
|
||
canonical_probability: 0.2
|
||
abbreviated_probability: 0.7
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
abonementnyy_pochtovyy_yashchik_latin: &abonementnyy_pochtovyy_yashchik_latin
|
||
canonical: abonementnyy pochtovyy yashchik
|
||
abbreviated: a/ya
|
||
sample: true
|
||
canonical_probability: 0.2
|
||
abbreviated_probability: 0.7
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
pochtovyy_yashchik: &pochtovyy_yashchik
|
||
canonical: почтовый ящик
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
pochtovyy_yashchik_latin: &pochtovyy_yashchik_latin
|
||
canonical: pochtovyy yashchik
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
|
||
pochtovyy_abonentskiy_yashchik: &pochtovyy_abonentskiy_yashchik
|
||
canonical: почтовый абонентский ящик
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
|
||
pochtovyy_abonentskiy_yashchik_latin: &pochtovyy_abonentskiy_yashchik_latin
|
||
canonical: pochtovyy abonentskiy yashchik
|
||
sample: true
|
||
canonical_probability: 0.8
|
||
sample_probability: 0.2
|
||
numeric:
|
||
direction: left
|
||
add_number_phrase: true
|
||
add_number_phrase_probability: 0.2
|
||
|
||
alphanumeric:
|
||
default: *abonementnyy_pochtovyy_yashchik
|
||
probability: 0.79
|
||
alternatives:
|
||
- alternative: *abonementnyy_pochtovyy_yashchik_latin
|
||
probability: 0.01
|
||
- alternative: *pochtovyy_yashchik
|
||
probability: 0.14
|
||
- alternative: *pochtovyy_yashchik_latin
|
||
probability: 0.01
|
||
- alternative: *pochtovyy_abonentskiy_yashchik
|
||
probability: 0.04
|
||
- alternative: *pochtovyy_abonentskiy_yashchik_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.9 # а/я 123
|
||
alpha_probability: 0.05 # а/я А
|
||
numeric_plus_alpha_probability: 0.04 # а/я 123А
|
||
alpha_plus_numeric_probability: 0.01 # а/я А123
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
digits:
|
||
- length: 1
|
||
probability: 0.05
|
||
- length: 2
|
||
probability: 0.1
|
||
- length: 3
|
||
probability: 0.2
|
||
- length: 4
|
||
probability: 0.5
|
||
- length: 5
|
||
probability: 0.1
|
||
- length: 6
|
||
probability: 0.05
|
||
|
||
units:
|
||
kvartira: &kvartira
|
||
canonical: квартира
|
||
abbreviated: кв
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.6
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
kvartira_latin: &kvartira_latin
|
||
canonical: kvartira
|
||
abbreviated: kv
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.6
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
|
||
kabinet: &kabinet
|
||
canonical: кабинет
|
||
abbreviated: каб
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
kabinet_latin: &kabinet_latin
|
||
canonical: kabinet
|
||
abbreviated: kab
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
|
||
litera: &litera
|
||
canonical: литера
|
||
abbreviated: лит
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
litera_latin: &litera_latin
|
||
canonical: litera
|
||
abbreviated: lit
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
|
||
ofis: &ofis
|
||
canonical: офис
|
||
abbreviated: оф
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.5
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
ofis_latin: &ofis_latin
|
||
canonical: ofis
|
||
abbreviated: of
|
||
sample: true
|
||
canonical_probability: 0.4
|
||
abbreviated_probability: 0.5
|
||
sample_probability: 0.1
|
||
numeric:
|
||
direction: left
|
||
|
||
pomeshhenie: &pomeshhenie
|
||
canonical: помещение
|
||
abbreviated: пом
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
pomeshhenie_latin: &pomeshhenie_latin
|
||
canonical: pomeshhenie
|
||
abbreviated: pom
|
||
sample: true
|
||
canonical_probability: 0.3
|
||
abbreviated_probability: 0.4
|
||
sample_probability: 0.3
|
||
numeric:
|
||
direction: left
|
||
|
||
alphanumeric: &unit_alphanumeric
|
||
default: *kvartira
|
||
probability: 0.89
|
||
alternatives:
|
||
- alternative: *kvartira_latin
|
||
probability: 0.01
|
||
- alternative: *pomeshhenie
|
||
probability: 0.09
|
||
- alternative: *pomeshhenie_latin
|
||
probability: 0.01
|
||
|
||
numeric_probability: 0.9 # e.g. кв 1
|
||
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||
alpha_plus_numeric_probability: 0.03 # e.g. А1
|
||
alpha_probability: 0.04 # e.g. кв А
|
||
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||
use_floor_probability: 0.1
|
||
|
||
alpha:
|
||
default: *kvartira
|
||
probability: 0.79
|
||
alternatives:
|
||
- alternative: *kvartira_latin
|
||
probability: 0.01
|
||
- alternative: *pomeshhenie
|
||
probability: 0.09
|
||
- alternative: *pomeshhenie_latin
|
||
probability: 0.01
|
||
- alternative: *litera
|
||
probability: 0.09
|
||
- alternative: *litera_latin
|
||
probability: 0.01
|
||
|
||
|
||
zones:
|
||
commercial:
|
||
default: *kabinet
|
||
probability: 0.59
|
||
alternatives:
|
||
- alternative: *kabinet_latin
|
||
probability: 0.01
|
||
- alternative: *ofis
|
||
probability: 0.29
|
||
- alternative: *ofis_latin
|
||
probability: 0.01
|
||
- alternative: *pomeshhenie
|
||
probability: 0.09
|
||
- alternative: *pomeshhenie_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.95 # e.g. kabinet 1
|
||
numeric_plus_alpha_probability: 0.01 # e.g. kabinet 1A
|
||
alpha_plus_numeric_probability: 0.01 # e.g. kab A1
|
||
alpha_probability: 0.03 # e.g. kab A
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|
||
|
||
university:
|
||
default: *pomeshhenie
|
||
probability: 0.99
|
||
alternatives:
|
||
- alternative: *pomeshhenie_latin
|
||
probability: 0.01
|
||
numeric_probability: 0.95 # e.g. pomeshhenie 1
|
||
numeric_plus_alpha_probability: 0.01 # e.g. pomeshhenie 1A
|
||
alpha_plus_numeric_probability: 0.01 # e.g. pom A1
|
||
alpha_probability: 0.03 # e.g. pom A
|
||
alpha_plus_numeric:
|
||
whitespace_probability: 0.1
|
||
numeric_plus_alpha:
|
||
whitespace_probability: 0.1
|