[addresses] Russian address config
This commit is contained in:
957
resources/addresses/ru.yaml
Normal file
957
resources/addresses/ru.yaml
Normal file
@@ -0,0 +1,957 @@
|
|||||||
|
# ru.yaml
|
||||||
|
# -------
|
||||||
|
# Russian language specification
|
||||||
|
|
||||||
|
alphabet: абвгдежзийклмнопрстуфхцчшщъыьэюя
|
||||||
|
alphabet_probability: 0.7
|
||||||
|
|
||||||
|
components:
|
||||||
|
level:
|
||||||
|
null_probability: 0.95
|
||||||
|
alphanumeric_probability: 0.04
|
||||||
|
standalone_probability: 0.01
|
||||||
|
|
||||||
|
staircase:
|
||||||
|
null_probability: 0.99
|
||||||
|
alphanumeric_probability: 0.01
|
||||||
|
|
||||||
|
entrance:
|
||||||
|
null_probability: 0.999
|
||||||
|
alphanumeric_probability: 0.001
|
||||||
|
|
||||||
|
unit:
|
||||||
|
null_probability: 0.6
|
||||||
|
alphanumeric_probability: 0.4
|
||||||
|
|
||||||
|
|
||||||
|
numbers:
|
||||||
|
default: &nomer
|
||||||
|
canonical: номер
|
||||||
|
abbreviated: №
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
probability: 0.95
|
||||||
|
alternatives:
|
||||||
|
- alternative: &nomer_latin
|
||||||
|
canonical: nomer
|
||||||
|
abbreviated: "no"
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
probability: 0.05
|
||||||
|
|
||||||
|
|
||||||
|
house_number:
|
||||||
|
dom: &dom
|
||||||
|
canonical: дом
|
||||||
|
abbreviated: д
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
dom_latin: &dom_latin
|
||||||
|
canonical: dom
|
||||||
|
abbreviated: d
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
alphanumeric:
|
||||||
|
default: *dom
|
||||||
|
probability: 0.95
|
||||||
|
alternatives:
|
||||||
|
- alternative: *dom_latin
|
||||||
|
probability: 0.05
|
||||||
|
|
||||||
|
# Very common in Russian to write dom/d
|
||||||
|
alphanumeric_phrase_probability: 0.6
|
||||||
|
|
||||||
|
and:
|
||||||
|
default: &i
|
||||||
|
canonical: и
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: &i_latin
|
||||||
|
canonical: i
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cross_streets:
|
||||||
|
and: *i
|
||||||
|
and_latin: *i_latin
|
||||||
|
corner: &ugol
|
||||||
|
canonical: угол
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
ugol_latin: &ugol_latin
|
||||||
|
canonical: ugol
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
uglu: &uglu
|
||||||
|
canonical: углу
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
uglu_latin: &uglu_latin
|
||||||
|
canonical: uglu
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
na_uglu: &na_uglu
|
||||||
|
canonical: на углу
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
na_uglu_latin: &na_uglu_latin
|
||||||
|
canonical: na uglu
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
intersection:
|
||||||
|
default: *i
|
||||||
|
probability: 0.65
|
||||||
|
alternatives:
|
||||||
|
- alternative: *i_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *ugol
|
||||||
|
probability: 0.075
|
||||||
|
- alternative: *ugol_latin
|
||||||
|
probability: 0.075
|
||||||
|
- alternative: *uglu
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *uglu_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *na_uglu
|
||||||
|
probability: 0.025
|
||||||
|
- alternative: *na_uglu_latin
|
||||||
|
probability: 0.025
|
||||||
|
mezhdu: &mezhdu
|
||||||
|
canonical: между
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
parentheses_probability: 0.5
|
||||||
|
mezhdu_latin: &mezhdu_latin
|
||||||
|
canonical: mezhdu
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
parentheses_probability: 0.5
|
||||||
|
between:
|
||||||
|
default: *mezhdu
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: *mezhdu_latin
|
||||||
|
probability: 0.1
|
||||||
|
|
||||||
|
levels:
|
||||||
|
etazh: &etazh
|
||||||
|
canonical: этаж
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.7
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.4
|
||||||
|
ordinal_probability: 0.6
|
||||||
|
etazh_latin: &etazh_latin
|
||||||
|
canonical: etazh
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.7
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.4
|
||||||
|
ordinal_probability: 0.6
|
||||||
|
tsokolnyy_etazh: &tsokolnyy_etazh
|
||||||
|
canonical: цокольный этаж
|
||||||
|
abbreviated: цок эт
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
tsokolnyy_etazh_latin: &tsokolnyy_etazh_latin
|
||||||
|
canonical: tsokol'nyy etazh
|
||||||
|
abbreviated: tsok et
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
podval: &podval
|
||||||
|
canonical: подвал
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
numeric_affix:
|
||||||
|
affix: п
|
||||||
|
direction: left
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
number_abs_value: true
|
||||||
|
number_min_abs_value: 2
|
||||||
|
# Basement 2 == Sub-basement 1
|
||||||
|
number_subtract_abs_value: 1
|
||||||
|
standalone_probability: 0.985
|
||||||
|
numeric_probability: 0.005
|
||||||
|
numeric_affix_probability: 0.005
|
||||||
|
ordinal_probability: 0.005
|
||||||
|
podval_latin: &podval_latin
|
||||||
|
canonical: podval
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
numeric_affix:
|
||||||
|
affix: p
|
||||||
|
direction: left
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
number_abs_value: true
|
||||||
|
number_min_abs_value: 2
|
||||||
|
# Basement 2 == Sub-basement 1
|
||||||
|
number_subtract_abs_value: 1
|
||||||
|
standalone_probability: 0.985
|
||||||
|
numeric_probability: 0.005
|
||||||
|
numeric_affix_probability: 0.005
|
||||||
|
ordinal_probability: 0.005
|
||||||
|
|
||||||
|
aliases:
|
||||||
|
"<-1":
|
||||||
|
default: *podval
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: *podval_latin
|
||||||
|
probability: 0.1
|
||||||
|
"-1": &ground_floor
|
||||||
|
default: *tsokolnyy_etazh
|
||||||
|
probability: 0.89
|
||||||
|
alternatives:
|
||||||
|
- alternative: *tsokolnyy_etazh_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *etazh
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *etazh_latin
|
||||||
|
probability: 0.01
|
||||||
|
"0": *ground_floor
|
||||||
|
|
||||||
|
numbering_starts_at: 0
|
||||||
|
|
||||||
|
alphanumeric:
|
||||||
|
default: *etazh
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: *etazh_latin
|
||||||
|
probability: 0.1
|
||||||
|
numeric_probability: 0.99 # With this probability, pick an integer
|
||||||
|
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||||
|
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||||
|
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||||
|
|
||||||
|
categories:
|
||||||
|
near:
|
||||||
|
default:
|
||||||
|
canonical: вблизи
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.74
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: vblizi
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: близ
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: bliz
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: около
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: okolo
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: у
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: u
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: возле
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: vozle
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: рядом с
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: ryadom s
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
nearby:
|
||||||
|
default:
|
||||||
|
canonical: поблизости
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.64
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: poblizosti
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: рядом здесь
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.19
|
||||||
|
- alternative:
|
||||||
|
canonical: ryadom zdes'
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: здесь
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.09
|
||||||
|
- alternative:
|
||||||
|
canonical: zdes'
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: рядом
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: ryadom
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
near_me:
|
||||||
|
default:
|
||||||
|
canonical: рядом со мной
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: ryadom so mnoy
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
in:
|
||||||
|
default:
|
||||||
|
canonical: в
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: v
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
# Probabilities of each phrase
|
||||||
|
near_probability: 0.35
|
||||||
|
nearby_probability: 0.2
|
||||||
|
near_me_probability: 0.1
|
||||||
|
in_probability: 0.35
|
||||||
|
|
||||||
|
directions:
|
||||||
|
pravo: &pravo
|
||||||
|
canonical: право
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
pravo_latin: &pravo_latin
|
||||||
|
canonical: pravo
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
levo: &levo
|
||||||
|
canonical: лево
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
levo_latin: &levo_latin
|
||||||
|
canonical: levo
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pravo
|
||||||
|
probability: 0.49
|
||||||
|
- alternative: *pravo_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *levo
|
||||||
|
probability: 0.49
|
||||||
|
- alternative: *levo_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cardinal_directions:
|
||||||
|
vostok: &vostok
|
||||||
|
canonical: восток
|
||||||
|
abbreviated: в
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: в
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
vostok_latin: &vostok_latin
|
||||||
|
canonical: vostok
|
||||||
|
abbreviated: v
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: v
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
zapad: &zapad
|
||||||
|
canonical: запад
|
||||||
|
abbreviated: з
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: з
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
zapad_latin: &zapad_latin
|
||||||
|
canonical: zapad
|
||||||
|
abbreviated: z
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: z
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
sever: &sever
|
||||||
|
canonical: север
|
||||||
|
abbreviated: с
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: с
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
sever_latin: &sever_latin
|
||||||
|
canonical: sever
|
||||||
|
abbreviated: s
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: s
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
yug: &yug
|
||||||
|
canonical: Юг
|
||||||
|
abbreviated: Ю
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.75
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.15
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: Ю
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
yug_latin: &yug_latin
|
||||||
|
canonical: yug
|
||||||
|
abbreviated: y
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.75
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.15
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: y
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
alternatives:
|
||||||
|
- alternative: *sever
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *sever_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *vostok
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *vostok_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *yug
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *yug_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *zapad
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *zapad_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
entrances:
|
||||||
|
vkhod: &vkhod
|
||||||
|
canonical: вход
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
vkhod_latin: &vkhod_latin
|
||||||
|
canonical: vkhod
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
# вход 1, вход A, etc.
|
||||||
|
alphanumeric:
|
||||||
|
default: *vkhod
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *vkhod_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.1 # e.g. вход 1
|
||||||
|
alpha_probability: 0.85 # e.g. вход А
|
||||||
|
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||||
|
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||||
|
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
staircases:
|
||||||
|
lestnitsa: &lestnitsa
|
||||||
|
canonical: лестница
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
lestnitsa_latin: &lestnitsa_latin
|
||||||
|
canonical: lestnitsa
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
alphanumeric: &staircase_alphanumeric
|
||||||
|
default: *lestnitsa
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *lestnitsa_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.75
|
||||||
|
alpha_probability: 0.2
|
||||||
|
numeric_plus_alpha_probability: 0.025
|
||||||
|
alpha_plus_numeric_probability: 0.025
|
||||||
|
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
directional:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.85
|
||||||
|
modifier:
|
||||||
|
alternatives:
|
||||||
|
- alternative: *sever
|
||||||
|
- alternative: *vostok
|
||||||
|
- alternative: *yug
|
||||||
|
- alternative: *zapad
|
||||||
|
|
||||||
|
po_boxes:
|
||||||
|
abonementnyy_pochtovyy_yashchik: &abonementnyy_pochtovyy_yashchik
|
||||||
|
canonical: абонементный почтовый ящик
|
||||||
|
abbreviated: а/я
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.2
|
||||||
|
abbreviated_probability: 0.7
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
abonementnyy_pochtovyy_yashchik_latin: &abonementnyy_pochtovyy_yashchik_latin
|
||||||
|
canonical: abonementnyy pochtovyy yashchik
|
||||||
|
abbreviated: a/ya
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.2
|
||||||
|
abbreviated_probability: 0.7
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
pochtovyy_yashchik: &pochtovyy_yashchik
|
||||||
|
canonical: почтовый ящик
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
pochtovyy_yashchik_latin: &pochtovyy_yashchik_latin
|
||||||
|
canonical: pochtovyy yashchik
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
|
||||||
|
pochtovyy_abonentskiy_yashchik: &pochtovyy_abonentskiy_yashchik
|
||||||
|
canonical: почтовый абонентский ящик
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
|
||||||
|
pochtovyy_abonentskiy_yashchik_latin: &pochtovyy_abonentskiy_yashchik_latin
|
||||||
|
canonical: pochtovyy abonentskiy yashchik
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
|
||||||
|
alphanumeric:
|
||||||
|
default: *abonementnyy_pochtovyy_yashchik
|
||||||
|
probability: 0.79
|
||||||
|
alternatives:
|
||||||
|
- alternative: *abonementnyy_pochtovyy_yashchik_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *pochtovyy_yashchik
|
||||||
|
probability: 0.14
|
||||||
|
- alternative: *pochtovyy_yashchik_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *pochtovyy_abonentskiy_yashchik
|
||||||
|
probability: 0.04
|
||||||
|
- alternative: *pochtovyy_abonentskiy_yashchik_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.9 # а/я 123
|
||||||
|
alpha_probability: 0.05 # а/я А
|
||||||
|
numeric_plus_alpha_probability: 0.04 # а/я 123А
|
||||||
|
alpha_plus_numeric_probability: 0.01 # а/я А123
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
digits:
|
||||||
|
- length: 1
|
||||||
|
probability: 0.05
|
||||||
|
- length: 2
|
||||||
|
probability: 0.1
|
||||||
|
- length: 3
|
||||||
|
probability: 0.2
|
||||||
|
- length: 4
|
||||||
|
probability: 0.5
|
||||||
|
- length: 5
|
||||||
|
probability: 0.1
|
||||||
|
- length: 6
|
||||||
|
probability: 0.05
|
||||||
|
|
||||||
|
units:
|
||||||
|
kvartira: &kvartira
|
||||||
|
canonical: квартира
|
||||||
|
abbreviated: кв
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.6
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
kvartira_latin: &kvartira_latin
|
||||||
|
canonical: kvartira
|
||||||
|
abbreviated: kv
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.6
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
kabinet: &kabinet
|
||||||
|
canonical: кабинет
|
||||||
|
abbreviated: каб
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
kabinet_latin: &kabinet_latin
|
||||||
|
canonical: kabinet
|
||||||
|
abbreviated: kab
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
litera: &litera
|
||||||
|
canonical: литера
|
||||||
|
abbreviated: лит
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
litera_latin: &litera_latin
|
||||||
|
canonical: litera
|
||||||
|
abbreviated: lit
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
ofis: &ofis
|
||||||
|
canonical: офис
|
||||||
|
abbreviated: оф
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.5
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
ofis_latin: &ofis_latin
|
||||||
|
canonical: ofis
|
||||||
|
abbreviated: of
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.5
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
pomeshhenie: &pomeshhenie
|
||||||
|
canonical: помещение
|
||||||
|
abbreviated: пом
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
pomeshhenie_latin: &pomeshhenie_latin
|
||||||
|
canonical: pomeshhenie
|
||||||
|
abbreviated: pom
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
alphanumeric: &unit_alphanumeric
|
||||||
|
default: *kvartira
|
||||||
|
probability: 0.89
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kvartira_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *pomeshhenie
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *pomeshhenie_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
numeric_probability: 0.9 # e.g. кв 1
|
||||||
|
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||||||
|
alpha_plus_numeric_probability: 0.03 # e.g. А1
|
||||||
|
alpha_probability: 0.04 # e.g. кв А
|
||||||
|
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||||
|
use_floor_probability: 0.1
|
||||||
|
|
||||||
|
alpha:
|
||||||
|
default: *kvartira
|
||||||
|
probability: 0.79
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kvartira_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *pomeshhenie
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *pomeshhenie_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *litera
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *litera_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
|
||||||
|
zones:
|
||||||
|
commercial:
|
||||||
|
default: *kabinet
|
||||||
|
probability: 0.59
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kabinet_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *ofis
|
||||||
|
probability: 0.29
|
||||||
|
- alternative: *ofis_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *pomeshhenie
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *pomeshhenie_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.95 # e.g. kabinet 1
|
||||||
|
numeric_plus_alpha_probability: 0.01 # e.g. kabinet 1A
|
||||||
|
alpha_plus_numeric_probability: 0.01 # e.g. kab A1
|
||||||
|
alpha_probability: 0.03 # e.g. kab A
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
university:
|
||||||
|
default: *pomeshhenie
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pomeshhenie_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.95 # e.g. помещение 1
|
||||||
|
numeric_plus_alpha_probability: 0.01 # e.g. помещение 1А
|
||||||
|
alpha_plus_numeric_probability: 0.01 # e.g. пом А1
|
||||||
|
alpha_probability: 0.03 # e.g. пом А
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
@@ -26,7 +26,7 @@ class AddressConfig(object):
|
|||||||
self.cache = {}
|
self.cache = {}
|
||||||
|
|
||||||
for filename in os.listdir(config_dir):
|
for filename in os.listdir(config_dir):
|
||||||
if filename not in ('en.yaml', 'es.yaml', 'ca.yaml', 'fr.yaml', 'de.yaml', 'nl.yaml', 'da.yaml', 'nb.yaml', 'sv.yaml', 'pt.yaml', 'pl.yaml'):
|
if filename not in ('en.yaml', 'es.yaml', 'ca.yaml', 'fr.yaml', 'de.yaml', 'nl.yaml', 'da.yaml', 'nb.yaml', 'sv.yaml', 'pt.yaml', 'pl.yaml', 'ru.yaml'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
config = yaml.load(open(os.path.join(ADDRESS_CONFIG_DIR, filename)))
|
config = yaml.load(open(os.path.join(ADDRESS_CONFIG_DIR, filename)))
|
||||||
|
|||||||
Reference in New Issue
Block a user