[addresses] Russian address config
This commit is contained in:
957
resources/addresses/ru.yaml
Normal file
957
resources/addresses/ru.yaml
Normal file
@@ -0,0 +1,957 @@
|
||||
# ru.yaml
|
||||
# -------
|
||||
# Russian language specification
|
||||
|
||||
alphabet: абвгдежзийклмнопрстуфхцчшщъыьэюя
|
||||
alphabet_probability: 0.7
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.04
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
|
||||
numbers:
|
||||
default: &nomer
|
||||
canonical: номер
|
||||
abbreviated: №
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: &nomer_latin
|
||||
canonical: nomer
|
||||
abbreviated: "no"
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
probability: 0.05
|
||||
|
||||
|
||||
house_number:
|
||||
dom: &dom
|
||||
canonical: дом
|
||||
abbreviated: д
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
dom_latin: &dom_latin
|
||||
canonical: dom
|
||||
abbreviated: d
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
alphanumeric:
|
||||
default: *dom
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: *dom_latin
|
||||
probability: 0.05
|
||||
|
||||
# Very common in Russian to write dom/d
|
||||
alphanumeric_phrase_probability: 0.6
|
||||
|
||||
and:
|
||||
default: &i
|
||||
canonical: и
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: &i_latin
|
||||
canonical: i
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
|
||||
|
||||
|
||||
cross_streets:
|
||||
and: *i
|
||||
and: *i_latin
|
||||
corner: &ugol
|
||||
canonical: угол
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
ugol_latin: &ugol_latin
|
||||
canonical: ugol
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
uglu: &uglu
|
||||
canonical: углу
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
uglu_latin: &uglu_latin
|
||||
canonical: uglu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_uglu: &na_uglu
|
||||
canonical: на углу
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_uglu_latin: &na_uglu_latin
|
||||
canonical: na uglu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *i
|
||||
probability: 0.65
|
||||
alternatives:
|
||||
- alternative: *i_latin
|
||||
probability: 0.05
|
||||
- alternative: *ugol
|
||||
probability: 0.075
|
||||
- alternative: *ugol_latin
|
||||
probability: 0.075
|
||||
- alternative: *uglu
|
||||
probability: 0.05
|
||||
- alternative: *uglu_latin
|
||||
probability: 0.05
|
||||
- alternative: *na_uglu
|
||||
probability: 0.025
|
||||
- alternative: *na_uglu_latin
|
||||
probability: 0.025
|
||||
mezhdu: &mezhdu
|
||||
canonical: между
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
mezhdu_latin: &mezhdu_latin
|
||||
canonical: mezhdu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
between:
|
||||
default: *mezhdu
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *mezhdu_latin
|
||||
probability: 0.1
|
||||
|
||||
levels:
|
||||
etazh: &etazh
|
||||
canonical: этаж
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
etazh_latin: &etazh_latin
|
||||
canonical: etazh
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
tsokolnyy_etazh: &tsokolnyy_etazh
|
||||
canonical: цокольный этаж
|
||||
abbreviated: цок эт
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
tsokolnyy_etazh_latin: &tsokolnyy_etazh_latin
|
||||
canonical: tsokol'nyy etazh
|
||||
abbreviated: tsok et
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
podval: &podval
|
||||
canonical: подвал
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
numeric_affix:
|
||||
affix: п
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 2
|
||||
# Basement 2 == Sub-basement 1
|
||||
number_subtract_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
podval_latin: &podval_latin
|
||||
canonical: podval
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
numeric_affix:
|
||||
affix: p
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 2
|
||||
# Basement 2 == Sub-basement 1
|
||||
number_subtract_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *podval
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *podval_latin
|
||||
probability: 0.1
|
||||
"-1": &ground_floor
|
||||
default: *tsokolnyy_etazh
|
||||
probability: 0.89
|
||||
alternatives:
|
||||
- alternative: *tsokolnyy_etazh_latin
|
||||
probability: 0.01
|
||||
- alternative: *etazh
|
||||
probability: 0.09
|
||||
- alternative: *etazh_latin
|
||||
probability: 0.01
|
||||
"0": *ground_floor
|
||||
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *etazh
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *etazh_latin
|
||||
probability: 0.1
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: вблизи
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.74
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: vblizi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: близ
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: bliz
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: около
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: okolo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: у
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: u
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: возле
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: vozle
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: рядом с
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: ryadom s
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: поблизости
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.64
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: poblizosti
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: рядом здесь
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.19
|
||||
- alternative:
|
||||
canonical: ryadom zdes'
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: здесь
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.09
|
||||
- alternative:
|
||||
canonical: zdes'
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: рядом
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: ryadom
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
near_me:
|
||||
default:
|
||||
canonical: рядом с мной
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: ryadom s mnoy
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
in:
|
||||
default:
|
||||
canonical: в
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: v
|
||||
probability: 0.01
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
directions:
|
||||
pravo: &pravo
|
||||
canonical: право
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
pravo_latin: &pravo_latin
|
||||
canonical: pravo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
levo: &levo
|
||||
canonical: лево
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
levo_latin: &levo_latin
|
||||
canonical: levo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *pravo
|
||||
probability: 0.49
|
||||
- alternative: *pravo_latin
|
||||
probability: 0.01
|
||||
- alternative: *levo
|
||||
probability: 0.49
|
||||
- alternative: *levo_latin
|
||||
probability: 0.01
|
||||
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
vostok: &vostok
|
||||
canonical: восток
|
||||
abbreviated: в
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: в
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
vostok_latin: &vostok_latin
|
||||
canonical: vostok
|
||||
abbreviated: v
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: v
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
zapad: &zapad
|
||||
canonical: запад
|
||||
abbreviated: з
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: з
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
zapad_latin: &zapad_latin
|
||||
canonical: zapad
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: z
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
sever: &sever
|
||||
canonical: север
|
||||
abbreviated: с
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: с
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
sever_latin: &sever_latin
|
||||
canonical: sever
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
yug: &yug
|
||||
canonical: Юг
|
||||
abbreviated: Ю
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: Ю
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
yug_latin: &yug_latin
|
||||
canonical: yug
|
||||
abbreviated: y
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: y
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
alternatives:
|
||||
- alternative: *sever
|
||||
probability: 0.24
|
||||
- alternative: *sever_latin
|
||||
probability: 0.01
|
||||
- alternative: *vostok
|
||||
probability: 0.24
|
||||
- alternative: *vostok_latin
|
||||
probability: 0.01
|
||||
- alternative: *yug
|
||||
probability: 0.24
|
||||
- alternative: *yug_latin
|
||||
probability: 0.01
|
||||
- alternative: *zapad
|
||||
probability: 0.24
|
||||
- alternative: *zapad_latin
|
||||
probability: 0.01
|
||||
|
||||
entrances:
|
||||
vkhod: &vkhod
|
||||
canonical: вход
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
vkhod_latin: &vkhod_latin
|
||||
canonical: vkhod
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# вход 1, вход A, etc.
|
||||
alphanumeric:
|
||||
default: *vkhod
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *vkhod_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.1 # e.g. Wejście 1
|
||||
alpha_probability: 0.85 # e.g. Wejście A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
lestnitsa: &lestnitsa
|
||||
canonical: лестница
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
lestnitsa_latin: &lestnitsa_latin
|
||||
canonical: lestnitsa
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *lestnitsa
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *lestnitsa_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *sever
|
||||
- alternative: *vostok
|
||||
- alternative: *yug
|
||||
- alternative: *zapad
|
||||
|
||||
po_boxes:
|
||||
abonementnyy_pochtovyy_yashchik: &abonementnyy_pochtovyy_yashchik
|
||||
canonical: абонементный почтовый ящик
|
||||
abbreviated: а/я
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
abonementnyy_pochtovyy_yashchik_latin: &abonementnyy_pochtovyy_yashchik_latin
|
||||
canonical: abonementnyy pochtovyy yashchik
|
||||
abbreviated: a/ya
|
||||
sample: true
|
||||
canonical_probability: 0.2
|
||||
abbreviated_probability: 0.7
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
pochtovyy_yashchik: &pochtovyy_yashchik
|
||||
canonical: абонементный почтовый ящик
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
pochtovyy_yashchik_latin: &pochtovyy_yashchik_latin
|
||||
canonical: pochtovyy yashchik
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
pochtovyy_abonentskiy_yashchik: &pochtovyy_abonentskiy_yashchik
|
||||
canonical: почтовый абонентский ящик
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
pochtovyy_abonentskiy_yashchik_latin: &pochtovyy_abonentskiy_yashchik_latin
|
||||
canonical: pochtovyy abonentskiy yashchik
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *abonementnyy_pochtovyy_yashchik
|
||||
probability: 0.79
|
||||
alternatives:
|
||||
- alternative: *abonementnyy_pochtovyy_yashchik_latin
|
||||
probability: 0.01
|
||||
- alternative: *pochtovyy_yashchik
|
||||
probability: 0.14
|
||||
- alternative: *pochtovyy_yashchik_latin
|
||||
probability: 0.01
|
||||
- alternative: *pochtovyy_abonentskiy_yashchik
|
||||
probability: 0.04
|
||||
- alternative: *pochtovyy_abonentskiy_yashchik_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.9 # а/я 123
|
||||
alpha_probability: 0.05 # а/я А
|
||||
numeric_plus_alpha_probability: 0.04 # а/я 123А
|
||||
alpha_plus_numeric_probability: 0.01 # а/я А123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
kvartira: &kvartira
|
||||
canonical: квартира
|
||||
abbreviated: кв
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
kvartira_latin: &kvartira_latin
|
||||
canonical: kvartira
|
||||
abbreviated: kv
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
kabinet: &kabinet
|
||||
canonical: кабинет
|
||||
abbreviated: каб
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
kabinet_latin: &kabinet_latin
|
||||
canonical: kabinet
|
||||
abbreviated: kab
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
litera: &litera
|
||||
canonical: литера
|
||||
abbreviated: лит
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
litera_latin: &litera_latin
|
||||
canonical: litera
|
||||
abbreviated: lit
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
ofis: &ofis
|
||||
canonical: офис
|
||||
abbreviated: оф
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
ofis_latin: &ofis_latin
|
||||
canonical: ofis
|
||||
abbreviated: of
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
pomeshhenie: &pomeshhenie
|
||||
canonical: помещение
|
||||
abbreviated: пом
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
pomeshhenie_latin: &pomeshhenie_latin
|
||||
canonical: pomeshhenie
|
||||
abbreviated: pom
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *kvartira
|
||||
probability: 0.89
|
||||
alternatives:
|
||||
- alternative: *kvartira_latin
|
||||
probability: 0.01
|
||||
- alternative: *pomeshhenie
|
||||
probability: 0.09
|
||||
- alternative: *pomeshhenie_latin
|
||||
probability: 0.01
|
||||
|
||||
numeric_probability: 0.9 # e.g. кв 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. AА1
|
||||
alpha_probability: 0.04 # e.g. кв А
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
alpha:
|
||||
default: *kvartira
|
||||
probability: 0.79
|
||||
alternatives:
|
||||
- alternative: *kvartira_latin
|
||||
probability: 0.01
|
||||
- alternative: *pomeshhenie
|
||||
probability: 0.09
|
||||
- alternative: *pomeshhenie_latin
|
||||
probability: 0.01
|
||||
- alternative: *litera
|
||||
probability: 0.09
|
||||
- alternative: *litera_latin
|
||||
probability: 0.01
|
||||
|
||||
|
||||
zones:
|
||||
commercial:
|
||||
default: *kabinet
|
||||
probability: 0.59
|
||||
alternatives:
|
||||
- alternative: *kabinet_latin
|
||||
probability: 0.01
|
||||
- alternative: *ofis
|
||||
probability: 0.29
|
||||
- alternative: *ofis_latin
|
||||
probability: 0.01
|
||||
- alternative: *pomeshhenie
|
||||
probability: 0.09
|
||||
- alternative: *pomeshhenie_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.95 # e.g. kabinet 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. kabinet 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. kab A1
|
||||
alpha_probability: 0.03 # e.g. kab A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
university:
|
||||
default: *pomeshhenie
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *pomeshhenie_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.95 # e.g. kabinet 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. kabinet 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. kab A1
|
||||
alpha_probability: 0.03 # e.g. kab A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
@@ -26,7 +26,7 @@ class AddressConfig(object):
|
||||
self.cache = {}
|
||||
|
||||
for filename in os.listdir(config_dir):
|
||||
if filename not in ('en.yaml', 'es.yaml', 'ca.yaml', 'fr.yaml', 'de.yaml', 'nl.yaml', 'da.yaml', 'nb.yaml', 'sv.yaml', 'pt.yaml', 'pl.yaml'):
|
||||
if filename not in ('en.yaml', 'es.yaml', 'ca.yaml', 'fr.yaml', 'de.yaml', 'nl.yaml', 'da.yaml', 'nb.yaml', 'sv.yaml', 'pt.yaml', 'pl.yaml', 'ru.yaml'):
|
||||
continue
|
||||
|
||||
config = yaml.load(open(os.path.join(ADDRESS_CONFIG_DIR, filename)))
|
||||
|
||||
Reference in New Issue
Block a user