[addresses] Ukrainian address config
This commit is contained in:
983
resources/addresses/uk.yaml
Normal file
983
resources/addresses/uk.yaml
Normal file
@@ -0,0 +1,983 @@
|
||||
# uk.yaml
|
||||
# -------
|
||||
# Ukranian language specification
|
||||
|
||||
alphabet: абвгґдеєжзиіїйклмнопрстуфхцчшщьюя
|
||||
alphabet_probability: 0.7
|
||||
|
||||
components:
|
||||
level:
|
||||
null_probability: 0.95
|
||||
alphanumeric_probability: 0.04
|
||||
standalone_probability: 0.01
|
||||
|
||||
staircase:
|
||||
null_probability: 0.99
|
||||
alphanumeric_probability: 0.01
|
||||
|
||||
entrance:
|
||||
null_probability: 0.999
|
||||
alphanumeric_probability: 0.001
|
||||
|
||||
unit:
|
||||
null_probability: 0.6
|
||||
alphanumeric_probability: 0.4
|
||||
|
||||
|
||||
numbers:
|
||||
default: &nomer
|
||||
canonical: номер
|
||||
abbreviated: №
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
probability: 0.95
|
||||
alternatives:
|
||||
- alternative: &nomer_latin
|
||||
canonical: nomer
|
||||
abbreviated: "no"
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
probability: 0.05
|
||||
|
||||
|
||||
house_number:
|
||||
budnyok: &budnyok
|
||||
canonical: будинок
|
||||
abbreviated: буд
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
budnyok_latin: &budnyok_latin
|
||||
canonical: budnyok
|
||||
abbreviated: bud
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
dom: &dom
|
||||
canonical: дом
|
||||
abbreviated: д
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
dom_latin: &dom_latin
|
||||
canonical: dom
|
||||
abbreviated: d
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric:
|
||||
default: *budnyok
|
||||
probability: 0.65
|
||||
alternatives:
|
||||
- alternative: *budnyok_latin
|
||||
probability: 0.05
|
||||
- alternative: *dom
|
||||
probability: 0.35
|
||||
- alternative: *dom_latin
|
||||
probability: 0.05
|
||||
|
||||
# Very common in Ukranian to write bud/dom
|
||||
alphanumeric_phrase_probability: 0.6
|
||||
|
||||
|
||||
|
||||
and:
|
||||
default: &i
|
||||
canonical: і
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: &i_latin
|
||||
canonical: i
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.1
|
||||
|
||||
|
||||
cross_streets:
|
||||
i: *i
|
||||
i_latin: *i_latin
|
||||
kut: &kut
|
||||
canonical: кут
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
kut_latin: &kut_latin
|
||||
canonical: kut
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
rozi: &rozi
|
||||
canonical: розі
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
rozi_latin: &rozi_latin
|
||||
canonical: rozi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_rozi: &na_rozi
|
||||
canonical: на розі
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
na_rozi_latin: &na_rozi_latin
|
||||
canonical: na rozi
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
intersection:
|
||||
default: *i
|
||||
probability: 0.65
|
||||
alternatives:
|
||||
- alternative: *i_latin
|
||||
probability: 0.05
|
||||
- alternative: *rozi
|
||||
probability: 0.075
|
||||
- alternative: *rozi_latin
|
||||
probability: 0.075
|
||||
- alternative: *na_rozi
|
||||
probability: 0.05
|
||||
- alternative: *na_rozi_latin
|
||||
probability: 0.05
|
||||
- alternative: *kut
|
||||
probability: 0.025
|
||||
- alternative: *kut_latin
|
||||
probability: 0.025
|
||||
mizh: &mizh
|
||||
canonical: між
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
mizh_latin: &mizh_latin
|
||||
canonical: mizh
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
parentheses_probability: 0.5
|
||||
between:
|
||||
default: *mizh
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *mizh_latin
|
||||
probability: 0.1
|
||||
|
||||
|
||||
levels:
|
||||
poverkh: &poverkh
|
||||
canonical: поверх
|
||||
abbreviated: пов
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
poverkh_latin: &poverkh_latin
|
||||
canonical: poverkh
|
||||
abbreviated: pov
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.3
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
riven: &riven
|
||||
canonical: рівень
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
riven_latin: &riven_latin
|
||||
canonical: riven'
|
||||
sample: true
|
||||
canonical_probability: 0.7
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
ordinal:
|
||||
direction: right
|
||||
numeric_probability: 0.4
|
||||
ordinal_probability: 0.6
|
||||
pershyy_poverkh: &pershyy_poverkh
|
||||
canonical: перший поверх
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
pershyy_poverkh_latin: &pershyy_poverkh_latin
|
||||
canonical: pershyy poverkh
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
nyzhniy_poverkh: &nyzhniy_poverkh
|
||||
canonical: нижній поверх
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
nyzhniy_poverkh_latin: &nyzhniy_poverkh_latin
|
||||
canonical: nyzhniy poverkh
|
||||
sample: true
|
||||
canonical_probability: 0.9
|
||||
sample_probability: 0.1
|
||||
|
||||
tsokolnyy_poverkh: &tsokolnyy_poverkh
|
||||
canonical: цокольний поверх
|
||||
abbreviated: цок пов
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
tsokolnyy_poverkh_latin: &tsokolnyy_poverkh_latin
|
||||
canonical: tsokolʹnyy poverkh
|
||||
abbreviated: tsok pov
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
pidval: &pidval
|
||||
canonical: підвал
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
numeric_affix:
|
||||
affix: п
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 2
|
||||
# Basement 2 == Sub-basement 1
|
||||
number_subtract_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
pidval_latin: &pidval_latin
|
||||
canonical: pidval
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
direction_probability: 0.9
|
||||
numeric_affix:
|
||||
affix: p
|
||||
direction: left
|
||||
ordinal:
|
||||
direction: right
|
||||
ordinal:
|
||||
direction: right
|
||||
number_abs_value: true
|
||||
number_min_abs_value: 2
|
||||
# Basement 2 == Sub-basement 1
|
||||
number_subtract_abs_value: 1
|
||||
standalone_probability: 0.985
|
||||
numeric_probability: 0.005
|
||||
numeric_affix_probability: 0.005
|
||||
ordinal_probability: 0.005
|
||||
|
||||
aliases:
|
||||
"<-1":
|
||||
default: *pidval
|
||||
probability: 0.9
|
||||
alternatives:
|
||||
- alternative: *pidval_latin
|
||||
probability: 0.1
|
||||
"-1": &ground_floor
|
||||
default: *tsokolnyy_poverkh
|
||||
probability: 0.89
|
||||
alternatives:
|
||||
- alternative: *tsokolnyy_poverkh_latin
|
||||
probability: 0.01
|
||||
- alternative: *poverkh
|
||||
probability: 0.09
|
||||
- alternative: *poverkh_latin
|
||||
probability: 0.01
|
||||
"0":
|
||||
default: *pershyy_poverkh
|
||||
probability: 0.6
|
||||
alternatives:
|
||||
- alternative: *pershyy_poverkh_latin
|
||||
probability: 0.05
|
||||
- alternative: *nyzhniy_poverkh
|
||||
probability: 0.2
|
||||
- alternative: *nyzhniy_poverkh_latin
|
||||
probability: 0.05
|
||||
- alternative: *tsokolnyy_poverkh
|
||||
probability: 0.075
|
||||
- alternative: *tsokolnyy_poverkh_latin
|
||||
probability: 0.025
|
||||
numbering_starts_at: 0
|
||||
|
||||
alphanumeric:
|
||||
default: *poverkh
|
||||
probability: 0.8
|
||||
alternatives:
|
||||
- alternative: *poverkh_latin
|
||||
probability: 0.1
|
||||
- alternative: *riven
|
||||
probability: 0.09
|
||||
- alternative: *riven_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.99 # With this probability, pick an integer
|
||||
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||
|
||||
|
||||
|
||||
categories:
|
||||
near:
|
||||
default:
|
||||
canonical: поруч
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.59
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: poruch
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: поблизу
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.19
|
||||
- alternative:
|
||||
canonical: poblyzu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: близько
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: blyzʹko
|
||||
sample: true
|
||||
canonical_probability: 0.6
|
||||
sample_probability: 0.4
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: у
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: u
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: біля
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: bilye
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: поруч з
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: poruch z
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
|
||||
nearby:
|
||||
default:
|
||||
canonical: поблизу
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.64
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: poblyzu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: поруч тут
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.19
|
||||
- alternative:
|
||||
canonical: poruch tut
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: тут
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.09
|
||||
- alternative:
|
||||
canonical: tut
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
- alternative:
|
||||
canonical: поруч
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.04
|
||||
- alternative:
|
||||
canonical: poruch
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
near_me:
|
||||
default:
|
||||
canonical: поруч з мною
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: poruch z mnoyu
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
probability: 0.01
|
||||
in:
|
||||
default:
|
||||
canonical: в
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative:
|
||||
canonical: v
|
||||
probability: 0.01
|
||||
|
||||
# Probabilities of each phrase
|
||||
near_probability: 0.35
|
||||
nearby_probability: 0.2
|
||||
near_me_probability: 0.1
|
||||
in_probability: 0.35
|
||||
|
||||
|
||||
# ru.yaml
|
||||
# -------
|
||||
# Russian language specification
|
||||
|
||||
directions:
|
||||
pravo: &pravo
|
||||
canonical: право
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
pravo_latin: &pravo_latin
|
||||
canonical: pravo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
livo: &livo
|
||||
canonical: ліво
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
livo_latin: &livo_latin
|
||||
canonical: livo
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: right
|
||||
alternatives:
|
||||
- alternative: *pravo
|
||||
probability: 0.49
|
||||
- alternative: *pravo_latin
|
||||
probability: 0.01
|
||||
- alternative: *livo
|
||||
probability: 0.49
|
||||
- alternative: *livo_latin
|
||||
probability: 0.01
|
||||
|
||||
|
||||
|
||||
cardinal_directions:
|
||||
shkid: &shkid
|
||||
canonical: схід
|
||||
abbreviated: с
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: с
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
shkid_latin: &shkid_latin
|
||||
canonical: shkid
|
||||
abbreviated: s
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: s
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
zakhid: &zakhid
|
||||
canonical: захід
|
||||
abbreviated: з
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
zakhid_latin: &zakhid_latin
|
||||
canonical: zakhid
|
||||
abbreviated: z
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
pivnikh: &pivnikh
|
||||
canonical: північ
|
||||
abbreviated: півн
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
pivnikh_latin: &pivnikh_latin
|
||||
canonical: pivnikh
|
||||
abbreviated: pivn
|
||||
canonical_probability: 0.95
|
||||
abbreviated_probability: 0.05
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
pivden: &pivden
|
||||
canonical: південь
|
||||
abbreviated: півд
|
||||
sample: true
|
||||
canonical_probability: 0.75
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.15
|
||||
numeric:
|
||||
direction: right
|
||||
numeric_affix:
|
||||
affix: Ю
|
||||
direction: right
|
||||
numeric_probability: 0.5
|
||||
numeric_affix_probability: 0.5
|
||||
|
||||
pivden_latin: &pivden_latin
|
||||
canonical: pivden'
|
||||
abbreviated: pivd
|
||||
sample: true
|
||||
canonical_probability: 0.55
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.35
|
||||
numeric:
|
||||
direction: right
|
||||
|
||||
alternatives:
|
||||
- alternative: *pivnikh
|
||||
probability: 0.24
|
||||
- alternative: *pivnikh_latin
|
||||
probability: 0.01
|
||||
- alternative: *shkid
|
||||
probability: 0.24
|
||||
- alternative: *shkid_latin
|
||||
probability: 0.01
|
||||
- alternative: *pivden
|
||||
probability: 0.24
|
||||
- alternative: *pivden_latin
|
||||
probability: 0.01
|
||||
- alternative: *zakhid
|
||||
probability: 0.24
|
||||
- alternative: *zakhid_latin
|
||||
probability: 0.01
|
||||
|
||||
entrances:
|
||||
vkhid: &vkhid
|
||||
canonical: вхід
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
vkhod_latin: &vkhid_latin
|
||||
canonical: vkhid
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
# вход 1, вход A, etc.
|
||||
alphanumeric:
|
||||
default: *vkhid
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *vkhid_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.1 # e.g. Vkhid 1
|
||||
alpha_probability: 0.85 # e.g. Vkhid A
|
||||
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
staircases:
|
||||
skhody: &skhody
|
||||
canonical: сходи
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
skhody_latin: &skhody_latin
|
||||
canonical: skhody
|
||||
sample: true
|
||||
canonical_probability: 0.8
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &staircase_alphanumeric
|
||||
default: *skhody
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *skhody_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.75
|
||||
alpha_probability: 0.2
|
||||
numeric_plus_alpha_probability: 0.025
|
||||
alpha_plus_numeric_probability: 0.025
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
directional:
|
||||
direction: left
|
||||
direction_probability: 0.85
|
||||
modifier:
|
||||
alternatives:
|
||||
- alternative: *pivnikh
|
||||
- alternative: *shkid
|
||||
- alternative: *pivden
|
||||
- alternative: *zakhid
|
||||
|
||||
po_boxes:
|
||||
abonementnykh_skrynka: &abonementnykh_skrynka
|
||||
canonical: абонементна скринька
|
||||
abbreviated: а/с
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.2
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
abonementnykh_skrynka_latin: &abonementnykh_skrynka_latin
|
||||
canonical: abonementnykh skrynʹka
|
||||
abbreviated: a/s
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.1
|
||||
sample_probability: 0.5
|
||||
numeric:
|
||||
direction: left
|
||||
add_number_phrase: true
|
||||
add_number_phrase_probability: 0.2
|
||||
|
||||
alphanumeric:
|
||||
default: *abonementnykh_skrynka
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *abonementnykh_skrynka_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.9 # 123
|
||||
alpha_probability: 0.05 # А
|
||||
numeric_plus_alpha_probability: 0.04 # 123А
|
||||
alpha_plus_numeric_probability: 0.01 # А123
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
digits:
|
||||
- length: 1
|
||||
probability: 0.05
|
||||
- length: 2
|
||||
probability: 0.1
|
||||
- length: 3
|
||||
probability: 0.2
|
||||
- length: 4
|
||||
probability: 0.5
|
||||
- length: 5
|
||||
probability: 0.1
|
||||
- length: 6
|
||||
probability: 0.05
|
||||
|
||||
units:
|
||||
kvartyra: &kvartyra
|
||||
canonical: квартира
|
||||
abbreviated: кв
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
kvartyra_latin: &kvartyra_latin
|
||||
canonical: kvartyra
|
||||
abbreviated: kv
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.6
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
kabinet: &kabinet
|
||||
canonical: кабінет
|
||||
abbreviated: каб
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
kabinet_latin: &kabinet_latin
|
||||
canonical: kabinet
|
||||
abbreviated: kab
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
kimnata: &kimnata
|
||||
canonical: кімната
|
||||
abbreviated: км
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
kimnata_latin: &kimnata_latin
|
||||
canonical: kimnata
|
||||
abbreviated: km
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
litera: &litera
|
||||
canonical: літера
|
||||
abbreviated: літ
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
litera_latin: &litera_latin
|
||||
canonical: litera
|
||||
abbreviated: lit
|
||||
sample: true
|
||||
canonical_probability: 0.3
|
||||
abbreviated_probability: 0.4
|
||||
sample_probability: 0.3
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
ofis: &ofis
|
||||
canonical: офіс
|
||||
abbreviated: оф
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
ofis_latin: &ofis_latin
|
||||
canonical: ofis
|
||||
abbreviated: of
|
||||
sample: true
|
||||
canonical_probability: 0.4
|
||||
abbreviated_probability: 0.5
|
||||
sample_probability: 0.1
|
||||
numeric:
|
||||
direction: left
|
||||
|
||||
alphanumeric: &unit_alphanumeric
|
||||
default: *kvartyra
|
||||
probability: 0.89
|
||||
alternatives:
|
||||
- alternative: *kvartyra
|
||||
probability: 0.01
|
||||
- alternative: *kimnata
|
||||
probability: 0.09
|
||||
- alternative: *kimnata_latin
|
||||
probability: 0.01
|
||||
|
||||
numeric_probability: 0.9 # e.g. кв 1
|
||||
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||||
alpha_plus_numeric_probability: 0.03 # e.g. AА1
|
||||
alpha_probability: 0.04 # e.g. кв А
|
||||
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||
use_floor_probability: 0.1
|
||||
|
||||
alpha:
|
||||
default: *kvartyra
|
||||
probability: 0.79
|
||||
alternatives:
|
||||
- alternative: *kvartyra_latin
|
||||
probability: 0.01
|
||||
- alternative: *kimnata
|
||||
probability: 0.09
|
||||
- alternative: *kimnata_latin
|
||||
probability: 0.01
|
||||
- alternative: *litera
|
||||
probability: 0.09
|
||||
- alternative: *litera_latin
|
||||
probability: 0.01
|
||||
|
||||
|
||||
zones:
|
||||
commercial:
|
||||
default: *kabinet
|
||||
probability: 0.59
|
||||
alternatives:
|
||||
- alternative: *kabinet_latin
|
||||
probability: 0.01
|
||||
- alternative: *ofis
|
||||
probability: 0.29
|
||||
- alternative: *ofis_latin
|
||||
probability: 0.01
|
||||
- alternative: *kimnata
|
||||
probability: 0.09
|
||||
- alternative: *kimnata_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.95 # e.g. kabinet 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. kabinet 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. kab A1
|
||||
alpha_probability: 0.03 # e.g. kab A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
|
||||
university:
|
||||
default: *kimnata
|
||||
probability: 0.99
|
||||
alternatives:
|
||||
- alternative: *kimnata_latin
|
||||
probability: 0.01
|
||||
numeric_probability: 0.95 # e.g. kimnata 1
|
||||
numeric_plus_alpha_probability: 0.01 # e.g. kimnata 1A
|
||||
alpha_plus_numeric_probability: 0.01 # e.g. km A1
|
||||
alpha_probability: 0.03 # e.g. km A
|
||||
alpha_plus_numeric:
|
||||
whitespace_probability: 0.1
|
||||
numeric_plus_alpha:
|
||||
whitespace_probability: 0.1
|
||||
Reference in New Issue
Block a user