[addresses] Ukrainian address config
This commit is contained in:
983
resources/addresses/uk.yaml
Normal file
983
resources/addresses/uk.yaml
Normal file
@@ -0,0 +1,983 @@
|
|||||||
|
# uk.yaml
|
||||||
|
# -------
|
||||||
|
# Ukrainian language specification
|
||||||
|
|
||||||
|
alphabet: абвгґдеєжзиіїйклмнопрстуфхцчшщьюя
|
||||||
|
alphabet_probability: 0.7
|
||||||
|
|
||||||
|
components:
|
||||||
|
level:
|
||||||
|
null_probability: 0.95
|
||||||
|
alphanumeric_probability: 0.04
|
||||||
|
standalone_probability: 0.01
|
||||||
|
|
||||||
|
staircase:
|
||||||
|
null_probability: 0.99
|
||||||
|
alphanumeric_probability: 0.01
|
||||||
|
|
||||||
|
entrance:
|
||||||
|
null_probability: 0.999
|
||||||
|
alphanumeric_probability: 0.001
|
||||||
|
|
||||||
|
unit:
|
||||||
|
null_probability: 0.6
|
||||||
|
alphanumeric_probability: 0.4
|
||||||
|
|
||||||
|
|
||||||
|
numbers:
|
||||||
|
default: &nomer
|
||||||
|
canonical: номер
|
||||||
|
abbreviated: №
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
probability: 0.95
|
||||||
|
alternatives:
|
||||||
|
- alternative: &nomer_latin
|
||||||
|
canonical: nomer
|
||||||
|
abbreviated: "no"
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
probability: 0.05
|
||||||
|
|
||||||
|
|
||||||
|
house_number:
|
||||||
|
budnyok: &budnyok
|
||||||
|
canonical: будинок
|
||||||
|
abbreviated: буд
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.6
|
||||||
|
abbreviated_probability: 0.3
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
budnyok_latin: &budnyok_latin
|
||||||
|
canonical: budnyok
|
||||||
|
abbreviated: bud
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.6
|
||||||
|
abbreviated_probability: 0.3
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
dom: &dom
|
||||||
|
canonical: дом
|
||||||
|
abbreviated: д
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.6
|
||||||
|
abbreviated_probability: 0.3
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
dom_latin: &dom_latin
|
||||||
|
canonical: dom
|
||||||
|
abbreviated: d
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.6
|
||||||
|
abbreviated_probability: 0.3
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
alphanumeric:
|
||||||
|
default: *budnyok
|
||||||
|
probability: 0.65
|
||||||
|
alternatives:
|
||||||
|
- alternative: *budnyok_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *dom
|
||||||
|
probability: 0.35
|
||||||
|
- alternative: *dom_latin
|
||||||
|
probability: 0.05
|
||||||
|
|
||||||
|
# Very common in Ukrainian to write bud/dom
|
||||||
|
alphanumeric_phrase_probability: 0.6
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
and:
|
||||||
|
default: &i
|
||||||
|
canonical: і
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: &i_latin
|
||||||
|
canonical: i
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.1
|
||||||
|
|
||||||
|
|
||||||
|
cross_streets:
|
||||||
|
i: *i
|
||||||
|
i_latin: *i_latin
|
||||||
|
kut: &kut
|
||||||
|
canonical: кут
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
kut_latin: &kut_latin
|
||||||
|
canonical: kut
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
rozi: &rozi
|
||||||
|
canonical: розі
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
rozi_latin: &rozi_latin
|
||||||
|
canonical: rozi
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
na_rozi: &na_rozi
|
||||||
|
canonical: на розі
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
na_rozi_latin: &na_rozi_latin
|
||||||
|
canonical: na rozi
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
intersection:
|
||||||
|
default: *i
|
||||||
|
probability: 0.65
|
||||||
|
alternatives:
|
||||||
|
- alternative: *i_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *rozi
|
||||||
|
probability: 0.075
|
||||||
|
- alternative: *rozi_latin
|
||||||
|
probability: 0.075
|
||||||
|
- alternative: *na_rozi
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *na_rozi_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *kut
|
||||||
|
probability: 0.025
|
||||||
|
- alternative: *kut_latin
|
||||||
|
probability: 0.025
|
||||||
|
mizh: &mizh
|
||||||
|
canonical: між
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
parentheses_probability: 0.5
|
||||||
|
mizh_latin: &mizh_latin
|
||||||
|
canonical: mizh
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
parentheses_probability: 0.5
|
||||||
|
between:
|
||||||
|
default: *mizh
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: *mizh_latin
|
||||||
|
probability: 0.1
|
||||||
|
|
||||||
|
|
||||||
|
levels:
|
||||||
|
poverkh: &poverkh
|
||||||
|
canonical: поверх
|
||||||
|
abbreviated: пов
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.3
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.4
|
||||||
|
ordinal_probability: 0.6
|
||||||
|
poverkh_latin: &poverkh_latin
|
||||||
|
canonical: poverkh
|
||||||
|
abbreviated: pov
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.3
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.4
|
||||||
|
ordinal_probability: 0.6
|
||||||
|
riven: &riven
|
||||||
|
canonical: рівень
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.7
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.4
|
||||||
|
ordinal_probability: 0.6
|
||||||
|
riven_latin: &riven_latin
|
||||||
|
canonical: riven'
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.7
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.4
|
||||||
|
ordinal_probability: 0.6
|
||||||
|
pershyy_poverkh: &pershyy_poverkh
|
||||||
|
canonical: перший поверх
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.9
|
||||||
|
sample_probability: 0.1
|
||||||
|
pershyy_poverkh_latin: &pershyy_poverkh_latin
|
||||||
|
canonical: pershyy poverkh
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.9
|
||||||
|
sample_probability: 0.1
|
||||||
|
nyzhniy_poverkh: &nyzhniy_poverkh
|
||||||
|
canonical: нижній поверх
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.9
|
||||||
|
sample_probability: 0.1
|
||||||
|
nyzhniy_poverkh_latin: &nyzhniy_poverkh_latin
|
||||||
|
canonical: nyzhniy poverkh
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.9
|
||||||
|
sample_probability: 0.1
|
||||||
|
|
||||||
|
tsokolnyy_poverkh: &tsokolnyy_poverkh
|
||||||
|
canonical: цокольний поверх
|
||||||
|
abbreviated: цок пов
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
tsokolnyy_poverkh_latin: &tsokolnyy_poverkh_latin
|
||||||
|
canonical: tsokolʹnyy poverkh
|
||||||
|
abbreviated: tsok pov
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
pidval: &pidval
|
||||||
|
canonical: підвал
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
numeric_affix:
|
||||||
|
affix: п
|
||||||
|
direction: left
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
number_abs_value: true
|
||||||
|
number_min_abs_value: 2
|
||||||
|
# Basement 2 == Sub-basement 1
|
||||||
|
number_subtract_abs_value: 1
|
||||||
|
standalone_probability: 0.985
|
||||||
|
numeric_probability: 0.005
|
||||||
|
numeric_affix_probability: 0.005
|
||||||
|
ordinal_probability: 0.005
|
||||||
|
pidval_latin: &pidval_latin
|
||||||
|
canonical: pidval
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.9
|
||||||
|
numeric_affix:
|
||||||
|
affix: p
|
||||||
|
direction: left
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
ordinal:
|
||||||
|
direction: right
|
||||||
|
number_abs_value: true
|
||||||
|
number_min_abs_value: 2
|
||||||
|
# Basement 2 == Sub-basement 1
|
||||||
|
number_subtract_abs_value: 1
|
||||||
|
standalone_probability: 0.985
|
||||||
|
numeric_probability: 0.005
|
||||||
|
numeric_affix_probability: 0.005
|
||||||
|
ordinal_probability: 0.005
|
||||||
|
|
||||||
|
aliases:
|
||||||
|
"<-1":
|
||||||
|
default: *pidval
|
||||||
|
probability: 0.9
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pidval_latin
|
||||||
|
probability: 0.1
|
||||||
|
"-1": &ground_floor
|
||||||
|
default: *tsokolnyy_poverkh
|
||||||
|
probability: 0.89
|
||||||
|
alternatives:
|
||||||
|
- alternative: *tsokolnyy_poverkh_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *poverkh
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *poverkh_latin
|
||||||
|
probability: 0.01
|
||||||
|
"0":
|
||||||
|
default: *pershyy_poverkh
|
||||||
|
probability: 0.6
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pershyy_poverkh_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *nyzhniy_poverkh
|
||||||
|
probability: 0.2
|
||||||
|
- alternative: *nyzhniy_poverkh_latin
|
||||||
|
probability: 0.05
|
||||||
|
- alternative: *tsokolnyy_poverkh
|
||||||
|
probability: 0.075
|
||||||
|
- alternative: *tsokolnyy_poverkh_latin
|
||||||
|
probability: 0.025
|
||||||
|
numbering_starts_at: 0
|
||||||
|
|
||||||
|
alphanumeric:
|
||||||
|
default: *poverkh
|
||||||
|
probability: 0.8
|
||||||
|
alternatives:
|
||||||
|
- alternative: *poverkh_latin
|
||||||
|
probability: 0.1
|
||||||
|
- alternative: *riven
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *riven_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.99 # With this probability, pick an integer
|
||||||
|
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
|
||||||
|
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
|
||||||
|
alpha_plus_numeric_probability: 0.0001 # e.g. A2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
categories:
|
||||||
|
near:
|
||||||
|
default:
|
||||||
|
canonical: поруч
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.59
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: poruch
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: поблизу
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.19
|
||||||
|
- alternative:
|
||||||
|
canonical: poblyzu
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: близько
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: blyzʹko
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.6
|
||||||
|
sample_probability: 0.4
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: у
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: u
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: біля
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: bilye
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: поруч з
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: poruch z
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
nearby:
|
||||||
|
default:
|
||||||
|
canonical: поблизу
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.64
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: poblyzu
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: поруч тут
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.19
|
||||||
|
- alternative:
|
||||||
|
canonical: poruch tut
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: тут
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.09
|
||||||
|
- alternative:
|
||||||
|
canonical: tut
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
- alternative:
|
||||||
|
canonical: поруч
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.04
|
||||||
|
- alternative:
|
||||||
|
canonical: poruch
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
near_me:
|
||||||
|
default:
|
||||||
|
canonical: поруч з мною
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: poruch z mnoyu
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
probability: 0.01
|
||||||
|
in:
|
||||||
|
default:
|
||||||
|
canonical: в
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative:
|
||||||
|
canonical: v
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
# Probabilities of each phrase
|
||||||
|
near_probability: 0.35
|
||||||
|
nearby_probability: 0.2
|
||||||
|
near_me_probability: 0.1
|
||||||
|
in_probability: 0.35
|
||||||
|
|
||||||
|
|
||||||
|
# uk.yaml
|
||||||
|
# -------
|
||||||
|
# Ukrainian language specification
|
||||||
|
|
||||||
|
directions:
|
||||||
|
pravo: &pravo
|
||||||
|
canonical: право
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
pravo_latin: &pravo_latin
|
||||||
|
canonical: pravo
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
livo: &livo
|
||||||
|
canonical: ліво
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
livo_latin: &livo_latin
|
||||||
|
canonical: livo
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pravo
|
||||||
|
probability: 0.49
|
||||||
|
- alternative: *pravo_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *livo
|
||||||
|
probability: 0.49
|
||||||
|
- alternative: *livo_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cardinal_directions:
|
||||||
|
shkid: &shkid
|
||||||
|
canonical: схід
|
||||||
|
abbreviated: с
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: с
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
shkid_latin: &shkid_latin
|
||||||
|
canonical: shkid
|
||||||
|
abbreviated: s
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: s
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
zakhid: &zakhid
|
||||||
|
canonical: захід
|
||||||
|
abbreviated: з
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
|
||||||
|
zakhid_latin: &zakhid_latin
|
||||||
|
canonical: zakhid
|
||||||
|
abbreviated: z
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
|
||||||
|
pivnikh: &pivnikh
|
||||||
|
canonical: північ
|
||||||
|
abbreviated: півн
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
|
||||||
|
pivnikh_latin: &pivnikh_latin
|
||||||
|
canonical: pivnikh
|
||||||
|
abbreviated: pivn
|
||||||
|
canonical_probability: 0.95
|
||||||
|
abbreviated_probability: 0.05
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
|
||||||
|
pivden: &pivden
|
||||||
|
canonical: південь
|
||||||
|
abbreviated: півд
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.75
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.15
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
numeric_affix:
|
||||||
|
affix: Ю
|
||||||
|
direction: right
|
||||||
|
numeric_probability: 0.5
|
||||||
|
numeric_affix_probability: 0.5
|
||||||
|
|
||||||
|
pivden_latin: &pivden_latin
|
||||||
|
canonical: pivden'
|
||||||
|
abbreviated: pivd
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.55
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.35
|
||||||
|
numeric:
|
||||||
|
direction: right
|
||||||
|
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pivnikh
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *pivnikh_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *shkid
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *shkid_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *pivden
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *pivden_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *zakhid
|
||||||
|
probability: 0.24
|
||||||
|
- alternative: *zakhid_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
entrances:
|
||||||
|
vkhid: &vkhid
|
||||||
|
canonical: вхід
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
vkhid_latin: &vkhid_latin
|
||||||
|
canonical: vkhid
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
# вхід 1, вхід A, etc.
|
||||||
|
alphanumeric:
|
||||||
|
default: *vkhid
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *vkhid_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.1 # e.g. Vkhid 1
|
||||||
|
alpha_probability: 0.85 # e.g. Vkhid A
|
||||||
|
numeric_plus_alpha_probability: 0.025 # e.g. 1A
|
||||||
|
alpha_plus_numeric_probability: 0.025 # e.g. A1
|
||||||
|
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
staircases:
|
||||||
|
skhody: &skhody
|
||||||
|
canonical: сходи
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
skhody_latin: &skhody_latin
|
||||||
|
canonical: skhody
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.8
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
alphanumeric: &staircase_alphanumeric
|
||||||
|
default: *skhody
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *skhody_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.75
|
||||||
|
alpha_probability: 0.2
|
||||||
|
numeric_plus_alpha_probability: 0.025
|
||||||
|
alpha_plus_numeric_probability: 0.025
|
||||||
|
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
directional:
|
||||||
|
direction: left
|
||||||
|
direction_probability: 0.85
|
||||||
|
modifier:
|
||||||
|
alternatives:
|
||||||
|
- alternative: *pivnikh
|
||||||
|
- alternative: *shkid
|
||||||
|
- alternative: *pivden
|
||||||
|
- alternative: *zakhid
|
||||||
|
|
||||||
|
po_boxes:
|
||||||
|
abonementnykh_skrynka: &abonementnykh_skrynka
|
||||||
|
canonical: абонементна скринька
|
||||||
|
abbreviated: а/с
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.2
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
abonementnykh_skrynka_latin: &abonementnykh_skrynka_latin
|
||||||
|
canonical: abonementnykh skrynʹka
|
||||||
|
abbreviated: a/s
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.1
|
||||||
|
sample_probability: 0.5
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
add_number_phrase: true
|
||||||
|
add_number_phrase_probability: 0.2
|
||||||
|
|
||||||
|
alphanumeric:
|
||||||
|
default: *abonementnykh_skrynka
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *abonementnykh_skrynka_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.9 # 123
|
||||||
|
alpha_probability: 0.05 # А
|
||||||
|
numeric_plus_alpha_probability: 0.04 # 123А
|
||||||
|
alpha_plus_numeric_probability: 0.01 # А123
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
digits:
|
||||||
|
- length: 1
|
||||||
|
probability: 0.05
|
||||||
|
- length: 2
|
||||||
|
probability: 0.1
|
||||||
|
- length: 3
|
||||||
|
probability: 0.2
|
||||||
|
- length: 4
|
||||||
|
probability: 0.5
|
||||||
|
- length: 5
|
||||||
|
probability: 0.1
|
||||||
|
- length: 6
|
||||||
|
probability: 0.05
|
||||||
|
|
||||||
|
units:
|
||||||
|
kvartyra: &kvartyra
|
||||||
|
canonical: квартира
|
||||||
|
abbreviated: кв
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.6
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
kvartyra_latin: &kvartyra_latin
|
||||||
|
canonical: kvartyra
|
||||||
|
abbreviated: kv
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.6
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
kabinet: &kabinet
|
||||||
|
canonical: кабінет
|
||||||
|
abbreviated: каб
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
kabinet_latin: &kabinet_latin
|
||||||
|
canonical: kabinet
|
||||||
|
abbreviated: kab
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
kimnata: &kimnata
|
||||||
|
canonical: кімната
|
||||||
|
abbreviated: км
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
kimnata_latin: &kimnata_latin
|
||||||
|
canonical: kimnata
|
||||||
|
abbreviated: km
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
litera: &litera
|
||||||
|
canonical: літера
|
||||||
|
abbreviated: літ
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
litera_latin: &litera_latin
|
||||||
|
canonical: litera
|
||||||
|
abbreviated: lit
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.3
|
||||||
|
abbreviated_probability: 0.4
|
||||||
|
sample_probability: 0.3
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
ofis: &ofis
|
||||||
|
canonical: офіс
|
||||||
|
abbreviated: оф
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.5
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
ofis_latin: &ofis_latin
|
||||||
|
canonical: ofis
|
||||||
|
abbreviated: of
|
||||||
|
sample: true
|
||||||
|
canonical_probability: 0.4
|
||||||
|
abbreviated_probability: 0.5
|
||||||
|
sample_probability: 0.1
|
||||||
|
numeric:
|
||||||
|
direction: left
|
||||||
|
|
||||||
|
alphanumeric: &unit_alphanumeric
|
||||||
|
default: *kvartyra
|
||||||
|
probability: 0.89
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kvartyra_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *kimnata
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *kimnata_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
numeric_probability: 0.9 # e.g. кв 1
|
||||||
|
numeric_plus_alpha_probability: 0.03 # e.g. 1А
|
||||||
|
alpha_plus_numeric_probability: 0.03 # e.g. А1
|
||||||
|
alpha_probability: 0.04 # e.g. кв А
|
||||||
|
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
# If there are 10 floors, create unit numbers like #301 or #1032
|
||||||
|
use_floor_probability: 0.1
|
||||||
|
|
||||||
|
alpha:
|
||||||
|
default: *kvartyra
|
||||||
|
probability: 0.79
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kvartyra_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *kimnata
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *kimnata_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *litera
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *litera_latin
|
||||||
|
probability: 0.01
|
||||||
|
|
||||||
|
|
||||||
|
zones:
|
||||||
|
commercial:
|
||||||
|
default: *kabinet
|
||||||
|
probability: 0.59
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kabinet_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *ofis
|
||||||
|
probability: 0.29
|
||||||
|
- alternative: *ofis_latin
|
||||||
|
probability: 0.01
|
||||||
|
- alternative: *kimnata
|
||||||
|
probability: 0.09
|
||||||
|
- alternative: *kimnata_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.95 # e.g. kabinet 1
|
||||||
|
numeric_plus_alpha_probability: 0.01 # e.g. kabinet 1A
|
||||||
|
alpha_plus_numeric_probability: 0.01 # e.g. kab A1
|
||||||
|
alpha_probability: 0.03 # e.g. kab A
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
|
||||||
|
university:
|
||||||
|
default: *kimnata
|
||||||
|
probability: 0.99
|
||||||
|
alternatives:
|
||||||
|
- alternative: *kimnata_latin
|
||||||
|
probability: 0.01
|
||||||
|
numeric_probability: 0.95 # e.g. kimnata 1
|
||||||
|
numeric_plus_alpha_probability: 0.01 # e.g. kimnata 1A
|
||||||
|
alpha_plus_numeric_probability: 0.01 # e.g. km A1
|
||||||
|
alpha_probability: 0.03 # e.g. km A
|
||||||
|
alpha_plus_numeric:
|
||||||
|
whitespace_probability: 0.1
|
||||||
|
numeric_plus_alpha:
|
||||||
|
whitespace_probability: 0.1
|
||||||
Reference in New Issue
Block a user