Files
2025-09-06 22:03:29 -04:00

504 lines
13 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# tr.yaml
# -------
# Turkish language specification
components:
  # Per-component weights: null_probability vs. alphanumeric_probability.
  level:
    null_probability: 0.9
    alphanumeric_probability: 0.1
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.7
    alphanumeric_probability: 0.3
  # Each entry names the components it combines, the label given to the
  # combined value, the weighted separators to choose from, and the entry's
  # own probability.
  combinations:
    - components:
        - house_number
        - staircase
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.1
    # For house_number/unit values like 2/34
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
# Phrase for "number" (numara / "no:") and how it attaches to a value.
numbers:
  default: &numara
    canonical: numara
    abbreviated: "no:"
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
    numeric_affix:
      affix: "no:"
      whitespace_probability: 0.6
      direction: left
    # Weights for the numeric vs. numeric_affix forms above (total 1.0).
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  # NOTE(review): original indentation was lost; these two keys are placed at
  # the `numbers` level rather than inside `default` - confirm with the
  # consumer of this file.
  alphanumeric_phrase_probability: 0.05
  no_number_probability: 0.05
# Conjunction phrase ("ve"); anchored as &ve for reuse via *ve.
and:
  default: &ve
    canonical: ve
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
# Phrases used when describing cross streets.
cross_streets:
  ve: *ve
  corner_of: &kose
    canonical: köşe
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  kosesinde: &kosesinde
    canonical: köşesinde
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # Weighted choice of intersection phrase: default 0.8 + alternatives 0.2.
  intersection:
    default: *ve
    probability: 0.8
    alternatives:
      - alternative: *kose
        probability: 0.1
      - alternative: *kosesinde
        probability: 0.1
  arasinda: &arasinda
    canonical: arasında
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # NOTE(review): original indentation was lost; this key directly followed
    # the arasinda properties, so it is kept inside that mapping - confirm.
    parentheses_probability: 0.5
  # NOTE(review): `between` is assumed to belong under cross_streets - confirm.
  between:
    default: *arasinda
# Floor / level phrases, aliases for special floor numbers, and the
# alphanumeric level distribution.
levels:
  kat: &kat
    canonical: kat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  zemin_kat: &zemin_kat
    canonical: zemin kat
    abbreviated: zk
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
  asma_kat: &asma_kat
    canonical: asma kat
    half_floors: true
    canonical_probability: 0.8
    sample_probability: 0.2
    sample: true
    # e.g. asma kat 2
    numeric:
      direction: left
    # e.g. 2. asma kat
    ordinal:
      direction: right
    # NOTE(review): these three weights total 0.9, not 1.0 - confirm the
    # intended values.
    numeric_probability: 0.1
    ordinal_probability: 0.2
    standalone_probability: 0.6
  bodrum: &bodrum
    canonical: bodrum
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. bodrum 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. bodrum
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005
  # Phrases substituted for particular floor numbers.
  aliases:
    "<-1":
      default: *bodrum
    "-1":
      default: *bodrum
    # Special token for half-floors
    half_floors:
      default: *asma_kat
    "0":
      default: *zemin_kat
      probability: 0.9
      alternatives:
        - alternative: *kat
          probability: 0.1
  numbering_starts_at: 0
  alphanumeric:
    default: *kat
    numeric_probability: 0.99  # With this probability, pick an integer
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
# Relative directions (sağ = right, sol = left), equally weighted.
directions:
  right: &sag
    canonical: sağ
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  left: &sol
    canonical: sol
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  alternatives:
    - alternative: *sag
      probability: 0.5
    - alternative: *sol
      probability: 0.5
# Cardinal direction phrases with single-letter abbreviations and affixes.
cardinal_directions:
  east: &dogu
    canonical: doğu
    abbreviated: d
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: d
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &bati
    canonical: batı
    abbreviated: b
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: b
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &kuzey
    canonical: kuzey
    abbreviated: k
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: k
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  south: &guney
    canonical: güney
    abbreviated: g
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: g
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # Equal weights totalling 1.0, in line with the other alternatives lists in
  # this file (the source had 0.25 + 3 x 0.23 = 0.94).
  alternatives:
    - alternative: *kuzey
      probability: 0.25
    - alternative: *dogu
      probability: 0.25
    - alternative: *guney
      probability: 0.25
    - alternative: *bati
      probability: 0.25
# Entrance phrase (giriş) and the distribution of entrance identifiers.
entrances:
  giris: &giris
    canonical: giriş
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # giriş 1, giriş A, etc.
  alphanumeric: &entrance_alphanumeric
    default: *giris
    numeric_probability: 0.1  # e.g. giriş 1
    alpha_probability: 0.85  # e.g. giriş A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
# Staircase phrase (merdiven), identifier distribution, and direction
# modifiers.
staircases:
  merdiven: &merdiven
    canonical: merdiven
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  alphanumeric: &staircase_alphanumeric
    default: *merdiven
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # NOTE(review): original indentation was lost; `directional` is placed as a
  # sibling of `alphanumeric` - it may instead belong inside it. Confirm with
  # the consumer of this file.
  directional:
    direction: right
    direction_probability: 0.85
    # Modifier weights: 0.2 + 0.2 + 4 x 0.15 = 1.0
    modifier:
      alternatives:
        - alternative: *sag
          probability: 0.2
        - alternative: *sol
          probability: 0.2
        - alternative: *kuzey
          probability: 0.15
        - alternative: *guney
          probability: 0.15
        - alternative: *dogu
          probability: 0.15
        - alternative: *bati
          probability: 0.15
# Post office box phrase (posta kutusu / pk), identifier distribution, and
# the weighted number of digits in a box number.
po_boxes:
  posta_kutusu: &posta_kutusu
    canonical: posta kutusu
    abbreviated: pk
    sample: true
    canonical_probability: 0.2
    abbreviated_probability: 0.4
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  alphanumeric:
    default: *posta_kutusu
    numeric_probability: 0.9  # e.g. pk 123
    alpha_probability: 0.05  # e.g. pk A
    numeric_plus_alpha_probability: 0.04  # e.g. pk 123G
    alpha_plus_numeric_probability: 0.01  # e.g. pk A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Digit-count weights (total 1.0).
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Unit phrases (daire, apartman, oda, ofis), the default identifier
# distribution, and per-zone overrides.
units:
  daire: &daire
    canonical: daire
    abbreviated: d
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  apartman: &apartman
    canonical: apartman
    abbreviated: apt
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  oda: &oda
    canonical: oda
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  ofis: &ofis
    canonical: ofis
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  # Default phrase choice: daire 0.6, apartman 0.3, oda 0.1.
  alphanumeric: &unit_alphanumeric
    default: *daire
    probability: 0.6
    alternatives:
      - alternative: *apartman
        probability: 0.3
      - alternative: *oda
        probability: 0.1
    numeric_probability: 0.9  # e.g. d. 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. daire A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.05
  # Zone-specific phrase choices and identifier distributions.
  zones:
    commercial: &commercial_unit_types
      default: *oda
      probability: 0.6
      alternatives:
        - alternative: *ofis
          probability: 0.4
      numeric_probability: 0.95  # e.g. oda 1
      numeric_plus_alpha_probability: 0.01  # e.g. oda 1A
      alpha_plus_numeric_probability: 0.01  # e.g. oda A1
      alpha_probability: 0.03  # e.g. oda A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *oda
      numeric_probability: 0.95  # e.g. oda 1
      numeric_plus_alpha_probability: 0.01  # e.g. oda 1A
      alpha_plus_numeric_probability: 0.01  # e.g. oda A1
      alpha_probability: 0.03  # e.g. oda A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1