[addresses] Turkish address config

This commit is contained in:
Al
2016-07-09 15:28:22 -04:00
parent 7e89f3b512
commit dda0455dd3

503
resources/addresses/tr.yaml Normal file
View File

@@ -0,0 +1,503 @@
# tr.yaml
# -------
# Turkish language specification
components:
level:
null_probability: 0.9
alphanumeric_probability: 0.1
staircase:
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
null_probability: 0.7
alphanumeric_probability: 0.3
combinations:
-
components:
- house_number
- staircase
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
-
components:
- house_number
- level
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.1
# For unit types like 2/34
-
components:
- house_number
- unit
label: house_number
separators:
- separator: "/"
probability: 0.95
- separator: "-"
probability: 0.05
probability: 0.005
numbers:
default: &numara
canonical: numara
abbreviated: "no:"
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.6
sample_probability: 0.1
numeric:
direction: left
numeric_affix:
affix: "no:"
whitespace_probability: 0.6
direction: left
numeric_probability: 0.4
numeric_affix_probability: 0.6
alphanumeric_phrase_probability: 0.05
no_number_probability: 0.05
and:
default: &ve
canonical: ve
sample: true
canonical_probability: 0.8
sample_probability: 0.2
cross_streets:
ve: *ve
corner_of: &kose
canonical: köşe
sample: true
canonical_probability: 0.8
sample_probability: 0.2
kosesinde: &kosesinde
canonical: köşesinde
sample: true
canonical_probability: 0.8
sample_probability: 0.2
intersection:
default: *ve
probability: 0.8
alternatives:
- alternative: *kose
probability: 0.1
- alternative: *kosesinde
probability: 0.1
arasinda: &arasinda
canonical: arasında
sample: true
canonical_probability: 0.8
sample_probability: 0.2
parentheses_probability: 0.5
between:
default: *arasinda
levels:
kat: &kat
canonical: kat
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
direction_probability: 0.9
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
add_number_phrase: true
add_number_phrase_probability: 0.1
ordinal:
direction: right
digits:
ascii_probability: 0.3
roman_numeral_probability: 0.7
add_number_phrase: true
add_number_phrase_probability: 0.1
numeric_probability: 0.4
ordinal_probability: 0.6
zemin_kat: &zemin_kat
canonical: zemin kat
abbreviated: zk
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
asma_kat: &asma_kat
canonical: asma kat
half_floors: true
canonical_probability: 0.8
sample_probability: 0.2
sample: true
# e.g. asma kat 2
numeric:
direction: left
# e.g. 2. asma kat
ordinal:
direction: right
numeric_probability: 0.1
ordinal_probability: 0.2
standalone_probability: 0.6
bodrum: &bodrum
canonical: bodrum
sample: true
canonical_probability: 0.7
sample_probability: 0.3
# e.g. bodrum 1
numeric:
direction: left
direction_probability: 0.8
# e.g. 1. bodrum
ordinal:
direction: right
digits:
ascii_probability: 0.7
roman_numeral_probability: 0.3
standalone_probability: 0.99
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
ordinal_probability: 0.005
aliases:
"<-1":
default: *bodrum
"-1":
default: *bodrum
# Special token for half-floors
half_floors:
default: *asma_kat
"0":
default: *zemin_kat
probability: 0.9
alternatives:
- alternative: *kat
probability: 0.1
numbering_starts_at: 0
alphanumeric:
default: *kat
numeric_probability: 0.99 # With this probability, pick an integer
alpha_probability: 0.0098 # With this probability, pick a letter e.g. A
numeric_plus_alpha_probability: 0.0001 # e.g. 2A
alpha_plus_numeric_probability: 0.0001 # e.g. A2
directions:
right: &sag
canonical: sağ
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
left: &sol
canonical: sol
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
alternatives:
- alternative: *sag
probability: 0.5
- alternative: *sol
probability: 0.5
cardinal_directions:
east: &dogu
canonical: doğu
abbreviated: d
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: d
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
west: &bati
canonical: batı
abbreviated: b
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: b
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &kuzey
canonical: kuzey
abbreviated: k
canonical_probability: 0.95
abbreviated_probability: 0.05
numeric:
direction: right
numeric_affix:
affix: k
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &guney
canonical: güney
abbreviated: g
sample: true
canonical_probability: 0.75
abbreviated_probability: 0.1
sample_probability: 0.15
numeric:
direction: right
numeric_affix:
affix: g
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
alternatives:
- alternative: *kuzey
probability: 0.25
- alternative: *dogu
probability: 0.23
- alternative: *guney
probability: 0.23
- alternative: *bati
probability: 0.23
entrances:
giris: &giris
canonical: giriş
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
# giriş 1, giriş A, etc.
alphanumeric: &entrance_alphanumeric
default: *giris
numeric_probability: 0.1 # e.g. giriş 1
alpha_probability: 0.85 # e.g. giriş A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
staircases:
merdiven: &merdiven
canonical: merdiven
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
alphanumeric: &staircase_alphanumeric
default: *merdiven
numeric_probability: 0.75
alpha_probability: 0.2
numeric_plus_alpha_probability: 0.025
alpha_plus_numeric_probability: 0.025
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
directional:
direction: right
direction_probability: 0.85
modifier:
alternatives:
- alternative: *sag
probability: 0.2
- alternative: *sol
probability: 0.2
- alternative: *kuzey
probability: 0.15
- alternative: *guney
probability: 0.15
- alternative: *dogu
probability: 0.15
- alternative: *bati
probability: 0.15
po_boxes:
posta_kutusu: &posta_kutusu
canonical: posta kutusu
abbreviated: pk
sample: true
canonical_probability: 0.2
abbreviated_probability: 0.4
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.2
alphanumeric:
default: *posta_kutusu
numeric_probability: 0.9 # pp 123
alpha_probability: 0.05 # p.p A
numeric_plus_alpha_probability: 0.04 # pp 123G
alpha_plus_numeric_probability: 0.01 # pp A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
units:
daire: &daire
canonical: daire
abbreviated: d
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
apartman: &apartman
canonical: apartman
abbreviated: apt
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.2
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
oda: &oda
canonical: oda
sample: true
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
ofis: &ofis
canonical: ofis
sample: true
canonical_probability: 0.6
sample_probability: 0.4
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.1
alphanumeric: &unit_alphanumeric
default: *daire
probability: 0.6
alternatives:
- alternative: *apartman
probability: 0.3
- alternative: *oda
probability: 0.1
numeric_probability: 0.9 # e.g. d. 1
numeric_plus_alpha_probability: 0.03 # e.g. 1A
alpha_plus_numeric_probability: 0.03 # e.g. A1
alpha_probability: 0.04 # e.g. daire A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# If there are 10 floors, create unit numbers like #301 or #1032
use_floor_probability: 0.05
zones:
commercial: &commercial_unit_types
default: *oda
probability: 0.6
alternatives:
- alternative: *ofis
probability: 0.4
numeric_probability: 0.95 # e.g. oda 1
numeric_plus_alpha_probability: 0.01 # e.g. oda 1A
alpha_plus_numeric_probability: 0.01 # e.g. oda A1
alpha_probability: 0.03 # e.g. oda A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
default: *oda
numeric_probability: 0.95 # e.g. oda 1
numeric_plus_alpha_probability: 0.01 # e.g. oda 1A
alpha_plus_numeric_probability: 0.01 # e.g. oda A1
alpha_probability: 0.03 # e.g. oda A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1