From dda0455dd357ae23dfa57da234ab3997e247b189 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 9 Jul 2016 15:28:22 -0400 Subject: [PATCH] [addresses] Turkish address config --- resources/addresses/tr.yaml | 503 ++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 resources/addresses/tr.yaml diff --git a/resources/addresses/tr.yaml b/resources/addresses/tr.yaml new file mode 100644 index 00000000..c91d050c --- /dev/null +++ b/resources/addresses/tr.yaml @@ -0,0 +1,503 @@ +# tr.yaml +# ------- +# Turkish language specification + +components: + level: + null_probability: 0.9 + alphanumeric_probability: 0.1 + + staircase: + null_probability: 0.99 + alphanumeric_probability: 0.01 + + entrance: + null_probability: 0.999 + alphanumeric_probability: 0.001 + + unit: + null_probability: 0.7 + alphanumeric_probability: 0.3 + + combinations: + - + components: + - house_number + - staircase + - level + - unit + label: house_number + separators: + - separator: "/" + probability: 0.95 + - separator: "-" + probability: 0.05 + probability: 0.005 + - + components: + - house_number + - level + - unit + label: house_number + separators: + - separator: "/" + probability: 0.95 + - separator: "-" + probability: 0.05 + probability: 0.005 + - + components: + - house_number + - level + label: house_number + separators: + - separator: "/" + probability: 0.95 + - separator: "-" + probability: 0.05 + probability: 0.1 + # For unit types like 2/34 + - + components: + - house_number + - unit + label: house_number + separators: + - separator: "/" + probability: 0.95 + - separator: "-" + probability: 0.05 + probability: 0.005 + + +numbers: + + default: &numara + canonical: numara + abbreviated: "no:" + sample: true + canonical_probability: 0.3 + abbreviated_probability: 0.6 + sample_probability: 0.1 + numeric: + direction: left + numeric_affix: + affix: "no:" + whitespace_probability: 0.6 + direction: left + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + + alphanumeric_phrase_probability: 0.05 + no_number_probability: 0.05 + + +and: + default: &ve + canonical: ve + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + + +cross_streets: + ve: *ve + corner_of: &kose + canonical: köşe + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + kosesinde: &kosesinde + canonical: köşesinde + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + intersection: + default: *ve + probability: 0.8 + alternatives: + - alternative: *kose + probability: 0.1 + - alternative: *kosesinde + probability: 0.1 + + arasinda: &arasinda + canonical: arasında + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + parentheses_probability: 0.5 + between: + default: *arasinda + +levels: + kat: &kat + canonical: kat + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + direction_probability: 0.9 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 + add_number_phrase: true + add_number_phrase_probability: 0.1 + ordinal: + direction: right + digits: + ascii_probability: 0.3 + roman_numeral_probability: 0.7 + add_number_phrase: true + add_number_phrase_probability: 0.1 + numeric_probability: 0.4 + ordinal_probability: 0.6 + + zemin_kat: &zemin_kat + canonical: zemin kat + abbreviated: zk + sample: true + canonical_probability: 0.3 + abbreviated_probability: 0.4 + sample_probability: 0.3 + asma_kat: &asma_kat + canonical: asma kat + half_floors: true + canonical_probability: 0.8 + sample_probability: 0.2 + sample: true + # e.g. asma kat 2 + numeric: + direction: left + # e.g. 2. asma kat + ordinal: + direction: right + numeric_probability: 0.1 + ordinal_probability: 0.2 + standalone_probability: 0.6 + bodrum: &bodrum + canonical: bodrum + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + # e.g. bodrum 1 + numeric: + direction: left + direction_probability: 0.8 + # e.g. 1. bodrum + ordinal: + direction: right + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 + standalone_probability: 0.99 + number_abs_value: true + number_min_abs_value: 1 + numeric_probability: 0.005 + ordinal_probability: 0.005 + + aliases: + "<-1": + default: *bodrum + "-1": + default: *bodrum + # Special token for half-floors + half_floors: + default: *asma_kat + "0": + default: *zemin_kat + probability: 0.9 + alternatives: + - alternative: *kat + probability: 0.1 + + numbering_starts_at: 0 + + alphanumeric: + default: *kat + numeric_probability: 0.99 # With this probability, pick an integer + alpha_probability: 0.0098 # With this probability, pick a letter e.g. A + numeric_plus_alpha_probability: 0.0001 # e.g. 2A + alpha_plus_numeric_probability: 0.0001 # e.g. A2 + + +directions: + right: &sag + canonical: sağ + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + left: &sol + canonical: sol + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + alternatives: + - alternative: *sag + probability: 0.5 + - alternative: *sol + probability: 0.5 + +cardinal_directions: + east: &dogu + canonical: doğu + abbreviated: d + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: d + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + west: &bati + canonical: batı + abbreviated: b + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: b + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + north: &kuzey + canonical: kuzey + abbreviated: k + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: k + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + south: &guney + canonical: güney + abbreviated: g + sample: true + canonical_probability: 0.75 + abbreviated_probability: 0.1 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: g + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + alternatives: + - alternative: *kuzey + probability: 0.25 + - alternative: *dogu + probability: 0.23 + - alternative: *guney + probability: 0.23 + - alternative: *bati + probability: 0.23 + +entrances: + giris: &giris + canonical: giriş + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + # giriş 1, giriş A, etc. + alphanumeric: &entrance_alphanumeric + default: *giris + numeric_probability: 0.1 # e.g. giriş 1 + alpha_probability: 0.85 # e.g. giriş A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + +staircases: + merdiven: &merdiven + canonical: merdiven + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + + alphanumeric: &staircase_alphanumeric + default: *merdiven + numeric_probability: 0.75 + alpha_probability: 0.2 + numeric_plus_alpha_probability: 0.025 + alpha_plus_numeric_probability: 0.025 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + direction: right + direction_probability: 0.85 + modifier: + alternatives: + - alternative: *sag + probability: 0.2 + - alternative: *sol + probability: 0.2 + - alternative: *kuzey + probability: 0.15 + - alternative: *guney + probability: 0.15 + - alternative: *dogu + probability: 0.15 + - alternative: *bati + probability: 0.15 + +po_boxes: + posta_kutusu: &posta_kutusu + canonical: posta kutusu + abbreviated: pk + sample: true + canonical_probability: 0.2 + abbreviated_probability: 0.4 + sample_probability: 0.4 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.2 + + alphanumeric: + default: *posta_kutusu + numeric_probability: 0.9 # pp 123 + alpha_probability: 0.05 # p.p A + numeric_plus_alpha_probability: 0.04 # pp 123G + alpha_plus_numeric_probability: 0.01 # pp A123 + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + digits: + - length: 1 + probability: 0.05 + - length: 2 + probability: 0.1 + - length: 3 + probability: 0.2 + - length: 4 + probability: 0.5 + - length: 5 + probability: 0.1 + - length: 6 + probability: 0.05 + + +units: + daire: &daire + canonical: daire + abbreviated: d + sample: true + canonical_probability: 0.4 + abbreviated_probability: 0.4 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.1 + apartman: &apartman + canonical: apartman + abbreviated: apt + sample: true + canonical_probability: 0.4 + abbreviated_probability: 0.2 + sample_probability: 0.4 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.1 + + oda: &oda + canonical: oda + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.1 + ofis: &ofis + canonical: ofis + sample: true + canonical_probability: 0.6 + sample_probability: 0.4 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.1 + + alphanumeric: &unit_alphanumeric + default: *daire + probability: 0.6 + alternatives: + - alternative: *apartman + probability: 0.3 + - alternative: *oda + probability: 0.1 + numeric_probability: 0.9 # e.g. d. 1 + numeric_plus_alpha_probability: 0.03 # e.g. 1A + alpha_plus_numeric_probability: 0.03 # e.g. A1 + alpha_probability: 0.04 # e.g. daire A + + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + # If there are 10 floors, create unit numbers like #301 or #1032 + use_floor_probability: 0.05 + + zones: + commercial: &commercial_unit_types + default: *oda + probability: 0.6 + alternatives: + - alternative: *ofis + probability: 0.4 + numeric_probability: 0.95 # e.g. oda 1 + numeric_plus_alpha_probability: 0.01 # e.g. oda 1A + alpha_plus_numeric_probability: 0.01 # e.g. oda A1 + alpha_probability: 0.03 # e.g. oda A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + university: + default: *oda + numeric_probability: 0.95 # e.g. oda 1 + numeric_plus_alpha_probability: 0.01 # e.g. oda 1A + alpha_plus_numeric_probability: 0.01 # e.g. oda A1 + alpha_probability: 0.03 # e.g. oda A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1