From a0ac1e203b36487727fe70bc1a86eea0be3fa3a3 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 28 Jun 2016 13:18:23 -0400 Subject: [PATCH] [addresses] Estonian address config --- resources/addresses/et.yaml | 470 ++++++++++++++++++++++++++++++++++++ 1 file changed, 470 insertions(+) create mode 100644 resources/addresses/et.yaml diff --git a/resources/addresses/et.yaml b/resources/addresses/et.yaml new file mode 100644 index 00000000..9e626c4a --- /dev/null +++ b/resources/addresses/et.yaml @@ -0,0 +1,470 @@ +# et.yaml +# ------- +# Estonian language specification. + +components: + level: + null_probability: 0.97 + alphanumeric_probability: 0.02 + standalone_probability: 0.01 + + staircase: + null_probability: 0.99 + alphanumeric_probability: 0.01 + + entrance: + null_probability: 0.999 + alphanumeric_probability: 0.001 + + unit: + null_probability: 0.75 + alphanumeric_probability: 0.25 + + combinations: + house_number_unit: + components: + - house_number + - unit + label: house_number + separators: + - separator: "-" + probability: 0.95 + - separator: " - " + probability: 0.05 + probability: 0.7 + + +numbers: + default: &number + canonical: number + abbreviated: nbr + sample: true + # Probabilities + canonical_probability: 0.3 + abbreviated_probability: 0.5 + sample_probability: 0.2 + sample_exclude: + - "#" + numeric: + direction: left + numeric_affix: + affix: "#" + direction: left + + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + + +house_number: + alphanumeric: + default: *number + + alphanumeric_phrase_probability: 0.0001 + + +and: + default: &ja + canonical: ja + abbreviated: "&" + canonical_probability: 0.2 + abbreviated_probability: 0.75 + sample: true + sample_probability: 0.05 + +cross_streets: + and: *ja + corner_of: &nurgas + canonical: nurgas + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + at_the_corner_of: &nurgal + canonical: nurgal + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + intersection: + default: *ja + probability: 0.7 + alternatives: + - alternative: *nurgas + probability: 0.15 + - alternative: *nurgal + probability: 0.15 + + between: + canonical: vahel + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + parentheses_probability: 0.5 + +levels: + floor: &korrusel + canonical: korrusel + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + direction_probability: 0.9 + ordinal: + direction: right + numeric_probability: 0.4 + ordinal_probability: 0.6 + parter: &parter + canonical: parter + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + kelder: &kelder + canonical: kelder + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + standalone_probability: 1.0 + keldris: &keldris + canonical: keldris + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + # e.g. 1 keldris + numeric: + direction: right + direction_probability: 0.8 + # e.g. k1 + numeric_affix: + affix: k + direction: left + # e.g. 1. keldris + ordinal: + direction: right + number_abs_value: true + number_min_abs_value: 1 + numeric_probability: 0.05 + numeric_affix_probability: 0.9 + ordinal_probability: 0.05 + aliases: + "<-1": + default: *kelder + probability: 0.85 + alternatives: + - alternative: *keldris + probability: 0.15 + "-1": + default: *kelder + probability: 0.85 + alternatives: + - alternative: *keldris + probability: 0.1 + - alternative: *korrusel + probability: 0.05 + "1": + default: *parter + probability: 0.5 + alternatives: + - alternative: *korrusel + probability: 0.5 + + numbering_starts_at: 1 + + alphanumeric: + default: *korrusel + numeric_probability: 0.99 # With this probability, pick an integer + alpha_probability: 0.0098 # With this probability, pick a letter e.g. A + numeric_plus_alpha_probability: 0.0001 # e.g. 2A + alpha_plus_numeric_probability: 0.0001 # e.g. A2 + + + + +categories: + near: + default: + canonical: lähedal + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + nearby: + default: + canonical: lähedal + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.7 + alternatives: + - alternative: + canonical: siin lähedal + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.2 + - alternative: + canonical: siinkandis + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + + near_me: + default: + canonical: lähedal mulle + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + + + # Probabilities of each phrase + near_probability: 0.7 + nearby_probability: 0.2 + near_me_probability: 0.1 + +directions: + right: &paremal + canonical: paremal + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + numeric: + direction: right + numeric_affix: + affix: p + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + paramale: &paremale + canonical: paremale + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + numeric: + direction: right + numeric_affix: + affix: p + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + left: &vasakul + canonical: vasakul + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + numeric: + direction: right + numeric_affix: + affix: v + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + vasakule: &vasakule + canonical: vasakule + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + numeric: + direction: right + numeric_affix: + affix: v + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + alternatives: + - alternative: *paremal + probability: 0.25 + - alternative: *paremale + probability: 0.25 + - alternative: *vasakul + probability: 0.25 + - alternative: *vasakule + probability: 0.25 + +cardinal_directions: + east: &ida + canonical: ida + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + west: &laas + canonical: lääs + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + north: &pohi + canonical: põhi + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + + south: &louna + canonical: lõuna + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + + alternatives: + - alternative: *pohi + probability: 0.25 + - alternative: *ida + probability: 0.25 + - alternative: *louna + probability: 0.25 + - alternative: *laas + probability: 0.25 + + +entrances: + sissepaas: &sissepaas + canonical: sissepääs + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + # Eingang 1, Eingang A, etc. + alphanumeric: &entrance_alphanumeric + default: *sissepaas + numeric_probability: 0.1 # e.g. Eingang 1 + alpha_probability: 0.85 # e.g. Eingang A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + +staircases: + trepikoda: &trepikoda + canonical: trepikoda + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + alphanumeric: &staircase_alphanumeric + default: *trepikoda + numeric_probability: 0.75 + alpha_probability: 0.2 + numeric_plus_alpha_probability: 0.025 + alpha_plus_numeric_probability: 0.025 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + direction: left + direction_probability: 0.85 + modifier: + alternatives: + - alternative: *pohi + - alternative: *louna + - alternative: *ida + - alternative: *laas + +po_boxes: + postboks: &abonementpostkast + canonical: abonementpostkast + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.2 # abonementpostkast #1234 + kast: &kast + canonical: kast + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.2 # Kast #1234 + alphanumeric: + sample: false + default: *abonementpostkast + probability: 0.9 + alternatives: + - alternative: *kast + probability: 0.1 + numeric_probability: 0.9 # 123 + alpha_probability: 0.05 # A + numeric_plus_alpha_probability: 0.04 # 123G + alpha_plus_numeric_probability: 0.01 # A123 + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + digits: + - length: 1 + probability: 0.05 + - length: 2 + probability: 0.1 + - length: 3 + probability: 0.2 + - length: 4 + probability: 0.5 + - length: 5 + probability: 0.1 + - length: 6 + probability: 0.05 + +units: + korter: &korter + canonical: korter + abbreviated: k + sample: true + canonical_probability: 0.8 + abbreviated_probability: 0.1 + sample_probability: 0.1 + numeric: + direction: left + null_phrase_probability: 0.3 + # Lejlighed nummer 4 + add_number_phrase: true + add_number_phrase_probability: 0.05 + ruumi: &ruumi + canonical: ruumi + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + alphanumeric: &unit_alphanumeric + default: *korter + probability: 0.9 + alternatives: + - alternative: *ruumi + probability: 0.1 + numeric_probability: 1.0 # e.g. korter 1 + + # Separate random probability for adding directions like 2P, 2V, etc. + add_direction: true + add_direction_probability: 0.005 + + # Add directions for plain numbers + add_direction_numeric: true + # Add direction only e.g. Korter vasakule + add_direction_standalone: true + + # If there are 10 floors, create unit numbers like #301 or #1032 + use_floor_probability: 0.05