From 6843f43f82486b21e938718e07f1ec60df62e4e2 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 9 Jul 2016 02:03:51 -0400 Subject: [PATCH] [addresses] Icelandic address config --- resources/addresses/is.yaml | 459 ++++++++++++++++++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 resources/addresses/is.yaml diff --git a/resources/addresses/is.yaml b/resources/addresses/is.yaml new file mode 100644 index 00000000..f16949e6 --- /dev/null +++ b/resources/addresses/is.yaml @@ -0,0 +1,459 @@ +# da.yaml +# ------- +# Danish language specification. + +components: + level: + null_probability: 0.9 + alphanumeric_probability: 0.1 + + staircase: + null_probability: 0.99 + alphanumeric_probability: 0.01 + + entrance: + null_probability: 0.999 + alphanumeric_probability: 0.001 + + unit: + null_probability: 0.75 + alphanumeric_probability: 0.25 + + combinations: + - + components: + - level + - unit + label: unit + separators: + - separator: "-" + probability: 0.9 + - separator: " - " + probability: 0.1 + probability: 0.005 + - + components: + - entrance + - unit + label: unit + separators: + - separator: "-" + probability: 0.9 + - separator: " - " + probability: 0.1 + probability: 0.001 + + +numbers: + default: &numer + canonical: númer + abbreviated: nr + sample: true + # Probabilities + canonical_probability: 0.3 + abbreviated_probability: 0.5 + sample_probability: 0.2 + sample_exclude: + - "#" + numeric: + direction: left + numeric_affix: + affix: "#" + direction: left + + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + + +house_number: + alphanumeric: + default: *numer + + alphanumeric_phrase_probability: 0.0001 + + +and: + default: &og + canonical: og + abbreviated: "&" + canonical_probability: 0.2 + abbreviated_probability: 0.75 + sample: true + sample_probability: 0.05 + +cross_streets: + and: *og + corner_of: &horn_of + canonical: horn af + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + at_the_corner_of: &a_horinu_a + canonical: á horninu á + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + intersection: + default: *og + probability: 0.7 + alternatives: + - alternative: *horn_of + probability: 0.15 + - alternative: *a_horinu_a + probability: 0.15 + + between: + canonical: milli + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + parentheses_probability: 0.5 + +levels: + floor: &haeo + canonical: hæð + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + numeric: + direction: right + direction_probability: 0.9 + ordinal: + direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 + numeric_probability: 0.4 + ordinal_probability: 0.6 + + jarohaeo: &jarohaeo + canonical: jarðhæð + sample: true + canonical_probability: 0.3 + sample_probability: 0.7 + kjallara: &kjallara + canonical: kjallara + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + # e.g. 1 kjallara + numeric: + direction: right + direction_probability: 0.8 + # e.g. k1 + numeric_affix: + affix: k + direction: left + # e.g. 1. kjallara + ordinal: + direction: right + standalone_probability: 0.985 + number_abs_value: true + number_min_abs_value: 1 + numeric_probability: 0.005 + numeric_affix_probability: 0.005 + ordinal_probability: 0.005 + aliases: + "<-1": + default: *kjallara + "-1": + default: *kjallara + "0": + default: *jarohaeo + + numbering_starts_at: 0 + + alphanumeric: + default: *haeo + numeric_probability: 0.99 # With this probability, pick an integer + alpha_probability: 0.0098 # With this probability, pick a letter e.g. A + numeric_plus_alpha_probability: 0.0001 # e.g. 2A + alpha_plus_numeric_probability: 0.0001 # e.g. A2 + +categories: + near: + default: + canonical: nálægt + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + + nearby: + default: + canonical: nálægt + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.6 + alternatives: + - alternative: + canonical: nálægt hér + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.2 + - alternative: + canonical: hérna + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + - alternative: + canonical: hér + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + near_me: + default: + canonical: nálægt mér + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + + # Don't worry about agreement + in: + default: + canonical: í + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + + # Probabilities of each phrase + near_probability: 0.35 + nearby_probability: 0.2 + near_me_probability: 0.1 + in_probability: 0.35 + + + +directions: + right: &til_haegri + canonical: til hægri + abbreviated: t.h + sample: true + canonical_probability: 0.2 + abbreviated_probability: 0.5 + sample_probability: 0.3 + numeric: + direction: right + numeric_affix: + affix: t.h + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + left: &til_vinstri + canonical: til vinstri + abbreviated: t.v + sample: true + canonical_probability: 0.1 + abbreviated_probability: 0.6 + sample_probability: 0.3 + numeric: + direction: right + numeric_affix: + affix: t.v + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + alternatives: + - alternative: *til_haegri + probability: 0.5 + - alternative: *til_vinstri + probability: 0.5 + +cardinal_directions: + east: &austur + canonical: austur + abbreviated: a + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: a + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + west: &vestur + canonical: vestur + abbreviated: v + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: v + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + north: &norour + canonical: norður + abbreviated: n + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: n + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + south: &suour + canonical: suður + abbreviated: s + sample: true + canonical_probability: 0.75 + abbreviated_probability: 0.1 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: s + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + alternatives: + - alternative: *norour + probability: 0.25 + - alternative: *austur + probability: 0.25 + - alternative: *suour + probability: 0.25 + - alternative: *vestur + probability: 0.25 + + +entrances: + inngangur: &inngangur + canonical: inngangur + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + # Inngangur 1, Inngangur A, etc. + alphanumeric: &entrance_alphanumeric + default: *inngangur + numeric_probability: 0.1 # e.g. Inngangur 1 + alpha_probability: 0.85 # e.g. Inngangur A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + +staircases: + stiege: &stigi + canonical: stigi + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + alphanumeric: &staircase_alphanumeric + default: *stigi + numeric_probability: 0.75 + alpha_probability: 0.2 + numeric_plus_alpha_probability: 0.025 + alpha_plus_numeric_probability: 0.025 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + direction: left + direction_probability: 0.85 + modifier: + alternatives: + - alternative: *norour + - alternative: *suour + - alternative: *austur + - alternative: *vestur + +po_boxes: + postholf: &postholf + canonical: pósthólf + abbreviated: ph + sample: true + canonical_probability: 0.6 + abbreviated_probability: 0.2 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.2 # Ph Nr 1234 + alphanumeric: + sample: false + default: *postholf + numeric_probability: 0.9 # Ph 123 + alpha_probability: 0.05 # Ph A + numeric_plus_alpha_probability: 0.04 # Ph 123G + alpha_plus_numeric_probability: 0.01 # Ph A123 + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + digits: + - length: 1 + probability: 0.05 + - length: 2 + probability: 0.1 + - length: 3 + probability: 0.2 + - length: 4 + probability: 0.5 + - length: 5 + probability: 0.1 + - length: 6 + probability: 0.05 + +units: + ibuo: &ibuo + canonical: íbúð + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + null_phrase_probability: 0.5 + # íbúð nummer 4 + add_number_phrase: true + add_number_phrase_probability: 0.05 + + alphanumeric: &unit_alphanumeric + default: *ibuo + numeric_probability: 0.9 # e.g. íbúð 1 + numeric_plus_alpha_probability: 0.03 # e.g. 1A + alpha_plus_numeric_probability: 0.03 # e.g. A1 + alpha_probability: 0.04 # e.g. íbúð A + + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + # Separate random probability for adding directions like 2R, 2L, etc. + add_direction: true + add_direction_probability: 0.1 + + # Add directions for plain numbers + add_direction_numeric: true + + # If there are 10 floors, create unit numbers like #301 or #1032 + use_floor_probability: 0.1