From 6f199ce870d24d93553c05257430f7c7b0ad4727 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 4 Jul 2016 18:13:57 -0400 Subject: [PATCH] [addresses] Adding Latvian address config --- resources/addresses/lt.yaml | 6 +- resources/addresses/lv.yaml | 403 ++++++++++++++++++++++++++++++++++++ 2 files changed, 406 insertions(+), 3 deletions(-) create mode 100644 resources/addresses/lv.yaml diff --git a/resources/addresses/lt.yaml b/resources/addresses/lt.yaml index 2996535b..4d59fb1e 100644 --- a/resources/addresses/lt.yaml +++ b/resources/addresses/lt.yaml @@ -305,7 +305,7 @@ po_boxes: numeric: direction: left add_number_phrase: true - add_number_phrase_probability: 0.2 # Skr Poczt 1234 + add_number_phrase_probability: 0.2 # pašto dėžutė 1234 alphanumeric: default: *pasto_dezute numeric_probability: 0.95 # P. d. 123 @@ -355,10 +355,10 @@ units: direction: left alphanumeric: &unit_alphanumeric default: *butas - numeric_probability: 0.9 # e.g. m. 1 + numeric_probability: 0.9 # e.g. butas 1 numeric_plus_alpha_probability: 0.03 # e.g. 1A alpha_plus_numeric_probability: 0.03 # e.g. A1 - alpha_probability: 0.04 # e.g. m. A + alpha_probability: 0.04 # e.g. butas A alpha_plus_numeric: whitespace_probability: 0.1 diff --git a/resources/addresses/lv.yaml b/resources/addresses/lv.yaml new file mode 100644 index 00000000..47911458 --- /dev/null +++ b/resources/addresses/lv.yaml @@ -0,0 +1,403 @@ +# lv.yaml +# ------- +# Latvian language specification. + +components: + level: + null_probability: 0.97 + alphanumeric_probability: 0.02 + standalone_probability: 0.01 + + staircase: + null_probability: 0.99 + alphanumeric_probability: 0.01 + + entrance: + null_probability: 0.999 + alphanumeric_probability: 0.001 + + unit: + null_probability: 0.75 + alphanumeric_probability: 0.25 + + combinations: + - + components: + - house_number + - unit + label: house_number + separators: + - separator: "-" + probability: 0.95 + - separator: " - " + probability: 0.05 + probability: 0.2 + + +numbers: + default: &numurs + canonical: numurs + abbreviated: nr + sample: true + # Probabilities + canonical_probability: 0.3 + abbreviated_probability: 0.5 + sample_probability: 0.2 + sample_exclude: + - "#" + numeric: + direction: left + numeric_affix: + affix: "#" + direction: left + + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + + +and: + default: &un + canonical: un + abbreviated: "&" + canonical_probability: 0.2 + abbreviated_probability: 0.75 + sample: true + sample_probability: 0.05 + + +cross_streets: + and: *un + corner_of: &sturis + canonical: stūris + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + at_the_corner_of: &sturi + canonical: stūrī + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + intersection: + default: *un + probability: 0.7 + alternatives: + - alternative: *sturi + probability: 0.2 + - alternative: *sturis + probability: 0.1 + + between: + canonical: starp + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + parentheses_probability: 0.5 + + +levels: + stavs: &stavs + canonical: stāvs + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + direction_probability: 0.9 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 + ordinal: + direction: right + whitespace_probability: 0.5 # sometimes should be 2.stāvs + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 + # Needs to be 1.0 so we don't get e.g. IIstāvs + ordinal_suffix_probability: 1.0 + numeric_probability: 0.2 + ordinal_probability: 0.8 + + # Ground floor + pirmais_stavs: &pirmais_stavs + canonical: pirmais stāvs + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + pagrabs: &pagrabs + canonical: pagrabs + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + standalone_probability: 1.0 + number_abs_value: true + number_min_abs_value: 1 + pagraba: &pagraba + canonical: pagraba + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + # e.g. pagraba 1 + numeric: + direction: left + direction_probability: 0.8 + # e.g. p1 + numeric_affix: + affix: p + direction: left + # e.g. 1. pagraba + ordinal: + direction: right + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 + number_abs_value: true + number_min_abs_value: 1 + numeric_probability: 0.5 + numeric_affix_probability: 0.1 + ordinal_probability: 0.4 + aliases: + "<-1": + default: *pagraba + "-1": + default: *pagrabs + "0": &ground_floor + default: *pirmais_stavs + probability: 0.6 + alternatives: + - alternative: *stavs + probability: 0.4 + "1": *ground_floor + + numbering_starts_at: 1 + + alphanumeric: + default: *stavs + numeric_probability: 0.99 # With this probability, pick an integer + alpha_probability: 0.0098 # With this probability, pick a letter e.g. A + numeric_plus_alpha_probability: 0.0001 # e.g. 2A + alpha_plus_numeric_probability: 0.0001 # e.g. A2 + +directions: + right: &pa_labi + canonical: pa labi + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + left: &pa_kreisi + canonical: pa kreisi + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + alternatives: + - alternative: *pa_labi + probability: 0.5 + - alternative: *pa_kreisi + probability: 0.5 + +cardinal_directions: + east: &austrumu + canonical: austrumu + abbreviated: a + sample: true + canonical_probability: 0.8 + abbreviated_probability: 0.05 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: a + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + west: &rietumu + canonical: rietumu + abbreviated: r + sample: true + canonical_probability: 0.8 + abbreviated_probability: 0.05 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: r + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + north: &ziemelu + canonical: ziemeļu + abbreviated: z + sample: true + canonical_probability: 0.8 + abbreviated_probability: 0.05 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: z + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + + south: &dienvidu + canonical: dienvidu + abbreviated: d + sample: true + canonical_probability: 0.8 + abbreviated_probability: 0.05 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: d + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + alternatives: + - alternative: *ziemelu + probability: 0.25 + - alternative: *dienvidu + probability: 0.25 + - alternative: *austrumu + probability: 0.25 + - alternative: *rietumu + probability: 0.25 + + +entrances: + ieeja: &ieeja + canonical: ieeja + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + # ieeja 1, ieeja A, etc. + alphanumeric: &entrance_alphanumeric + default: *ieeja + numeric_probability: 0.1 # e.g. ieeja 1 + alpha_probability: 0.85 # e.g. ieeja A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + +staircases: + kapnu: &kapnu + canonical: kāpņu + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + kapnu_telpa: &kapnu_telpa + canonical: kāpņu telpa + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + alphanumeric: &staircase_alphanumeric + default: *kapnu + probability: 0.6 + alternatives: + - alternative: *kapnu_telpa + probability: 0.4 + numeric_probability: 0.75 + alpha_probability: 0.2 + numeric_plus_alpha_probability: 0.025 + alpha_plus_numeric_probability: 0.025 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + direction: left + direction_probability: 0.85 + modifier: + alternatives: + - alternative: *pa_labi + - alternative: *pa_kreisi + - alternative: *ziemelu + - alternative: *dienvidu + - alternative: *austrumu + - alternative: *rietumu + + +units: + dzivoklis: &dzivoklis + canonical: dzīvoklis + abbreviated: dz + sample: true + canonical_probability: 0.1 + abbreviated_probability: 0.8 + sample_probability: 0.1 + numeric: + direction: left + birojs: &birojs + canonical: birojs + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + istaba: &istaba + canonical: istaba + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + alphanumeric: &unit_alphanumeric + default: *dzivoklis + numeric_probability: 0.9 # e.g. m. 1 + numeric_plus_alpha_probability: 0.03 # e.g. 1A + alpha_plus_numeric_probability: 0.03 # e.g. A1 + alpha_probability: 0.04 # e.g. m. A + + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + # If there are 10 floors, create unit numbers like #301 or #1032 + use_floor_probability: 0.01 + + zones: + commercial: &commercial_unit_types + default: *birojs + numeric_probability: 0.95 # e.g. birojs 1 + numeric_plus_alpha_probability: 0.01 # e.g. birojs 1A + alpha_plus_numeric_probability: 0.01 # e.g. birojs A1 + alpha_probability: 0.03 # e.g. birojs A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + university: + default: *istaba + numeric_probability: 0.95 # e.g. istaba 1 + numeric_plus_alpha_probability: 0.01 # e.g. istaba 1A + alpha_plus_numeric_probability: 0.01 # e.g. istaba A1 + alpha_probability: 0.03 # e.g. istaba A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1