From a260acf0af10f4601f668e48e01e6e13cacda28e Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 28 Jun 2016 13:20:54 -0400 Subject: [PATCH] [addresses] Romanian address config --- resources/addresses/ro.yaml | 498 ++++++++++++++++++++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 resources/addresses/ro.yaml diff --git a/resources/addresses/ro.yaml b/resources/addresses/ro.yaml new file mode 100644 index 00000000..72addc0d --- /dev/null +++ b/resources/addresses/ro.yaml @@ -0,0 +1,498 @@ +# ro.yaml +# ------- +# Romanian language specification + +components: + level: + # If no floor number is specified + null_probability: 0.6 + alphanumeric_probability: 0.35 + standalone_probability: 0.05 + + staircase: + null_probability: 0.95 + alphanumeric_probability: 0.01 + + entrance: + null_probability: 0.999 + alphanumeric_probability: 0.001 + + unit: + # If no unit number is specified + null_probability: 0.3 + alphanumeric_probability: 0.65 + standalone_probability: 0.05 + +numbers: + default: &numar + canonical: număr + abbreviated: nr + sample: true + canonical_probability: 0.1 + abbreviated_probability: 0.7 + sample_probability: 0.2 + sample_exclude: + - "#" + numeric: + direction: left + numeric_affix: + affix: "#" # e.g. #3, #2F, etc. + probability: 0.5 + alternatives: + - alternative: + direction: left # affix goes on the number's left + + # Probabilities for numbers + numeric_probability: 0.9 + numeric_affix_probability: 0.1 + +and: + default: &si + canonical: și + abbreviated: "&" + sample: true + canonical_probability: 0.5 + abbreviated_probability: 0.4 + sample_probability: 0.1 + +cross_streets: + and: *si + corner_of: &colt + canonical: colț + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + at_the_corner_of: &la_coltul_de_pe + canonical: la colțul de pe + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + intersection: + default: *si + probability: 0.7 + alternatives: + - alternative: *colt + probability: 0.2 + - alternative: *la_coltul_de_pe + probability: 0.1 + + between: + canonical: între + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + parentheses_probabililty: 0.5 + + +house_numbers: + # fara numar (FN) addresses + no_number: + default: + canonical: fără număr + abbreviated: fn + sample: true + canonical_probability: 0.1 + abbreviated_probability: 0.7 + sample_probability: 0.2 + alphanumeric: + default: *numar + + alphanumeric_phrase_probability: 0.7 + no_number_probability: 0.1 # With this probability, use fara numar if no house_number is specified + + + +levels: + floor: &etaj + canonical: etaj + abbreviated: et + sample: true + canonical_probability: 0.5 + abbreviated_probability: 0.4 + sample_probability: 0.1 + numeric: + direction: left + add_number_phrase: true # Occasionally add variation of "number", e.g. et. nr 2 + add_number_phrase_probability: 0.05 + roman_numeral_probability: 0.2 + # Ground floor + parter: &parter + canonical: parter + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + aliases: + "0": + default: *parter + probability: 0.9 + alternatives: + - alternative: *etaj + probability: 0.1 + + numbering_starts_at: 0 + + alphanumeric: + default: *etaj + add_number_phrase: true + add_number_phrase_probability: 0.05 + numeric_probability: 0.99 + alpha_probability: 0.01 + +blocks: + default: + canonical: bloc + abbreviated: bl + sample: true + canonical_probability: 0.6 + abbreviated_probability: 0.2 + sample_probability: 0.2 + numeric: + direction: left + + + +categories: + near: + default: + canonical: in apropiere de + + nearby: + default: + canonical: în apropiere + probability: 0.5 + alternatives: + - alternative: + canonical: in apropiere + probability: 0.2 + - alternative: + canonical: aproape de aici + probability: 0.1 + - alternative: + canonical: aici + probability: 0.1 + - alternative: + canonical: în jurul aici + probability: 0.05 + - alternative: + canonical: in jurul aici + probability: 0.05 + near_me: + default: + canonical: lângă mine + probability: 0.7 + alternatives: + - alternative: + canonical: langa mine + probability: 0.3 + in: + default: + canonical: din + # Probabilities of each phrase + near_probability: 0.35 + nearby_probability: 0.2 + near_me_probability: 0.1 + in_probability: 0.35 + +directions: + right: &dreapta + canonical: dreapta + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: right + numeric_affix: + affix: d + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + left: &stanga + canonical: stânga + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: right + numeric_affix: + affix: s + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + + alternatives: + - alternative: *dreapta + probability: 0.5 + - alternative: *stanga + probability: 0.5 + + +cardinal_directions: + east: &est + canonical: est + abbreviated: e + canonical_probability: 0.4 + abbreviated_probability: 0.6 + numeric: + direction: right + numeric_affix: + affix: e + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + west: &vest + canonical: vest + abbreviated: v + canonical_probability: 0.4 + abbreviated_probability: 0.6 + numeric: + direction: right + numeric_affix: + affix: v + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + north: &nord + canonical: nord + abbreviated: n + canonical_probability: 0.4 + abbreviated_probability: 0.6 + numeric: + direction: right + numeric_affix: + affix: n + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + south: &sud + canonical: sud + abbreviated: s + canonical_probability: 0.4 + abbreviated_probability: 0.6 + numeric: + direction: right + numeric_affix: + affix: s + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + alternatives: + - alternative: *nord + probability: 0.25 + - alternative: *est + probability: 0.25 + - alternative: *sud + probability: 0.25 + - alternative: *vest + probability: 0.25 + +entrances: + entrada: &intrare + canonical: intrare + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: left + + # Intrare 1, Intare A, etc. + alphanumeric: + default: *intrare + numeric_probability: 0.1 # e.g. Intrare 1 + alpha_probability: 0.85 # e.g. Intrare A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + modifier: + alternatives: + - alternative: *nord + - alternative: *sud + - alternative: *est + - alternative: *vest + - alternative: *dreapta + - alternative: *stanga + +staircases: + scara: &scara + canonical: scara + abbreviated: sc + sample: true + canonical_probability: 0.3 + abbreviated_probability: 0.4 + sample_probability: 0.3 + numeric: + direction: left + + alphanumeric: + # For alphanumerics, Scara A, Scara 1, etc. + default: *scara + numeric_probability: 0.35 # e.g. Scara 1 + alpha_probability: 0.6 # e.g. Scara A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + direction: right # e.g. Scara Nord + direction_probability: 0.8 + modifier: + alternatives: + - alternative: *nord + - alternative: *sud + - alternative: *est + - alternative: *vest + - alternative: *dreapta + - alternative: *stanga + +po_boxes: + casuta_postala: &casuta_postala + canonical: căsuță poștală + abbreviated: cp + sample: true + canonical_probability: 0.4 + abbreviated_probability: 0.3 + sample_probability: 0.3 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.4 # Apdo No 1234 + numeric_probability: 1.0 + alphanumeric: + sample: false + default: *casuta_postala + numeric_probability: 0.9 # Apdo 123 + alpha_probability: 0.05 # Apdo A + numeric_plus_alpha_probability: 0.04 # Apdo 123G + alpha_plus_numeric_probability: 0.01 # Apdo A123 + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + digits: + - length: 1 + probability: 0.05 + - length: 2 + probability: 0.1 + - length: 3 + probability: 0.2 + - length: 4 + probability: 0.5 + - length: 5 + probability: 0.1 + - length: 6 + probability: 0.05 + +units: + apartament: &apartament + canonical: apartament + abbreviated: ap + sample: true + canonical_probability: 0.2 + abbreviated_probability: 0.6 + sample_probability: 0.2 + numeric: + direction: left + sala: &sala + canonical: sală + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: left + birou: &birou + canonical: birou + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: left + lotul: &lotul + canonical: lotul + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: left + alphanumeric: &unit_alphanumeric + default: *apartament + probability: 0.9 + sample: true + alternatives: + - alternative: *sala + probability: 0.1 + + # Separate random probability for adding directions like 2o Izq, 2 Dcha, etc. + add_direction: true + add_direction_probability: 0.1 + add_direction_numeric: true # Only for numbers + + numeric_probability: 0.9 # e.g. ap 1 + numeric_plus_alpha_probability: 0.01 # e.g. ap 1A + alpha_plus_numeric_probability: 0.01 # e.g. ap A1 + alpha_probability: 0.08 # e.g. ap A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + + zones: + residential: *unit_alphanumeric + commercial: + default: *birou + numeric_probability: 0.9 # e.g. Birou 1 + numeric_plus_alpha_probability: 0.01 # e.g. Birou 1A + alpha_plus_numeric_probability: 0.01 # e.g. Birou A1 + alpha_probability: 0.08 # e.g. Birou A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + industrial: + default: *lotul + probability: 0.5 + alternatives: + - alternative: *birou + probability: 0.3 + - alternative: *sala + probability: 0.2 + + numeric_probability: 0.9 # e.g. Lotul 1 + numeric_plus_alpha_probability: 0.01 # e.g. Lotul 1A + alpha_plus_numeric_probability: 0.01 # e.g. Lotul A1 + alpha_probability: 0.08 # e.g. Lotul A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + university: + default: *sala + probability: 0.9 + numeric_probability: 0.9 # e.g. Sala 1 + numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A + alpha_plus_numeric_probability: 0.01 # e.g. Sala A1 + alpha_probability: 0.08 # e.g. Sala A + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1