diff --git a/resources/addresses/nb.yaml b/resources/addresses/nb.yaml new file mode 100644 index 00000000..9a7aadea --- /dev/null +++ b/resources/addresses/nb.yaml @@ -0,0 +1,594 @@ +# nb.yaml +# ------- +# Norwegian language specification. + +components: + level: + null_probability: 0.85 + alphanumeric_probability: 0.1 + standalone_probability: 0.05 + + staircase: + null_probability: 0.99 + alphanumeric_probability: 0.01 + + entrance: + null_probability: 0.999 + alphanumeric_probability: 0.001 + + unit: + null_probability: 0.75 + alphanumeric_probability: 0.25 + + combinations: + # e.g. 2/34, more common way to specify a unit number in German + # if unit exists in the first place + level_unit: + components: + - level + - unit + label: unit + zero_pad_digits: 2 + separators: + - separator: "" + probability: 1.0 + probability: 0.05 + + +numbers: + default: &nummer + canonical: nummer + abbreviated: nr + sample: true + # Probabilities + canonical_probability: 0.3 + abbreviated_probability: 0.5 + sample_probability: 0.2 + sample_exclude: + - "#" + numeric: + direction: left + numeric_affix: + affix: "#" + direction: left + + numeric_probability: 0.4 + numeric_affix_probability: 0.6 + + +house_number: + alphanumeric: + default: *nummer + + alphanumeric_phrase_probability: 0.0001 + + +and: + default: &og + canonical: og + abbreviated: "&" + canonical_probability: 0.2 + abbreviated_probability: 0.75 + sample: true + sample_probability: 0.05 + +cross_streets: + and: *og + corner_of: &hjorne_av + canonical: hjørne av + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + at_the_corner_of: &pa_hjornet_av + canonical: på hjørnet av + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + intersection: + default: *og + probability: 0.7 + alternatives: + - alternative: *hjorne_av + probability: 0.15 + - alternative: *pa_hjornet_av + probability: 0.15 + + between: + canonical: mellom + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + parentheses_probability: 0.5 + +levels: + floor: &etasje + canonical: etasje + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + direction_probability: 0.9 + ordinal: + direction: right + numeric_probability: 0.4 + ordinal_probability: 0.6 + hovedetasje: &hovedetasje + canonical: hovedetasje + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric_affix: + affix: h + direction: left + zero_pad: 2 + numeric_probability: 0.1 + numeric_affix_probability: 0.9 + underetasje: &underetasje + canonical: underetasje + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + numeric_affix: + affix: u + direction: left + zero_pad: 2 + numeric_probability: 0.1 + numeric_affix_probability: 0.9 + loftsetasje: &loftsetasje + canonical: loftsetasje + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + numeric_affix: + affix: l + direction: left + zero_pad: 2 + numeric_probability: 0.1 + numeric_affix_probability: 0.9 + loft: &loft + canonical: loft + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: right + kjeller: &kjeller + canonical: kjeller + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + # e.g. 1 kjeller + numeric: + direction: right + direction_probability: 0.8 + # e.g. k1 + numeric_affix: + affix: k + direction: left + # e.g. 1. k + ordinal: + direction: right + standalone_probability: 0.9 + number_abs_value: true + number_min_abs_value: 1 + numeric_probability: 0.005 + numeric_affix_probability: 0.1 + ordinal_probability: 0.005 + aliases: + "<-1": + default: *kjeller + "-1": + default: *kjeller + probability: 0.9 + alternatives: + - alternative: *etasje + probability: 0.1 + "top": + default: *etasje + probability: 0.85 + alternatives: + - alternative: *loftsetasje + probability: 0.1 + - alternative: *loft + probability: 0.05 + + numbering_starts_at: 1 + + alphanumeric: + default: *etasje + probability: 0.95 + alternatives: + - alternative: *underetasje + probability: 0.05 + numeric_probability: 0.99 # With this probability, pick an integer + alpha_probability: 0.0098 # With this probability, pick a letter e.g. A + numeric_plus_alpha_probability: 0.0001 # e.g. 2A + alpha_plus_numeric_probability: 0.0001 # e.g. A2 + +categories: + near: + default: + canonical: i nærheden af + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.7 + alternatives: + - alternative: + canonical: tæt på + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.2 + - alternative: + canonical: tæt ved + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + nearby: + default: + canonical: i nærheden + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.4 + alternatives: + - alternative: + canonical: rundt her + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.2 + - alternative: + canonical: nær her + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + - alternative: + canonical: nær + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + - alternative: + canonical: omkring her + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + - alternative: + canonical: tæt på her + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + near_me: + default: + canonical: nær ​​mig + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.8 + alternatives: + - alternative: + canonical: i nærheden af ​​mig + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + - alternative: + canonical: tæt på mig + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.1 + + # Don't worry about agreement + in: + default: + canonical: i + probability: 0.8 + alternatives: + - alternative: + canonical: om + probability: 0.1 + - alternative: + canonical: på + probability: 0.1 + + + # Probabilities of each phrase + near_probability: 0.35 + nearby_probability: 0.2 + near_me_probability: 0.1 + in_probability: 0.35 + + +directions: + right: &hoyre + canonical: høyre + sample: true + canonical_probability: 0.1 + sample_probability: 0.9 + numeric: + direction: right + numeric_affix: + affix: h + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + left: &igjen + canonical: igjen + sample: true + canonical_probability: 0.1 + sample_probability: 0.9 + numeric: + direction: right + numeric_affix: + affix: i + direction: right + whitespace_probability: 0.1 + numeric_probability: 0.8 + numeric_affix_probability: 0.2 + alternatives: + - alternative: *hoyre + probability: 0.5 + - alternative: *igjen + probability: 0.5 + + +cardinal_directions: + east: &ost + canonical: øst + abbreviated: ø + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: ø + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + west: &vest + canonical: vest + abbreviated: v + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: v + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + north: &nord + canonical: nord + abbreviated: n + canonical_probability: 0.95 + abbreviated_probability: 0.05 + numeric: + direction: right + numeric_affix: + affix: n + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + south: &syd + canonical: syd + abbreviated: s + sample: true + canonical_probability: 0.75 + abbreviated_probability: 0.1 + sample_probability: 0.15 + numeric: + direction: right + numeric_affix: + affix: s + direction: right + numeric_probability: 0.5 + numeric_affix_probability: 0.5 + + alternatives: + - alternative: *nord + probability: 0.25 + - alternative: *ost + probability: 0.25 + - alternative: *syd + probability: 0.25 + - alternative: *vest + probability: 0.25 + + +entrances: + inngang: &inngang + canonical: inngang + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + + # Eingang 1, Eingang A, etc. + alphanumeric: &entrance_alphanumeric + default: *inngang + numeric_probability: 0.1 # e.g. Eingang 1 + alpha_probability: 0.85 # e.g. Eingang A + numeric_plus_alpha_probability: 0.025 # e.g. 1A + alpha_plus_numeric_probability: 0.025 # e.g. A1 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + +staircases: + stiege: &stiege + canonical: stiege + abbreviated: stg + sample: true + canonical_probability: 0.7 + abbreviated_probability: 0.2 + sample_probability: 0.1 + numeric: + direction: left + trapp: &trapp + canonical: trapp + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + numeric: + direction: left + + + alphanumeric: &staircase_alphanumeric + default: *trapp + probability: 0.8 + alternatives: + - alternative: *stiege + probability: 0.2 + numeric_probability: 0.75 + alpha_probability: 0.2 + numeric_plus_alpha_probability: 0.025 + alpha_plus_numeric_probability: 0.025 + + alpha_plus_numeric: + whitespace_probability: 0.1 + + numeric_plus_alpha: + whitespace_probability: 0.1 + + directional: + direction: left + direction_probability: 0.85 + modifier: + alternatives: + - alternative: *nord + - alternative: *syd + - alternative: *ost + - alternative: *vest + +po_boxes: + postboks: &postboks + canonical: postboks + abbreviated: pb + sample: true + canonical_probability: 0.6 + abbreviated_probability: 0.2 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.2 # Pb No 1234 + boks: &boks + canonical: boks + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + add_number_phrase: true + add_number_phrase_probability: 0.2 # Boks No 1234 + alphanumeric: + sample: false + default: *postboks + probability: 0.9 + alternatives: + - alternative: *boks + probability: 0.1 + numeric_probability: 0.9 # Pb 123 + alpha_probability: 0.05 # Pb A + numeric_plus_alpha_probability: 0.04 # Pb 123G + alpha_plus_numeric_probability: 0.01 # Pb A123 + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + digits: + - length: 1 + probability: 0.05 + - length: 2 + probability: 0.1 + - length: 3 + probability: 0.2 + - length: 4 + probability: 0.5 + - length: 5 + probability: 0.1 + - length: 6 + probability: 0.05 + +units: + leilighet: &leilighet + canonical: leilighet + abbreviated: leil + sample: true + canonical_probability: 0.6 + abbreviated_probability: 0.1 + sample_probability: 0.3 + numeric: + direction: left + null_phrase_probability: 0.4 + # Lejlighed nummer 4 + add_number_phrase: true + add_number_phrase_probability: 0.05 + hus: &hus + canonical: hus + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + numeric: + direction: left + vaerelse: &vaerelse + canonical: værelse + sample: true + canonical_probability: 0.7 + sample_probability: 0.3 + numeric: + direction: left + alphanumeric: &unit_alphanumeric + default: *leilighet + probability: 0.8 + alternatives: + - alternative: *hus + probability: 0.1 + - alternative: *vaerelse + probability: 0.1 + numeric_probability: 0.95 # e.g. Lejlighed 1 + alpha_probability: 0.05 # e.g. Lejl A + + alpha_plus_numeric: + whitespace_probability: 0.1 + numeric_plus_alpha: + whitespace_probability: 0.1 + + # Separate random probability for adding directions like 2H, 2I, etc. + add_direction: true + add_direction_probability: 0.005 + + # Add directions for plain numbers + add_direction_numeric: true + # Add direction only e.g. Lejlighed Igjen + add_direction_standalone: true + + # If there are 10 floors, create unit numbers like #301 or #1032 + use_floor_probability: 0.2 + + # Use the actual floor phrase as long as the whole phrase is numeric + # Has the effect of creating Bolignummer-style units + use_floor_numeric_affix_probability: 0.7 + use_floor_affix_floor_num_digits: 2 + use_floor_affix_unit_num_digits: 2