From af11db14884628eafcea08f572b25c2dfa2efacd Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 4 Jul 2016 13:46:19 -0400 Subject: [PATCH] [addresses] Adding digit spellout and the list form of field combinations to existing configs --- resources/addresses/cs.yaml | 28 +++++-- resources/addresses/da.yaml | 10 ++- resources/addresses/de.yaml | 59 ++++++++----- resources/addresses/en.yaml | 11 +-- resources/addresses/es.yaml | 6 ++ resources/addresses/et.yaml | 2 +- resources/addresses/fi.yaml | 5 +- resources/addresses/fr.yaml | 12 ++- resources/addresses/hu.yaml | 15 ++-- resources/addresses/it.yaml | 15 +++- resources/addresses/ja.yaml | 2 +- resources/addresses/ja_rm.yaml | 2 +- resources/addresses/nb.yaml | 5 +- resources/addresses/nl.yaml | 37 +++++++-- resources/addresses/pl.yaml | 14 +++- resources/addresses/pt.yaml | 148 ++++++++++++++++++++++++--------- resources/addresses/ro.yaml | 4 +- resources/addresses/ru.yaml | 64 ++++++++++++-- resources/addresses/sk.yaml | 36 ++++++-- resources/addresses/zh.yaml | 2 + 20 files changed, 353 insertions(+), 124 deletions(-) diff --git a/resources/addresses/cs.yaml b/resources/addresses/cs.yaml index 27ce91fe..ce80eb70 100644 --- a/resources/addresses/cs.yaml +++ b/resources/addresses/cs.yaml @@ -100,12 +100,16 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 add_number_phrase: true add_number_phrase_probability: 0.1 ordinal: direction: right - roman_numeral_probability: 0.7 + digits: + ascii_probability: 0.3 + roman_numeral_probability: 0.7 add_number_phrase: true add_number_phrase_probability: 0.1 numeric_probability: 0.4 @@ -120,10 +124,14 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 numeric_probability: 0.4 ordinal_probability: 0.6 etaz: &etaz @@ -134,10 +142,14 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 numeric_probability: 0.4 ordinal_probability: 0.6 prizemi: &prizemi @@ -163,7 +175,9 @@ levels: # e.g. 1. podzemní podlaží ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 standalone_probability: 0.985 number_abs_value: true number_min_abs_value: 1 diff --git a/resources/addresses/da.yaml b/resources/addresses/da.yaml index ca783f27..375de451 100644 --- a/resources/addresses/da.yaml +++ b/resources/addresses/da.yaml @@ -21,7 +21,7 @@ components: alphanumeric_probability: 0.25 combinations: - level_unit: + - components: - level - unit @@ -32,7 +32,7 @@ components: - separator: " - " probability: 0.1 probability: 0.005 - entrance_unit: + - components: - entrance - unit @@ -121,6 +121,9 @@ levels: direction_probability: 0.9 ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.4 ordinal_probability: 0.6 etage: &etage @@ -132,6 +135,9 @@ levels: direction: right ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.4 ordinal_probability: 0.6 stuen: &stuen diff --git a/resources/addresses/de.yaml b/resources/addresses/de.yaml index 50eaf784..481685ae 100644 --- a/resources/addresses/de.yaml +++ b/resources/addresses/de.yaml @@ -26,7 +26,7 @@ components: combinations: # e.g. 2/34, more common way to specify a unit number in German # if unit exists in the first place - house_number_unit: + - components: - house_number - unit @@ -117,6 +117,9 @@ levels: direction: right ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.3 numeric_affix_probability: 0.5 ordinal_probability: 0.2 @@ -129,6 +132,9 @@ levels: direction: right ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.4 ordinal_probability: 0.6 stock: &stock @@ -140,6 +146,9 @@ levels: direction: right ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.1 ordinal_probability: 0.9 erdgeschoss: &erdgeschoss @@ -613,29 +622,8 @@ countries: # Combined apartment numbers are very common combinations: - # e.g. Neubaugasse 55/5 - house_number_unit: - probability: 0.7 - separators: - - separator: / - probability: 0.98 - - separator: "-" - probability: 0.02 - # e.g. Neubaugasse 55/1/5 - house_number_staircase_unit: - components: - - house_number - - staircase - - unit - label: house_number - separators: - - separator: / - probability: 0.98 - - separator: "-" - probability: 0.02 - probability: 0.8 # e.g. Neubaugasse 55/A/1/5 - house_number_entrance_staircase_unit: + - components: - house_number - entrance @@ -648,6 +636,31 @@ countries: - separator: "-" probability: 0.02 probability: 0.9 + # e.g. Neubaugasse 55/1/5 + - + components: + - house_number + - staircase + - unit + label: house_number + separators: + - separator: / + probability: 0.98 + - separator: "-" + probability: 0.02 + probability: 0.8 + # e.g. Neubaugasse 55/5 + - + components: + - house_number + - unit + label: house_number + probability: 0.7 + separators: + - separator: / + probability: 0.98 + - separator: "-" + probability: 0.02 units: top: &top diff --git a/resources/addresses/en.yaml b/resources/addresses/en.yaml index 9f6ac3ab..4ef93f23 100644 --- a/resources/addresses/en.yaml +++ b/resources/addresses/en.yaml @@ -65,7 +65,7 @@ components: combinations: # For unit types like 2/34 (more common in Canada and Australia) - house_number_unit: + - components: - house_number - unit @@ -160,9 +160,12 @@ levels: numeric_affix: affix: /f direction: right # affix goes to number's right (always) - # e.g. 1st Floor + # e.g. 1st Floor ordinal: direction: right # canonical or abbreviated form goes to the ordinal's right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 # Probabilities numeric_probability: 0.75 # Use the simple number e.g. Floor 1 (or Floor No. 1) numeric_affix_probability: 0.05 # Use the 2/F (less common) @@ -1356,9 +1359,7 @@ countries: ca: components: combinations: - house_number_unit: - probability: 0.0 - unit_house_number: + - components: - unit - house_number diff --git a/resources/addresses/es.yaml b/resources/addresses/es.yaml index 28d9ba0d..bee822f5 100644 --- a/resources/addresses/es.yaml +++ b/resources/addresses/es.yaml @@ -108,6 +108,9 @@ levels: direction: right direction_probability: 0.95 # Let it vary occasionally e.g. Piso 2o standalone_probability: 0.2 # Let e.g. 5º be the entire floor string + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.6 numeric_affix_probability: 0.05 ordinal_probability: 0.35 @@ -954,6 +957,9 @@ countries: ordinal: direction: right gender: f + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 canonical_probability: 0.6 abbreviated_probability: 0.2 sample_probability: 0.2 diff --git a/resources/addresses/et.yaml b/resources/addresses/et.yaml index 9e626c4a..b677058f 100644 --- a/resources/addresses/et.yaml +++ b/resources/addresses/et.yaml @@ -21,7 +21,7 @@ components: alphanumeric_probability: 0.25 combinations: - house_number_unit: + - components: - house_number - unit diff --git a/resources/addresses/fi.yaml b/resources/addresses/fi.yaml index 07be7d58..319219e3 100644 --- a/resources/addresses/fi.yaml +++ b/resources/addresses/fi.yaml @@ -21,7 +21,7 @@ components: alphanumeric_probability: 0.25 combinations: - staircase_unit: + - components: - staircase - unit @@ -107,6 +107,9 @@ levels: direction_probability: 0.9 ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.4 ordinal_probability: 0.6 diff --git a/resources/addresses/fr.yaml b/resources/addresses/fr.yaml index 47859981..e2590aff 100644 --- a/resources/addresses/fr.yaml +++ b/resources/addresses/fr.yaml @@ -21,7 +21,7 @@ components: alphanumeric_probability: 0.2 combinations: - house_number_unit: + - components: - house_number - unit @@ -93,6 +93,9 @@ levels: add_number_phrase_probability: 0.05 ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.75 ordinal_probability: 0.25 niveau: &niveau @@ -106,6 +109,9 @@ levels: add_number_phrase_probability: 0.05 ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.75 ordinal_probability: 0.25 bel_etage: &bel_etage @@ -889,9 +895,7 @@ countries: null_probability: 0.6 alphanumeric_probability: 0.4 combinations: - house_number_unit: - probability: 0.0 - unit_house_number: + - components: - unit - house_number diff --git a/resources/addresses/hu.yaml b/resources/addresses/hu.yaml index 4fc1a726..3cd9670c 100644 --- a/resources/addresses/hu.yaml +++ b/resources/addresses/hu.yaml @@ -13,17 +13,19 @@ components: alphanumeric_probability: 0.25 combinations: - level_unit: + - components: - level - unit label: unit separators: - separator: "/" - probability: 0.95 + probability: 0.55 + - separator: " " + probability: 0.4 - separator: "-" probability: 0.05 - probability: 0.5 + probability: 0.8 numbers: @@ -100,6 +102,9 @@ levels: direction_probability: 0.9 ordinal: direction: right + digits: + ascii_probability: 0.2 + roman_numeral_probability: 0.8 numeric_probability: 0.1 ordinal_probability: 0.9 foldszint: &foldszint @@ -220,8 +225,8 @@ levels: alphanumeric: default: *emelet - roman_numeral_probability: 0.8 # With this probability, pick a Roman numeral - numeric_probability: 0.19 # With this probability, pick an integer + numeric_probability: 0.59 # With this probability, pick an integer + roman_numeral_probability: 0.4 # Pick a Roman numeral for the actual value alpha_probability: 0.0098 # With this probability, pick a letter e.g. A numeric_plus_alpha_probability: 0.0001 # e.g. 2A alpha_plus_numeric_probability: 0.0001 # e.g. A2 diff --git a/resources/addresses/it.yaml b/resources/addresses/it.yaml index dd1478bc..28a5ab6a 100644 --- a/resources/addresses/it.yaml +++ b/resources/addresses/it.yaml @@ -22,7 +22,7 @@ components: alphanumeric_probability: 0.2 combinations: - house_number_unit: + - components: - house_number - unit @@ -87,10 +87,15 @@ levels: direction_probability: 0.95 add_number_phrase: true add_number_phrase_probability: 0.05 - roman_numeral_probability: 0.1 + digits: + ascii_probability: 0.9 + roman_numeral_probability: 0.1 ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.5 + spellout_probability: 0.2 + roman_numeral_probability: 0.3 numeric_probability: 0.55 ordinal_probability: 0.45 livello: &livello @@ -104,7 +109,9 @@ levels: add_number_phrase_probability: 0.05 ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 numeric_probability: 0.75 ordinal_probability: 0.25 piano_nobile: &piano_nobile diff --git a/resources/addresses/ja.yaml b/resources/addresses/ja.yaml index 38e771ca..f3bd9624 100644 --- a/resources/addresses/ja.yaml +++ b/resources/addresses/ja.yaml @@ -24,7 +24,7 @@ components: combinations: # Unit is just appended onto the house number - house_number_unit: + - components: - house_number - unit diff --git a/resources/addresses/ja_rm.yaml b/resources/addresses/ja_rm.yaml index bb3b0426..aa4d9a73 100644 --- a/resources/addresses/ja_rm.yaml +++ b/resources/addresses/ja_rm.yaml @@ -23,7 +23,7 @@ components: combinations: # Unit is just appended onto the house number - house_number_unit: + - components: - house_number - unit diff --git a/resources/addresses/nb.yaml b/resources/addresses/nb.yaml index 5cc138b0..371a7067 100644 --- a/resources/addresses/nb.yaml +++ b/resources/addresses/nb.yaml @@ -22,7 +22,7 @@ components: combinations: # Bolignummer - level_unit: + - components: - level - unit @@ -110,6 +110,9 @@ levels: direction_probability: 0.9 ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.4 ordinal_probability: 0.6 hovedetasje: &hovedetasje diff --git a/resources/addresses/nl.yaml b/resources/addresses/nl.yaml index 2fa28b4a..f6f9b27c 100644 --- a/resources/addresses/nl.yaml +++ b/resources/addresses/nl.yaml @@ -22,7 +22,7 @@ components: alphanumeric_probability: 0.2 combinations: - house_number_unit: + - components: - house_number - unit @@ -72,10 +72,17 @@ levels: verdieping: &verdieping canonical: verdieping sample: true - canonical_probability: 0.8 - sample_probability: 0.2 + canonical_probability: 0.9 + sample_probability: 0.1 numeric: direction: left + ordinal: + direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 + numeric_probability: 0.7 + ordinal_probability: 0.3 etage: &etage canonical: etage abbreviated: et @@ -85,12 +92,19 @@ levels: sample_probability: 0.2 numeric: direction: left + ordinal: + direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 + numeric_probability: 0.7 + ordinal_probability: 0.3 begane_grond: &begane_grond canonical: begane grond abbreviated: bg sample: true canonical_probability: 0.5 - sample_probability: 0.2 + abbreviated_probability: 0.2 sample_probability: 0.3 benedenverdieping: &benedenverdieping canonical: benedenverdieping @@ -114,11 +128,11 @@ levels: sample_probability: 0.2 aliases: "0": - default: *benedenverdieping - probability: 0.5 + default: *begane_grond + probability: 0.6 alternatives: - - alternative: *begane_grond - probability: 0.45 + - alternative: *benedenverdieping + probability: 0.35 - alternative: *parterre probability: 0.04 - alternative: *het_gelijkvloers @@ -497,6 +511,13 @@ countries: sample_probability: 0.2 numeric: direction: left + ordinal: + direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 + numeric_probability: 0.7 + ordinal_probability: 0.3 aliases: "0": diff --git a/resources/addresses/pl.yaml b/resources/addresses/pl.yaml index 23912fed..d790f570 100644 --- a/resources/addresses/pl.yaml +++ b/resources/addresses/pl.yaml @@ -21,7 +21,7 @@ components: alphanumeric_probability: 0.25 combinations: - house_number_unit: + - components: - house_number - unit @@ -128,10 +128,14 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 ordinal: direction: right - roman_numeral_probability: 0.7 + digits: + ascii_probability: 0.3 + roman_numeral_probability: 0.7 numeric_probability: 0.4 ordinal_probability: 0.6 parter: &parter @@ -152,7 +156,9 @@ levels: # e.g. 1. suterena ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 standalone_probability: 0.985 number_abs_value: true number_min_abs_value: 1 diff --git a/resources/addresses/pt.yaml b/resources/addresses/pt.yaml index 53668772..94e5f33f 100644 --- a/resources/addresses/pt.yaml +++ b/resources/addresses/pt.yaml @@ -25,35 +25,7 @@ components: combinations: - # For unit types like 2/34 (more common in Canada and Australia) - house_number_unit: - components: - - house_number - - unit - label: house_number - separators: - - separator: "-" - probability: 0.9 - - separator: " - " - probability: 0.05 - - separator: / - probability: 0.05 - probability: 0.005 - house_number_floor: - components: - - house_number - - unit - label: house_number - separators: - - separator: "-" - probability: 0.9 - - separator: " - " - probability: 0.05 - - separator: / - probability: 0.05 - probability: 0.005 - - house_number_staircase_unit: + - components: - house_number - staircase @@ -67,6 +39,33 @@ components: - separator: / probability: 0.05 probability: 0.005 + # For unit types like 2/34 (more common in Canada and Australia) + - + components: + - house_number + - unit + label: house_number + separators: + - separator: "-" + probability: 0.9 + - separator: " - " + probability: 0.05 + - separator: / + probability: 0.05 + probability: 0.005 + - + components: + - house_number + - level + label: house_number + separators: + - separator: "-" + probability: 0.9 + - separator: " - " + probability: 0.05 + - separator: / + probability: 0.05 + probability: 0.005 numbers: @@ -134,6 +133,9 @@ levels: standalone_probability: 0.2 # Let e.g. 5º be the entire floor string # If ordinal is selected, chance of e.g. just using 2o without Andar null_phrase_probability: 0.6 + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.2 ordinal_probability: 0.8 nivel: &nivel @@ -151,6 +153,9 @@ levels: direction_probability: 0.95 standalone_probability: 0.2 null_phrase_probability: 0.6 + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.2 ordinal_probability: 0.8 @@ -176,6 +181,9 @@ levels: standalone_probability: 0.2 # Let e.g. 5º be the entire floor string # If ordinal is selected, chance of e.g. just using 2o without Piso null_phrase_probability: 0.6 + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.2 numeric_affix_probability: 0.05 ordinal_probability: 0.75 @@ -204,8 +212,7 @@ levels: terreo: &terreo canonical: terréo sample: true - canonical_probability: 0.3 - abbreviated_probability: 0.4 + canonical_probability: 0.7 sample_probability: 0.3 baixos: &baixos canonical: baixos @@ -241,6 +248,9 @@ levels: canonical: sub cave abbreviated: scv sample: true + canonical_probability: 0.4 + abbreviated_probability: 0.3 + sample_probability: 0.3 # e.g. sub cave 1 numeric: direction: left @@ -750,6 +760,10 @@ units: canonical: casa numeric: direction: left + moradia: &moradia + canonical: moradia + numeric: + direction: left room: &sala canonical: sala numeric: @@ -769,10 +783,12 @@ units: alternatives: - alternative: *sala probability: 0.1 - - alternative: *casa - probability: 0.05 - alternative: *porta probability: 0.05 + - alternative: *casa + probability: 0.04 + - alternative: *moradia + probability: 0.01 # Separate random probability for adding directions like 2o Izq, 2 Dcha, etc. add_direction: true @@ -796,7 +812,9 @@ units: - alternative: *sala probability: 0.1 - alternative: *casa - probability: 0.03 + probability: 0.02 + - alternative: *moradia + probability: 0.01 - alternative: *porta probability: 0.05 - alternative: *letra @@ -903,7 +921,7 @@ countries: levels: numbering_starts_at: 1 aliases: - "0": + "0": &ground_floor_brasil default: *andar_terreo probability: 0.4 alternatives: @@ -920,7 +938,7 @@ countries: probability: 0.05 - alternative: *piso probability: 0.01 - + "1": *ground_floor_brasil postcodes: alphanumeric: @@ -929,8 +947,8 @@ countries: abbreviated: cep sample: true canonical_probability: 0.001 - abbreviated_probability: 0.995 - sample_probability: 0.004 + abbreviated_probability: 0.949 + sample_probability: 0.05 numeric: # Postcodes in Brazil are sometimes prefixed by CEP @@ -945,7 +963,7 @@ countries: numeric_affix_probability: 0.12 strict_numeric: true - po_boxes: + po_boxes: &po_boxes_caixa_postal alphanumeric: default: canonical: caixa postal @@ -982,3 +1000,55 @@ countries: probability: 0.05 - alternative: *letra probability: 0.05 + + # Angola + ao: + postcodes: &postcodes_codigo_postal + alphanumeric: + default: + canonical: código postal + abbreviated: cp + sample: true + canonical_probability: 0.001 + abbreviated_probability: 0.949 + sample_probability: 0.05 + + numeric: + direction: left + + numeric_affix: + affix: cp + direction: left + # null_probability means the chance of doing nothing e.g. just the postal code + null_probability: 0.7 + numeric_probability: 0.18 + numeric_affix_probability: 0.12 + strict_numeric: true + + po_boxes: *po_boxes_caixa_postal + + # Mozambique + mz: + postcodes: *postcodes_codigo_postal + po_boxes: *po_boxes_caixa_postal + + # Cape Verde + cv: + po_boxes: *po_boxes_caixa_postal + + + # East Timor + tl: + po_boxes: *po_boxes_caixa_postal + + # São Tome and Principe + st: + po_boxes: *po_boxes_caixa_postal + + # Guinea-Bissau + gw: + po_boxes: *po_boxes_caixa_postal + + # Macau + mo: + po_boxes: *po_boxes_caixa_postal diff --git a/resources/addresses/ro.yaml b/resources/addresses/ro.yaml index 7c5709fa..616b9f25 100644 --- a/resources/addresses/ro.yaml +++ b/resources/addresses/ro.yaml @@ -114,7 +114,9 @@ levels: direction: left add_number_phrase: true # Occasionally add variation of "number", e.g. et. nr 2 add_number_phrase_probability: 0.05 - roman_numeral_probability: 0.2 + digits: + ascii_probability: 0.8 + roman_numeral_probability: 0.2 # Ground floor parter: &parter canonical: parter diff --git a/resources/addresses/ru.yaml b/resources/addresses/ru.yaml index 8c4c7dd8..3a1cdf38 100644 --- a/resources/addresses/ru.yaml +++ b/resources/addresses/ru.yaml @@ -53,8 +53,8 @@ house_number: canonical: дом abbreviated: д sample: true - canonical_probability: 0.8 - abbreviated_probability: 0.1 + canonical_probability: 0.6 + abbreviated_probability: 0.3 sample_probability: 0.1 numeric: direction: left @@ -62,8 +62,8 @@ house_number: canonical: dom abbreviated: d sample: true - canonical_probability: 0.8 - abbreviated_probability: 0.1 + canonical_probability: 0.6 + abbreviated_probability: 0.3 sample_probability: 0.1 numeric: direction: left @@ -95,8 +95,8 @@ and: cross_streets: - and: *i - and: *i_latin + i: *i + i_latin: *i_latin corner: &ugol canonical: угол sample: true @@ -177,6 +177,9 @@ levels: direction_probability: 0.9 ordinal: direction: right + digits: + ascii_probability: 0.8 + spellout_probability: 0.2 numeric_probability: 0.4 ordinal_probability: 0.6 etazh_latin: &etazh_latin @@ -217,6 +220,26 @@ levels: direction: right numeric_probability: 0.4 ordinal_probability: 0.6 + pervyy_etazh: &pervyy_etazh + canonical: первый этаж + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + pervyy_etazh_latin: &pervyy_etazh_latin + canonical: pervyy etazh + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + nizhniy_etazh: &nizhniy_etazh + canonical: нижний этаж + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 + nizhniy_etazh_latin: &nizhniy_etazh_latin + canonical: nizhniy etazh + sample: true + canonical_probability: 0.9 + sample_probability: 0.1 tsokolnyy_etazh: &tsokolnyy_etazh canonical: цокольный этаж abbreviated: цок эт @@ -295,7 +318,20 @@ levels: probability: 0.09 - alternative: *etazh_latin probability: 0.01 - "0": *ground_floor + "0": + default: *pervyy_etazh + probability: 0.6 + alternatives: + - alternative: *pervyy_etazh_latin + probability: 0.05 + - alternative: *nizhniy_etazh + probability: 0.2 + - alternative: *nizhniy_etazh_latin + probability: 0.05 + - alternative: *tsokolnyy_etazh + probability: 0.075 + - alternative: *tsokolnyy_etazh_latin + probability: 0.025 numbering_starts_at: 0 @@ -321,7 +357,7 @@ categories: sample: true canonical_probability: 0.8 sample_probability: 0.2 - probability: 0.74 + probability: 0.69 alternatives: - alternative: canonical: vblizi @@ -347,6 +383,18 @@ categories: canonical_probability: 0.8 sample_probability: 0.2 probability: 0.04 + - alternative: + canonical: под + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.04 + - alternative: + canonical: pod + sample: true + canonical_probability: 0.8 + sample_probability: 0.2 + probability: 0.01 - alternative: canonical: okolo sample: true diff --git a/resources/addresses/sk.yaml b/resources/addresses/sk.yaml index 75eff6e6..27187f20 100644 --- a/resources/addresses/sk.yaml +++ b/resources/addresses/sk.yaml @@ -101,12 +101,16 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 add_number_phrase: true add_number_phrase_probability: 0.1 ordinal: direction: right - roman_numeral_probability: 0.7 + digits: + ascii_probability: 0.3 + roman_numeral_probability: 0.7 add_number_phrase: true add_number_phrase_probability: 0.1 numeric_probability: 0.4 @@ -119,12 +123,16 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 add_number_phrase: true add_number_phrase_probability: 0.1 ordinal: direction: right - roman_numeral_probability: 0.7 + digits: + ascii_probability: 0.3 + roman_numeral_probability: 0.7 add_number_phrase: true add_number_phrase_probability: 0.1 numeric_probability: 0.4 @@ -140,10 +148,14 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 numeric_probability: 0.4 ordinal_probability: 0.6 etaz: &etaz @@ -154,10 +166,14 @@ levels: numeric: direction: left direction_probability: 0.9 - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 numeric_probability: 0.4 ordinal_probability: 0.6 prizemie: &prizemie @@ -183,7 +199,9 @@ levels: # e.g. 1. podzemné podlažie ordinal: direction: right - roman_numeral_probability: 0.3 + digits: + ascii_probability: 0.7 + roman_numeral_probability: 0.3 standalone_probability: 0.985 number_abs_value: true number_min_abs_value: 1 diff --git a/resources/addresses/zh.yaml b/resources/addresses/zh.yaml index 56825775..f601c622 100644 --- a/resources/addresses/zh.yaml +++ b/resources/addresses/zh.yaml @@ -117,6 +117,7 @@ po_boxes: affix: 邮政信箱 direction: left digits: + ascii_probability: 0.3 unicode_full_width_probability: 0.5 spellout_probability: 0.2 use_number_phrase: true @@ -129,6 +130,7 @@ po_boxes: affix: 郵政信箱 direction: left digits: + ascii_probability: 0.3 unicode_full_width_probability: 0.5 spellout_probability: 0.2 use_number_phrase: true