From 8383d5bb12c1e76b34198cdda359f46005dd59e9 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 24 Jun 2016 16:06:59 -0400 Subject: [PATCH] [numex] Adding numeric expression spellout in the Python geodata module for generating training data --- resources/numex/be.yaml | 2 + resources/numex/bg.yaml | 8 ++ resources/numex/ca.yaml | 11 ++ resources/numex/cs.yaml | 1 + resources/numex/da.yaml | 212 ++++++++++++++++++++++++++-- resources/numex/de.yaml | 65 +++++++++ resources/numex/el.yaml | 3 + resources/numex/en.yaml | 10 ++ resources/numex/es.yaml | 61 ++++++++ resources/numex/et.yaml | 1 + resources/numex/fi.yaml | 11 ++ resources/numex/fr.yaml | 72 ++++++++-- resources/numex/hr.yaml | 17 +++ resources/numex/hu.yaml | 7 + resources/numex/it.yaml | 13 ++ resources/numex/ja.yaml | 1 + resources/numex/ko.yaml | 1 + resources/numex/la.yaml | 1 + resources/numex/lv.yaml | 48 +++++++ resources/numex/mt.yaml | 39 +++++ resources/numex/nb.yaml | 210 ++++++++++++++++++++++++--- resources/numex/nl.yaml | 107 +++++++++++++- resources/numex/pt.yaml | 36 +++++ resources/numex/ro.yaml | 12 ++ resources/numex/ru.yaml | 2 + resources/numex/sk.yaml | 44 +++++- resources/numex/sl.yaml | 1 + resources/numex/sv.yaml | 89 +++++++----- resources/numex/tr.yaml | 1 + resources/numex/uk.yaml | 2 + resources/numex/zh.yaml | 1 + scripts/geodata/numbers/spellout.py | 180 +++++++++++++++++++++++ scripts/geodata/osm/formatter.py | 6 + 33 files changed, 1194 insertions(+), 81 deletions(-) create mode 100644 scripts/geodata/numbers/spellout.py diff --git a/resources/numex/be.yaml b/resources/numex/be.yaml index 1c081934..d4100df7 100644 --- a/resources/numex/be.yaml +++ b/resources/numex/be.yaml @@ -9,6 +9,7 @@ name: "адзiн" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "адно" value: 1 @@ -19,6 +20,7 @@ value: 1 type: "cardinal" gender: "f" + multiply_gte: 1000 - name: "два" value: 2 diff --git a/resources/numex/bg.yaml b/resources/numex/bg.yaml index 3b340fbd..65b27d8b 100644 --- 
a/resources/numex/bg.yaml +++ b/resources/numex/bg.yaml @@ -96,41 +96,49 @@ value: 20 type: "cardinal" right: "add" + right_separator: " и " - name: "тридесет" value: 30 type: "cardinal" right: "add" + right_separator: " и " - name: "четиридесет" value: 40 type: "cardinal" right: "add" + right_separator: " и " - name: "петдесет" value: 50 type: "cardinal" right: "add" + right_separator: " и " - name: "шестдесет" value: 60 type: "cardinal" right: "add" + right_separator: " и " - name: "седемдесет" value: 70 type: "cardinal" right: "add" + right_separator: " и " - name: "осемдесет" value: 80 type: "cardinal" right: "add" + right_separator: " и " - name: "деветдесет" value: 90 type: "cardinal" right: "add" + right_separator: " и " - name: "сто" value: 100 diff --git a/resources/numex/ca.yaml b/resources/numex/ca.yaml index c914ba8d..7e76bd4a 100644 --- a/resources/numex/ca.yaml +++ b/resources/numex/ca.yaml @@ -9,6 +9,7 @@ name: "un" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "una" value: 1 @@ -96,46 +97,56 @@ value: 20 type: "cardinal" right: "add" + right_separator: "-i-" - name: "trenta" value: 30 type: "cardinal" right: "add" + right_separator: "-" - name: "quaranta" value: 40 type: "cardinal" right: "add" + right_separator: "-" - name: "cinquanta" value: 50 type: "cardinal" right: "add" + right_separator: "-" - name: "seixanta" value: 60 type: "cardinal" right: "add" + right_separator: "-" - name: "setanta" value: 70 type: "cardinal" right: "add" + right_separator: "-" - name: "vuitanta" value: 80 type: "cardinal" right: "add" + right_separator: "-" - name: "noranta" value: 90 type: "cardinal" right: "add" + right_separator: "-" - name: "cent" value: 100 type: "cardinal" right: "add" + right_separator: "-" + exact_multiple_only: true - name: "cents" value: 100 diff --git a/resources/numex/cs.yaml b/resources/numex/cs.yaml index ff56cd87..a4b0f42d 100644 --- a/resources/numex/cs.yaml +++ b/resources/numex/cs.yaml @@ -19,6 +19,7 @@ value: 1 type: 
"cardinal" gender: "f" + multiply_gte: 100 - name: "dva" value: 2 diff --git a/resources/numex/da.yaml b/resources/numex/da.yaml index a0b4c6bb..6387cab1 100644 --- a/resources/numex/da.yaml +++ b/resources/numex/da.yaml @@ -14,6 +14,7 @@ value: 1 type: "cardinal" gender: "n" + multiply_gte: 100 - name: "to" value: 2 @@ -90,69 +91,86 @@ name: "tyve" value: 20 type: "cardinal" + left: "add" + left_separator: "og" - name: "tredive" value: 30 type: "cardinal" + left: "add" + left_separator: "og" - name: "fyrre" value: 40 type: "cardinal" + left: "add" + left_separator: "og" - name: "halvtreds" value: 50 type: "cardinal" + left: "add" + left_separator: "og" - name: "tres" value: 60 type: "cardinal" + left: "add" + left_separator: "og" - name: "halvfjerds" value: 70 type: "cardinal" + left: "add" + left_separator: "og" - name: "firs" value: 80 type: "cardinal" + left: "add" + left_separator: "og" - name: "halvfems" value: 90 type: "cardinal" + left: "add" + left_separator: "og" - - name: "et­hundrede" - value: 100 - type: "cardinal" - right: "add" - - - name: "hundred" + name: "hundrede" value: 100 type: "cardinal" left: "multiply" + left_separator: "" right: "add" + right_separator: " og " - - name: "et tusinde" + name: "tusinde" value: 1000 type: "cardinal" + left: "multiply" right: "add" + right: "add" + right_separator: " og " - name: "tusind" value: 1000 type: "cardinal" + gender: "n" left: "multiply" right: "add" + right_separator: " og " - - name: "en million" + name: "million" value: 1000000 type: "cardinal" right: "add" - - + - name: "millioner" value: 1000000 type: "cardinal" - left: "multiply" right: "add" - - name: "en milliard" + name: "milliard" value: 1000000000 type: "cardinal" right: "add" @@ -163,7 +181,7 @@ left: "multiply" right: "add" - - name: "en billion" + name: "billion" value: 1000000000000 type: "cardinal" right: "add" @@ -174,7 +192,7 @@ left: "multiply" right: "add" - - name: "en billiard" + name: "billiard" value: 1000000000000000 type: 
"cardinal" right: "add" @@ -184,6 +202,174 @@ type: "cardinal" left: "multiply" right: "add" + - + name: "nulte" + value: 0 + type: "ordinal" + - + name: "første" + value: 1 + type: "ordinal" + - + name: "foerste" + value: 1 + type: "ordinal" + - + name: "anden" + value: 2 + type: "ordinal" + - + name: "tredje" + value: 3 + type: "ordinal" + - + name: "fjerde" + value: 4 + type: "ordinal" + - + name: "femte" + value: 5 + type: "ordinal" + - + name: "sjette" + value: 6 + type: "ordinal" + - + name: "syvende" + value: 7 + type: "ordinal" + - + name: "ottende" + value: 8 + type: "ordinal" + - + name: "niende" + value: 9 + type: "ordinal" + - + name: "tiende" + value: 10 + type: "ordinal" + - + name: "ellevte" + value: 11 + type: "ordinal" + - + name: "tolvte" + value: 12 + type: "ordinal" + - + name: "trettende" + value: 13 + type: "ordinal" + - + name: "fjortende" + value: 14 + type: "ordinal" + - + name: "femtende" + value: 15 + type: "ordinal" + - + name: "sekstende" + value: 16 + type: "ordinal" + - + name: "syttende" + value: 17 + type: "ordinal" + - + name: "attende" + value: 18 + type: "ordinal" + - + name: "nittende" + value: 19 + type: "ordinal" + - + name: "tyvende" + value: 20 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "tredivte" + value: 30 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "fyrrende" + value: 40 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "halvtredsindstyvende" + value: 50 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "tresindstyvende" + value: 60 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "halvfjerdsindstyvende" + value: 70 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "firsindstyvende" + value: 80 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "halvfemsindstyvende" + value: 90 + type: "ordinal" + left: "add" + left_separator: "og" + - + name: "hundredede" + value: 100 + type: "ordinal" + 
left: "multiply" + left_separator: " " + - + name: "tusinde" + value: 1000 + type: "ordinal" + left: "multiply" + left_separator: " " + - + name: "millionte" + value: 1000000 + type: "ordinal" + left: "multiply" + left_separator: " " + - + name: "milliardte" + value: 1000000000 + type: "ordinal" + left: "multiply" + left_separator: " " + - + name: "billionte" + value: 1000000000000 + type: "ordinal" + left: "multiply" + left_separator: " " + - + name: "billiardte" + value: 1000000000000000 + type: "ordinal" + left: "multiply" + left_separator: " " ordinal_indicators: - suffixes: diff --git a/resources/numex/de.yaml b/resources/numex/de.yaml index f3e2f7d9..b4b52767 100644 --- a/resources/numex/de.yaml +++ b/resources/numex/de.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "null" @@ -14,11 +15,13 @@ value: 1 type: "cardinal" gender: "m" + multiply_gte: 100 - name: "eine" value: 1 type: "cardinal" gender: "f" + multiply_gte: 1000000 - name: "einen" value: 1 @@ -133,116 +136,145 @@ value: 20 type: "cardinal" left: "add" + left_separator: "und" - name: "dreißig" value: 30 type: "cardinal" left: "add" + left_separator: "und" - name: "dreissig" value: 30 type: "cardinal" left: "add" + left_separator: "und" - name: "vierzig" value: 40 type: "cardinal" left: "add" + left_separator: "und" - name: "fünfzig" value: 50 type: "cardinal" left: "add" + left_separator: "und" - name: "funfzig" value: 50 type: "cardinal" left: "add" + left_separator: "und" - name: "fuenfzig" value: 50 type: "cardinal" left: "add" + left_separator: "und" - name: "sechzig" value: 60 type: "cardinal" left: "add" + left_separator: "und" - name: "siebzig" value: 70 type: "cardinal" left: "add" + left_separator: "und" - name: "achtzig" value: 80 type: "cardinal" left: "add" + left_separator: "und" - name: "neunzig" value: 90 type: "cardinal" left: "add" + left_separator: "und" - name: "hundert" value: 100 type: "cardinal" left: "multiply" right: "add" + 
right_separator: "" - name: "tausend" value: 1000 type: "cardinal" left: "multiply" right: "add" + right_separator: "" - name: "million" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "millionen" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "milliarde" value: 1000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "milliarden" value: 1000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "billion" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "billionen" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "billiarde" value: 1000000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "billiarden" value: 1000000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "nullte" value: 0 @@ -560,166 +592,199 @@ value: 20 type: "ordinal" left: "add" + left_separator: "und" - name: "zwanzigster" value: 20 type: "ordinal" left: "add" + left_separator: "und" - name: "zwanzigstes" value: 20 type: "ordinal" left: "add" + left_separator: "und" - name: "dreißigste" value: 30 type: "ordinal" left: "add" + left_separator: "und" - name: "dreißigster" value: 30 type: "ordinal" left: "add" + left_separator: "und" - name: "dreißigstes" value: 30 type: "ordinal" left: "add" + left_separator: "und" - name: "dreissigste" value: 30 type: "ordinal" left: "add" + left_separator: "und" - name: "dreissigster" value: 30 type: "ordinal" left: "add" + left_separator: "und" - name: "dreissigstes" value: 30 type: "ordinal" left: "add" + left_separator: "und" - name: "vierzigste" value: 40 type: 
"ordinal" left: "add" + left_separator: "und" - name: "vierzigster" value: 40 type: "ordinal" left: "add" + left_separator: "und" - name: "vierzigstes" value: 40 type: "ordinal" left: "add" + left_separator: "und" - name: "fünfzigste" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "fünfzigster" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "fünfzigstes" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "funfzigste" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "funfzigster" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "funfzigstes" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "fuenfzigste" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "fuenfzigster" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "fuenfzigstes" value: 50 type: "ordinal" left: "add" + left_separator: "und" - name: "sechzigste" value: 60 type: "ordinal" left: "add" + left_separator: "und" - name: "sechzigster" value: 60 type: "ordinal" left: "add" + left_separator: "und" - name: "sechzigstes" value: 60 type: "ordinal" left: "add" + left_separator: "und" - name: "siebzigste" value: 70 type: "ordinal" left: "add" + left_separator: "und" - name: "siebzigster" value: 70 type: "ordinal" left: "add" + left_separator: "und" - name: "siebzigstes" value: 70 type: "ordinal" left: "add" + left_separator: "und" - name: "achtzigste" value: 80 type: "ordinal" left: "add" + left_separator: "und" - name: "achtzigster" value: 80 type: "ordinal" left: "add" + left_separator: "und" - name: "achtzigstes" value: 80 type: "ordinal" left: "add" + left_separator: "und" - name: "neunzigste" value: 90 type: "ordinal" left: "add" + left_separator: "und" - name: "neunzigster" value: 90 type: "ordinal" left: "add" + left_separator: "und" - name: "neunzigstes" value: 90 type: "ordinal" left: "add" + left_separator: "und" - name: "hundertste" 
value: 100 diff --git a/resources/numex/el.yaml b/resources/numex/el.yaml index e3117c6a..05fb99e7 100644 --- a/resources/numex/el.yaml +++ b/resources/numex/el.yaml @@ -22,6 +22,7 @@ value: 1 type: "cardinal" gender: "n" + multiply_gte: 1000000 - name: "ενα" value: 1 @@ -288,11 +289,13 @@ value: 100 type: "cardinal" right: "add" + right_separator: "ν " - name: "εκατο" value: 100 type: "cardinal" right: "add" + right_separator: "ν " - name: "διακόσιοι" value: 200 diff --git a/resources/numex/en.yaml b/resources/numex/en.yaml index 6b2b512d..cd11f79e 100644 --- a/resources/numex/en.yaml +++ b/resources/numex/en.yaml @@ -13,6 +13,7 @@ name: "one" value: 1 type: "cardinal" + multiply_gte: 100 - name: "two" value: 2 @@ -90,46 +91,55 @@ value: 20 type: "cardinal" right: "add" + right_separator: "-" - name: "thirty" value: 30 type: "cardinal" right: "add" + right_separator: "-" - name: "forty" value: 40 type: "cardinal" right: "add" + right_separator: "-" - name: "fourty" value: 40 type: "cardinal" right: "add" + right_separator: "-" - name: "fifty" value: 50 type: "cardinal" right: "add" + right_separator: "-" - name: "sixty" value: 60 type: "cardinal" right: "add" + right_separator: "-" - name: "seventy" value: 70 type: "cardinal" right: "add" + right_separator: "-" - name: "eighty" value: 80 type: "cardinal" right: "add" + right_separator: "-" - name: "ninety" value: 90 type: "cardinal" right: "add" + right_separator: "-" - name: "hundred" value: 100 diff --git a/resources/numex/es.yaml b/resources/numex/es.yaml index d7063cae..c4c010b8 100644 --- a/resources/numex/es.yaml +++ b/resources/numex/es.yaml @@ -9,6 +9,7 @@ name: "un" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "uno" value: 1 @@ -94,45 +95,97 @@ name: "veintiun" value: 21 type: "cardinal" + - + name: "veintidós" + value: 22 + type: "cardinal" + - + name: "veintidos" + value: 22 + type: "cardinal" + - + name: "veintitrés" + value: 23 + type: "cardinal" + - + name: "veintitres" + value: 23 + 
type: "cardinal" + - + name: "veinticuatro" + value: 24 + type: "cardinal" + - + name: "veinticinco" + value: 25 + type: "cardinal" + - + name: "veintiséis" + value: 26 + type: "cardinal" + - + name: "veintiseis" + value: 26 + type: "cardinal" + - + name: "veintisiete" + value: 27 + type: "cardinal" + - + name: "veintiocho" + value: 28 + type: "cardinal" + - + name: "veintinueve" + value: 29 + type: "cardinal" - name: "treinta" value: 30 type: "cardinal" right: "add" + right_separator: " y " - name: "cuarenta" value: 40 type: "cardinal" right: "add" + right_separator: " y " - name: "cincuenta" value: 50 type: "cardinal" right: "add" + right_separator: " y " - name: "sesenta" value: 60 type: "cardinal" right: "add" + right_separator: " y " - name: "setenta" value: 70 type: "cardinal" right: "add" + right_separator: " y " - name: "ochenta" value: 80 type: "cardinal" right: "add" + right_separator: " y " - name: "noventa" value: 90 type: "cardinal" right: "add" + right_separator: " y " - name: "cien" value: 100 type: "cardinal" + exact_multiple_only: true - name: "ciento" value: 100 @@ -148,6 +201,7 @@ value: 200 type: "cardinal" right: "add" + gender: "f" - name: "trescientos" value: 300 @@ -158,6 +212,7 @@ value: 300 type: "cardinal" right: "add" + gender: "f" - name: "cuatrocientos" value: 400 @@ -168,6 +223,7 @@ value: 400 type: "cardinal" right: "add" + gender: "f" - name: "quinientos" value: 500 @@ -178,6 +234,7 @@ value: 500 type: "cardinal" right: "add" + gender: "f" - name: "seiscientos" value: 600 @@ -188,6 +245,7 @@ value: 600 type: "cardinal" right: "add" + gender: "f" - name: "setecientos" value: 700 @@ -198,6 +256,7 @@ value: 700 type: "cardinal" right: "add" + gender: "f" - name: "ochocientos" value: 800 @@ -208,6 +267,7 @@ value: 800 type: "cardinal" right: "add" + gender: "f" - name: "novecientos" value: 900 @@ -218,6 +278,7 @@ value: 900 type: "cardinal" right: "add" + gender: "f" - name: "mil" value: 1000 diff --git a/resources/numex/et.yaml 
b/resources/numex/et.yaml index e268e1e7..24f67610 100644 --- a/resources/numex/et.yaml +++ b/resources/numex/et.yaml @@ -9,6 +9,7 @@ name: "üks" value: 1 type: "cardinal" + multiply_gte: 100 - name: "uks" value: 1 diff --git a/resources/numex/fi.yaml b/resources/numex/fi.yaml index 145e91a5..fb5f2de2 100644 --- a/resources/numex/fi.yaml +++ b/resources/numex/fi.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "nolla" @@ -206,6 +207,7 @@ value: 100 type: "cardinal" right: "add" + exact_multiple_only: true - name: "sataa" value: 100 @@ -228,34 +230,43 @@ value: 1000000 type: "cardinal" right: "add" + exact_multiple_only: true + left_separator: " " - name: "miljoonaa" value: 1000000 type: "cardinal" left: "multiply" right: "add" + left_separator: " " - name: "miljardi" value: 1000000000 type: "cardinal" right: "add" + exact_multiple_only: true + left_separator: " " - name: "miljardia" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + left_separator: " " - name: "biljoona" value: 1000000000000 type: "cardinal" right: "add" + exact_multiple_only: true + left_separator: " " - name: "biljoonaa" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + left_separator: " " - name: "ensimmäinen" value: 1 diff --git a/resources/numex/fr.yaml b/resources/numex/fr.yaml index fbf8d42f..89117810 100644 --- a/resources/numex/fr.yaml +++ b/resources/numex/fr.yaml @@ -14,6 +14,7 @@ value: 1 type: "cardinal" gender: "m" + multiply_gte: 1000000 - name: "une" value: 1 @@ -79,10 +80,18 @@ name: "seize" value: 16 type: "cardinal" + - + name: "dix-sept" + value: 17 + type: "cardinal" - name: "dix sept" value: 17 type: "cardinal" + - + name: "dix-huit" + value: 18 + type: "cardinal" - name: "dix huit" value: 18 @@ -91,6 +100,10 @@ name: "dixhuit" value: 18 type: "cardinal" + - + name: "dix-neuf" + value: 19 + type: "cardinal" - name: "dix neuf" value: 19 @@ -104,48 +117,91 @@ value: 20 type: "cardinal" right: "add" + 
right_separator: "-" + - + name: "vingt-et-un" + value: 21 + type: "cardinal" - name: "trente" value: 30 type: "cardinal" right: "add" + right_separator: "-" + - + name: "trente-et-un" + value: 31 + type: "cardinal" - name: "quarante" value: 40 type: "cardinal" right: "add" + right_separator: "-" + - + name: "quarante-et-un" + value: 41 + type: "cardinal" - name: "cinquante" value: 50 type: "cardinal" right: "add" + right_separator: "-" + - + name: "cinquante-et-un" + value: 51 + type: "cardinal" - name: "soixante" value: 60 type: "cardinal" radix: 20 right: "add" + right_separator: "-" - name: "septante" value: 70 type: "cardinal" right: "add" + right_separator: "-" - name: "huitante" value: 80 type: "cardinal" right: "add" + right_separator: "-" - - name: "octante" + name: "quatre-vingt" value: 80 type: "cardinal" + radix: 20 right: "add" + right_separator: "-" + - + name: "quatre-vingt" + value: 80 + type: "cardinal" + radix: 20 + right: "add" + right_separator: "-" + - + name: "quatrevingt" + value: 80 + type: "cardinal" + radix: 20 + right: "add" + right_separator: "-" + - + name: "quatre-vingts" + value: 80 + type: "cardinal" + radix: 20 - name: "quatre vingts" value: 80 type: "cardinal" radix: 20 - right: "add" - name: "quatrevingts" value: 80 @@ -153,28 +209,24 @@ radix: 20 right: "add" - - name: "quatre vingt" + name: "octante" value: 80 type: "cardinal" - radix: 20 - right: "add" - - - name: "quatrevingt" - value: 80 - type: "cardinal" - radix: 20 right: "add" + right_separator: "-" - name: "nonante" value: 90 type: "cardinal" right: "add" + right_separator: "-" - name: "cent" value: 100 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "cents" value: 100 diff --git a/resources/numex/hr.yaml b/resources/numex/hr.yaml index 00723584..61bef6e5 100644 --- a/resources/numex/hr.yaml +++ b/resources/numex/hr.yaml @@ -9,6 +9,7 @@ name: "jedan" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "jedna" value: 1 @@ -117,51 
+118,61 @@ value: 20 type: "cardinal" right: "add" + right_separator: " i " - name: "trideset" value: 30 type: "cardinal" right: "add" + right_separator: " i " - name: "četrdeset" value: 40 type: "cardinal" right: "add" + right_separator: " i " - name: "cetrdeset" value: 40 type: "cardinal" right: "add" + right_separator: " i " - name: "pedeset" value: 50 type: "cardinal" right: "add" + right_separator: " i " - name: "šezdeset" value: 60 type: "cardinal" right: "add" + right_separator: " i " - name: "sezdeset" value: 60 type: "cardinal" right: "add" + right_separator: " i " - name: "sedamdeset" value: 70 type: "cardinal" right: "add" + right_separator: " i " - name: "osamdeset" value: 80 type: "cardinal" right: "add" + right_separator: " i " - name: "devedeset" value: 90 type: "cardinal" right: "add" + right_separator: " i " - name: "sto" value: 100 @@ -223,12 +234,14 @@ type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "tisucu" value: 1000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "tisuće" value: 1000 @@ -259,6 +272,7 @@ type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "milijuna" value: 1000000 @@ -271,6 +285,7 @@ type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "milijarde" value: 1000000000 @@ -289,6 +304,7 @@ type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "bilijuna" value: 1000000000000 @@ -301,6 +317,7 @@ type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true - name: "bilijarde" value: 1000000000000000 diff --git a/resources/numex/hu.yaml b/resources/numex/hu.yaml index 0b26ead5..b799e0f0 100644 --- a/resources/numex/hu.yaml +++ b/resources/numex/hu.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "nulla" @@ -9,6 +10,7 @@ name: "egy" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "kettő" value: 2 @@ -17,6 +19,10 @@ 
name: "ketto" value: 2 type: "cardinal" + - + name: "két" + value: 2 + type: "cardinal" - name: "ket" value: 2 @@ -149,6 +155,7 @@ type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "millió" value: 1000000 diff --git a/resources/numex/it.yaml b/resources/numex/it.yaml index 214ec785..3b7d58b5 100644 --- a/resources/numex/it.yaml +++ b/resources/numex/it.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "zero" @@ -10,6 +11,7 @@ value: 1 type: "cardinal" gender: "m" + multiply_gte: 1000000 - name: "uno" value: 1 @@ -229,42 +231,53 @@ type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "milioni" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "miliardo" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "miliardi" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "bilione" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "bilioni" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "biliarde" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "biliardi" value: 1000000000000000 diff --git a/resources/numex/ja.yaml b/resources/numex/ja.yaml index 13118436..0f15fa0c 100644 --- a/resources/numex/ja.yaml +++ b/resources/numex/ja.yaml @@ -1,4 +1,5 @@ --- + default_separator: "" rules: - name: "〇" diff --git a/resources/numex/ko.yaml b/resources/numex/ko.yaml index 7e7fa379..3fb32185 100644 --- a/resources/numex/ko.yaml +++ b/resources/numex/ko.yaml @@ -1,4 +1,5 @@ --- + default_separator: "" rules: - name: "영" diff --git a/resources/numex/la.yaml b/resources/numex/la.yaml index fdf1d24c..7c26838a 
100644 --- a/resources/numex/la.yaml +++ b/resources/numex/la.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "i" diff --git a/resources/numex/lv.yaml b/resources/numex/lv.yaml index b1b267d5..b3bb6e63 100644 --- a/resources/numex/lv.yaml +++ b/resources/numex/lv.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "nulle" @@ -9,6 +10,7 @@ name: "viens" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "viena" value: 1 @@ -269,222 +271,268 @@ value: 100 type: "cardinal" right: "add" + right_separator: " " - name: "divsimt" value: 200 type: "cardinal" right: "add" + right_separator: " " - name: "trīssimt" value: 300 type: "cardinal" right: "add" + right_separator: " " - name: "trissimt" value: 300 type: "cardinal" right: "add" + right_separator: " " - name: "četrsimt" value: 400 type: "cardinal" right: "add" + right_separator: " " - name: "cetrsimt" value: 400 type: "cardinal" right: "add" + right_separator: " " - name: "piecsimt" value: 500 type: "cardinal" right: "add" + right_separator: " " - name: "sešsimt" value: 600 type: "cardinal" right: "add" + right_separator: " " - name: "sessimt" value: 600 type: "cardinal" right: "add" + right_separator: " " - name: "septiņsimt" value: 700 type: "cardinal" right: "add" + right_separator: " " - name: "septinsimt" value: 700 type: "cardinal" right: "add" + right_separator: " " - name: "astoņsimt" value: 800 type: "cardinal" right: "add" + right_separator: " " - name: "astonsimt" value: 800 type: "cardinal" right: "add" + right_separator: " " - name: "deviņsimt" value: 900 type: "cardinal" right: "add" + right_separator: " " - name: "devinsimt" value: 900 type: "cardinal" right: "add" + right_separator: " " - name: "tūkstoš" value: 1000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "tukstos" value: 1000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "tūkstoši" value: 1000 type: 
"cardinal" left: "multiply" right: "add" + right_separator: " " - name: "tukstosi" value: 1000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "divtūkstoš" value: 2000 type: "cardinal" right: "add" + right_separator: " " - name: "divtukstos" value: 2000 type: "cardinal" right: "add" + right_separator: " " - name: "trīstūkstoš" value: 3000 type: "cardinal" right: "add" + right_separator: " " - name: "tristukstos" value: 3000 type: "cardinal" right: "add" + right_separator: " " - name: "četrtūkstoš" value: 4000 type: "cardinal" right: "add" + right_separator: " " - name: "cetrtukstos" value: 4000 type: "cardinal" right: "add" + right_separator: " " - name: "piectūkstoš" value: 5000 type: "cardinal" right: "add" + right_separator: " " - name: "piectukstos" value: 5000 type: "cardinal" right: "add" + right_separator: " " - name: "seštūkstoš" value: 6000 type: "cardinal" right: "add" + right_separator: " " - name: "sestukstos" value: 6000 type: "cardinal" right: "add" + right_separator: " " - name: "septiņtūkstoš" value: 7000 type: "cardinal" right: "add" + right_separator: " " - name: "septintukstos" value: 7000 type: "cardinal" right: "add" + right_separator: " " - name: "astoņtūkstoš" value: 8000 type: "cardinal" right: "add" + right_separator: " " - name: "astontukstos" value: 8000 type: "cardinal" right: "add" + right_separator: " " - name: "deviņtūkstoš" value: 9000 type: "cardinal" right: "add" + right_separator: " " - name: "devintukstos" value: 9000 type: "cardinal" right: "add" + right_separator: " " - name: "miljons" value: 1000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "miljoni" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "miljards" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "miljardi" value: 1000000000 type: "cardinal" left: 
"multiply" right: "add" + right_separator: " " - name: "biljons" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "biljoni" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "biljards" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "biljardi" value: 1000000000000000 diff --git a/resources/numex/mt.yaml b/resources/numex/mt.yaml index b16002e1..5a937fc0 100644 --- a/resources/numex/mt.yaml +++ b/resources/numex/mt.yaml @@ -267,67 +267,81 @@ value: 20 type: "cardinal" left: "add" + left_separator: " u " - name: "ghoxrin" value: 20 type: "cardinal" left: "add" + left_separator: " u " - name: "tletin" value: 30 type: "cardinal" left: "add" + left_separator: " u " - name: "erbgħin" value: 40 type: "cardinal" left: "add" + left_separator: " u " - name: "erbghin" value: 40 type: "cardinal" left: "add" + left_separator: " u " - name: "ħamsin" value: 50 type: "cardinal" left: "add" + left_separator: " u " - name: "sittin" value: 60 type: "cardinal" left: "add" + left_separator: " u " - name: "sebgħin" value: 70 type: "cardinal" left: "add" + left_separator: " u " - name: "sebghin" value: 70 type: "cardinal" left: "add" + left_separator: " u " - name: "tmenin" value: 80 type: "cardinal" left: "add" + left_separator: " u " - name: "disgħin" value: 90 type: "cardinal" left: "add" + left_separator: " u " - name: "disghin" value: 90 type: "cardinal" left: "add" + left_separator: " u " - name: "mija" value: 100 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " u " - name: "mitt" value: 100 @@ -343,104 +357,129 @@ type: "cardinal" left: "multiply" right: "add" + right_separator: " u " - name: "elfejn" value: 2000 type: "cardinal" right: "add" + right_separator: " u " - name: "tlitt elef" value: 3000 type: "cardinal" right: "add" 
+ right_separator: " u " - name: "erbat elef" value: 4000 type: "cardinal" right: "add" + right_separator: " u " - name: "ħamest elef" value: 5000 type: "cardinal" right: "add" + right_separator: " u " - name: "sitt elef" value: 6000 type: "cardinal" right: "add" + right_separator: " u " - name: "sebat elef" value: 7000 type: "cardinal" right: "add" + right_separator: " u " - name: "tmint elef" value: 8000 type: "cardinal" right: "add" + right_separator: " u " - name: "disat elef" value: 9000 type: "cardinal" right: "add" + right_separator: " u " - name: "għaxart elef" value: 10000 type: "cardinal" right: "add" + right_separator: " u " - name: "ghaxart elef" value: 10000 type: "cardinal" right: "add" + right_separator: " u " - name: "miljun" value: 1000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " u " + exact_multiple_only: true + right_separator: " u " - name: "miljuni" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " u " - name: "biljun" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " u " - name: "biljuni" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " u " - name: "triljun" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " u " - name: "triljuni" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " u " - name: "kvadriljun" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " u " - name: "kvadriljuni" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " u " stopwords: - "u" diff --git a/resources/numex/nb.yaml b/resources/numex/nb.yaml index 528b9261..4d8972e2 100644 --- a/resources/numex/nb.yaml +++ b/resources/numex/nb.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + 
default_separator: "" rules: - name: "null" @@ -9,6 +10,7 @@ name: "én" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "en" value: 1 @@ -95,125 +97,301 @@ value: 19 type: "cardinal" - - name: "tyve" + name: "tjue" value: 20 type: "cardinal" - left: "add" + right: "add" - - name: "tredve" + name: "tretti" value: 30 type: "cardinal" - left: "add" + right: "add" - - name: "førr" + name: "førti" value: 40 type: "cardinal" - left: "add" + right: "add" - - name: "forr" + name: "forti" value: 40 type: "cardinal" - left: "add" + right: "add" - name: "femti" value: 50 type: "cardinal" - left: "add" + right: "add" - name: "seksti" value: 60 type: "cardinal" - left: "add" + right: "add" - name: "søtti" value: 70 type: "cardinal" - left: "add" + right: "add" - name: "sotti" value: 70 type: "cardinal" - left: "add" + right: "add" - name: "åtti" value: 80 type: "cardinal" - left: "add" + right: "add" - name: "aatti" value: 80 type: "cardinal" - left: "add" + right: "add" - name: "nitti" value: 90 type: "cardinal" - left: "add" + right: "add" - name: "etthundre" value: 100 type: "cardinal" right: "add" + exact_multiple_only: true + right_separator: " og " - name: "hundre" value: 100 type: "cardinal" left: "multiply" right: "add" + right_separator: " og " - - name: "tusend" + name: "etttusen" value: 1000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " og " + - + name: "tusen" + value: 1000 + type: "cardinal" + left: "multiply" + right: "add" + right_separator: " og " - name: "million" value: 1000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "millioner" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "milliard" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "milliarder" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + 
right_separator: " " - name: "billion" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "billioner" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "biliard" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + exact_multiple_only: true + right_separator: " " - name: "biliarder" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " + - + name: "nullte" + value: 0 + type: "ordinal" + - + name: "første" + value: 1 + type: "ordinal" + - + name: "forste" + value: 1 + type: "ordinal" + - + name: "andre" + value: 2 + type: "ordinal" + - + name: "tredje" + value: 3 + type: "ordinal" + - + name: "fjerde" + value: 4 + type: "ordinal" + - + name: "femte" + value: 5 + type: "ordinal" + - + name: "sjette" + value: 6 + type: "ordinal" + - + name: "sjuende" + value: 7 + type: "ordinal" + - + name: "åttende" + value: 8 + type: "ordinal" + - + name: "aattende" + value: 8 + type: "ordinal" + - + name: "niende" + value: 9 + type: "ordinal" + - + name: "tiende" + value: 10 + type: "ordinal" + - + name: "ellevte" + value: 11 + type: "ordinal" + - + name: "tolvte" + value: 12 + type: "ordinal" + - + name: "trettende" + value: 13 + type: "ordinal" + - + name: "fjortende" + value: 14 + type: "ordinal" + - + name: "femtende" + value: 15 + type: "ordinal" + - + name: "sekstende" + value: 16 + type: "ordinal" + - + name: "syttende" + value: 17 + type: "ordinal" + - + name: "attende" + value: 18 + type: "ordinal" + - + name: "nittende" + value: 19 + type: "ordinal" + - + name: "nittende" + value: 19 + type: "ordinal" + - + name: "tjueende" + value: 20 + type: "ordinal" + - + name: "trettiende" + value: 30 + type: "ordinal" + - + name: "førtiende" + value: 40 + type: "ordinal" + - + name: "femtiende" + value: 50 + type: "ordinal" + - + name: "sekstiende" + value: 60 + type: "ordinal" + - + name: "syttiende" 
+ value: 70 + type: "ordinal" + - + name: "åttiende" + value: 80 + type: "ordinal" + - + name: "nittiende" + value: 90 + type: "ordinal" + - + name: "nittiende" + value: 90 + type: "ordinal" + - + name: "hundrede" + value: 100 + type: "ordinal" + - + name: "tusende" + value: 1000 + type: "ordinal" + - + name: "millionte" + value: 1000000 + type: "ordinal" + - + name: "milliardte" + value: 1000000000 + type: "ordinal" + - + name: "billionte" + value: 1000000000000 + type: "ordinal" + - + name: "billiardte" + value: 1000000000000000 + type: "ordinal" ordinal_indicators: - suffixes: diff --git a/resources/numex/nl.yaml b/resources/numex/nl.yaml index bcbdb01a..1b859ab8 100644 --- a/resources/numex/nl.yaml +++ b/resources/numex/nl.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "nul" @@ -86,44 +87,116 @@ value: 20 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­twintig" + value: 22 + type: "cardinal" + - + name: "drie­ën­twintig" + value: 23 + type: "cardinal" - name: "dertig" value: 30 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­dertig" + value: 32 + type: "cardinal" + - + name: "drie­ën­dertig" + value: 33 + type: "cardinal" - name: "veertig" value: 40 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­veertig" + value: 42 + type: "cardinal" + - + name: "drie­ën­veertig" + value: 43 + type: "cardinal" - name: "vijftig" value: 50 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­vijftig" + value: 52 + type: "cardinal" + - + name: "drie­ën­vijftig" + value: 53 + type: "cardinal" - name: "zestig" value: 60 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­zestig" + value: 62 + type: "cardinal" + - + name: "drie­ën­zestig" + value: 63 + type: "cardinal" - name: "zeventig" value: 70 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­zeventig" + value: 72 + type: "cardinal" + - + name:
"drie­ën­zeventig" + value: 73 + type: "cardinal" - name: "tachtig" value: 80 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­tachtig" + value: 82 + type: "cardinal" + - + name: "drie­ën­tachtig" + value: 83 + type: "cardinal" - name: "negentig" value: 90 type: "cardinal" left: "add" + left_separator: "en" + - + name: "twee­ën­negentig" + value: 92 + type: "cardinal" + - + name: "drie­ën­negentig" + value: 93 + type: "cardinal" - name: "honderd" - value: 20 + value: 100 type: "cardinal" left: "multiply" right: "add" @@ -139,24 +212,28 @@ type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "miljard" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "biljoen" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "biljard" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "nulste" value: 0 @@ -238,41 +315,59 @@ value: 20 type: "ordinal" left: "add" + left_separator: "en" - - name: "dertigste" + name: "twintigste" value: 20 type: "ordinal" left: "add" + - + name: "twintigste" + value: 20 + type: "ordinal" + left: "add" + - + name: "dertigste" + value: 30 + type: "ordinal" + left: "add" + left_separator: "en" - name: "veerigste" value: 40 type: "ordinal" left: "add" + left_separator: "en" - name: "vijftigste" value: 50 type: "ordinal" left: "add" + left_separator: "en" - name: "zestigste" value: 60 type: "ordinal" left: "add" + left_separator: "en" - name: "zeventigste" value: 70 type: "ordinal" left: "add" + left_separator: "en" - name: "tachtigste" value: 80 type: "ordinal" left: "add" + left_separator: "en" - name: "negentigste" value: 90 type: "ordinal" left: "add" + left_separator: "en" - name: "honderdste" value: 100 @@ -286,22 +381,22 @@ - name: "miljoenste" value: 1000000 - type: "cardinal" + type: "ordinal" left: "multiply" - name: "miljardste" value: 1000000000 - type: 
"cardinal" + type: "ordinal" left: "multiply" - name: "biljoenste" value: 1000000000000 - type: "cardinal" + type: "ordinal" left: "multiply" - name: "biljardste" value: 1000000000000000 - type: "cardinal" + type: "ordinal" left: "multiply" ordinal_indicators: - diff --git a/resources/numex/pt.yaml b/resources/numex/pt.yaml index fe1ddcc9..b9c9eaf7 100644 --- a/resources/numex/pt.yaml +++ b/resources/numex/pt.yaml @@ -5,6 +5,7 @@ name: "um" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "uma" value: 1 @@ -96,193 +97,228 @@ value: 20 type: "cardinal" right: "add" + right_separator: " e " - name: "trinta" value: 30 type: "cardinal" right: "add" + right_separator: " e " - name: "quarenta" value: 40 type: "cardinal" right: "add" + right_separator: " e " - name: "cinquenta" value: 50 type: "cardinal" right: "add" + right_separator: " e " - name: "sessenta" value: 60 type: "cardinal" right: "add" + right_separator: " e " - name: "setenta" value: 70 type: "cardinal" right: "add" + right_separator: " e " - name: "oitenta" value: 80 type: "cardinal" right: "add" + right_separator: " e " - name: "noventa" value: 90 type: "cardinal" right: "add" + right_separator: " e " - name: "cem" value: 100 type: "cardinal" right: "add" + exact_multiple_only: true - name: "cento" value: 100 type: "cardinal" right: "add" + right_separator: " e " - name: "duzentos" value: 200 type: "cardinal" right: "add" + right_separator: " e " - name: "trezentos" value: 300 type: "cardinal" right: "add" + right_separator: " e " - name: "quatrocentos" value: 400 type: "cardinal" right: "add" + right_separator: " e " - name: "quinhentos" value: 500 type: "cardinal" right: "add" + right_separator: " e " - name: "seiscentos" value: 600 type: "cardinal" right: "add" + right_separator: " e " - name: "setecentos" value: 700 type: "cardinal" right: "add" + right_separator: " e " - name: "oitocentos" value: 800 type: "cardinal" right: "add" + right_separator: " e " - name: "novecentos" value: 900 type: 
"cardinal" right: "add" + right_separator: " e " - name: "mil" value: 1000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "milhão" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "milhao" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "milhões" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "milhoes" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "bilhão" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "bilhao" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "bilhões" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "bilhoes" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "trilhão" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "trilhao" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "trilhões" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "trilhoes" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "quatrilhão" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "quatrilhao" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "quatrilhões" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "quatrilhoes" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " e " - name: "primeiro" value: 1 diff --git a/resources/numex/ro.yaml 
b/resources/numex/ro.yaml index f3ba0299..cd66dcf1 100644 --- a/resources/numex/ro.yaml +++ b/resources/numex/ro.yaml @@ -13,6 +13,7 @@ name: "una" value: 1 type: "cardinal" + multiply_gte: 100 - name: "doi" value: 2 @@ -114,56 +115,67 @@ value: 20 type: "cardinal" right: "add" + right_separator: " şi " - name: "treizeci" value: 30 type: "cardinal" right: "add" + right_separator: " si " - name: "patruzeci" value: 40 type: "cardinal" right: "add" + right_separator: " şi " - name: "cincizeci" value: 50 type: "cardinal" right: "add" + right_separator: " şi " - name: "şasezeci" value: 60 type: "cardinal" right: "add" + right_separator: " şi " - name: "sasezeci" value: 60 type: "cardinal" right: "add" + right_separator: " si " - name: "şaptezeci" value: 70 type: "cardinal" right: "add" + right_separator: " şi " - name: "saptezeci" value: 70 type: "cardinal" right: "add" + right_separator: " si " - name: "optzeci" value: 80 type: "cardinal" right: "add" + right_separator: " şi " - name: "nouăzeci" value: 90 type: "cardinal" right: "add" + right_separator: " şi " - name: "nouazeci" value: 90 type: "cardinal" right: "add" + right_separator: " si " - name: "sută" value: 100 diff --git a/resources/numex/ru.yaml b/resources/numex/ru.yaml index f10ad91c..9dd67012 100644 --- a/resources/numex/ru.yaml +++ b/resources/numex/ru.yaml @@ -14,11 +14,13 @@ value: 1 type: "cardinal" gender: "f" + multiply_gte: 1000 - name: "одной" value: 1 type: "cardinal" gender: "f" + category: "genitive" - name: "одно" value: 1 diff --git a/resources/numex/sk.yaml b/resources/numex/sk.yaml index f04a1825..27f0d19c 100644 --- a/resources/numex/sk.yaml +++ b/resources/numex/sk.yaml @@ -1,5 +1,6 @@ --- whole_words_only: true + default_separator: "" rules: - name: "nula" @@ -9,11 +10,13 @@ name: "jeden" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "jedna" value: 1 type: "cardinal" gender: "f" + multiply_gte: 100 - name: "jedno" value: 1 @@ -233,119 +236,158 @@ value: 1000 type: "cardinal" 
left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "tisic" value: 1000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "milión" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "milion" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "milióny" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "miliony" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "miliónov" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "milionov" value: 1000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "miliarda" value: 1000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "miliardy" value: 1000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "miliardov" value: 1000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "bilión" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "bilion" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "bilióny" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "biliony" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "biliónov" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: 
"bilionov" value: 1000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "biliarda" value: 1000000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "biliardy" value: 1000000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" + right_separator: " " - name: "biliardov" value: 1000000000000000 type: "cardinal" left: "multiply" + left_separator: " " right: "add" - + right_separator: " " diff --git a/resources/numex/sl.yaml b/resources/numex/sl.yaml index d29aea14..160f279f 100644 --- a/resources/numex/sl.yaml +++ b/resources/numex/sl.yaml @@ -5,6 +5,7 @@ name: "nič" value: 0 type: "cardinal" + multiply_gte: 1000000 - name: "nic" value: 0 diff --git a/resources/numex/sv.yaml b/resources/numex/sv.yaml index 3c1337ac..9adefab3 100644 --- a/resources/numex/sv.yaml +++ b/resources/numex/sv.yaml @@ -6,9 +6,10 @@ value: 0 type: "cardinal" - - name: "et" + name: "en" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "ett" value: 1 @@ -18,11 +19,11 @@ value: 2 type: "cardinal" - - name: "tva" + name: "tvaa" value: 2 type: "cardinal" - - name: "tvaa" + name: "tva" value: 2 type: "cardinal" - @@ -50,11 +51,11 @@ value: 8 type: "cardinal" - - name: "atta" + name: "aatta" value: 8 type: "cardinal" - - name: "aatta" + name: "atta" value: 8 type: "cardinal" - @@ -137,12 +138,12 @@ type: "cardinal" right: "add" - - name: "attio" + name: "aattio" value: 80 type: "cardinal" right: "add" - - name: "aattio" + name: "attio" value: 80 type: "cardinal" right: "add" @@ -151,6 +152,11 @@ value: 90 type: "cardinal" right: "add" + - + name: "etthundra" + value: 100 + type: "cardinal" + right: "add" - name: "hundra" value: 100 @@ -168,50 +174,63 @@ type: "cardinal" left: "multiply" right: "add" + right_separator: " " - - name: "en miljon" + name: "miljon" value: 1000000 type: "cardinal" right: "add" + exact_multiple_only: true + right_separator: 
" " - name: "miljoner" value: 1000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - - name: "en miljard" + name: "miljard" value: 1000000000 type: "cardinal" right: "add" + exact_multiple_only: true + right_separator: " " - name: "miljarder" value: 1000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - - name: "en biljon" + name: "biljon" value: 1000000000000 type: "cardinal" right: "add" + exact_multiple_only: true + right_separator: " " - name: "biljoner" value: 1000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - - name: "en biljard" + name: "biljard" value: 1000000000000000 type: "cardinal" right: "add" + exact_multiple_only: true + right_separator: " " - name: "biljarder" value: 1000000000000000 type: "cardinal" left: "multiply" right: "add" + right_separator: " " - name: "nollte" value: 0 @@ -293,11 +312,11 @@ value: 8 type: "ordinal" - - name: "attonde" + name: "aattonde" value: 8 type: "ordinal" - - name: "aattonde" + name: "attonde" value: 8 type: "ordinal" - @@ -319,80 +338,80 @@ - name: "trettonde" value: 13 - type: "cardinal" + type: "ordinal" - name: "fjortonde" value: 14 - type: "cardinal" + type: "ordinal" - name: "femtonde" value: 15 - type: "cardinal" + type: "ordinal" - name: "sextonde" value: 16 - type: "cardinal" + type: "ordinal" - name: "sjuttonde" value: 17 - type: "cardinal" + type: "ordinal" - name: "artonde" value: 18 - type: "cardinal" + type: "ordinal" - name: "nittonde" value: 19 - type: "cardinal" + type: "ordinal" - name: "tjugonde" value: 20 - type: "cardinal" + type: "ordinal" - name: "trettionde" value: 30 - type: "cardinal" + type: "ordinal" - name: "fyrtionde" value: 40 - type: "cardinal" + type: "ordinal" - name: "femtionde" value: 50 - type: "cardinal" + type: "ordinal" - name: "sextionde" value: 60 - type: "cardinal" + type: "ordinal" - name: "sjuttionde" value: 70 - type: "cardinal" + type: "ordinal" - name: "åttionde" value: 80 - type: 
"cardinal" - - - name: "attionde" - value: 80 - type: "cardinal" + type: "ordinal" - name: "aattionde" value: 80 - type: "cardinal" + type: "ordinal" + - + name: "attionde" + value: 80 + type: "ordinal" - name: "nittionde" value: 90 - type: "cardinal" + type: "ordinal" - name: "hundrade" value: 100 - type: "cardinal" + type: "ordinal" left: "multiply" - name: "tusende" value: 1000 - type: "cardinal" + type: "ordinal" left: "multiply" - name: "miljonte" diff --git a/resources/numex/tr.yaml b/resources/numex/tr.yaml index 27be0e6e..466e8b3d 100644 --- a/resources/numex/tr.yaml +++ b/resources/numex/tr.yaml @@ -13,6 +13,7 @@ name: "bir" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "iki" value: 2 diff --git a/resources/numex/uk.yaml b/resources/numex/uk.yaml index 0a299bfb..2bdbd9e3 100644 --- a/resources/numex/uk.yaml +++ b/resources/numex/uk.yaml @@ -9,11 +9,13 @@ name: "один" value: 1 type: "cardinal" + multiply_gte: 1000000 - name: "одна" value: 1 type: "cardinal" gender: "f" + multiply_gte: 1000 - name: "одне" value: 1 diff --git a/resources/numex/zh.yaml b/resources/numex/zh.yaml index 1c7b7bc6..7bd04c51 100644 --- a/resources/numex/zh.yaml +++ b/resources/numex/zh.yaml @@ -1,4 +1,5 @@ --- + default_separator: "" rules: - name: "〇" diff --git a/scripts/geodata/numbers/spellout.py b/scripts/geodata/numbers/spellout.py new file mode 100644 index 00000000..f2020c10 --- /dev/null +++ b/scripts/geodata/numbers/spellout.py @@ -0,0 +1,180 @@ +import bisect +import os +import six +import yaml + +from collections import defaultdict + +from geodata.numbers.numex import NUMEX_DATA_DIR + + +class NumericExpressions(object): + default_separator = ' ' + + def __init__(self, base_dir=NUMEX_DATA_DIR): + self.cardinal_rules = {} + self.cardinal_rules_sorted = {} + self.cardinal_rules_ones = defaultdict(dict) + self.cardinal_rules_ones_sorted = {} + + self.default_separators = {} + + self.ordinal_rules = {} + self.ordinal_suffix_rules = {} + + for filename in 
os.listdir(base_dir): + if filename.endswith('.yaml'): + lang = filename.split('.yaml')[0] + f = open(os.path.join(base_dir, filename)) + data = yaml.load(f) + + default_separator = data.get('default_separator') + if default_separator is not None: + self.default_separators[lang] = default_separator + + rules = data.get('rules') + if rules is not None and hasattr(rules, '__getslice__'): + cardinals = defaultdict(list) + ordinals = defaultdict(list) + for rule in rules: + name = rule.get('name') + value = rule.get('value') + rule_type = rule.get('type') + if not name or type(value) not in (int, float) or rule_type not in ('cardinal', 'ordinal'): + continue + gender = rule.get('gender', None) + category = rule.get('category', None) + if rule_type == 'ordinal': + ordinals[(value, gender, category)].append(rule) + else: + cardinals[(value, gender, category)].append(rule) + if value == 1 and 'multiply_gte' in rule: + self.cardinal_rules_ones[lang][rule['multiply_gte']] = rule + + self.cardinal_rules[lang] = cardinals + self.ordinal_rules[lang] = ordinals + + self.cardinal_rules_sorted[lang] = sorted(set([v for v, g, c in cardinals])) + self.cardinal_rules_ones_sorted[lang] = sorted(self.cardinal_rules_ones[lang].keys()) + + self.cardinal_rules_ones = dict(self.cardinal_rules_ones) + + def spellout_cardinal(self, num, lang, gender=None, category=None): + num = int(num) + remainder = 0 + + if lang not in self.cardinal_rules: + return None + + rules = self.cardinal_rules.get(lang) + cardinals = self.cardinal_rules_sorted.get(lang) + if not rules or not cardinals: + return None + + default_separator = self.default_separators.get(lang, self.default_separator) + + cardinal_part = [] + + last_rule = {} + left_multiply_rules = [] + + while num: + i = bisect.bisect_left(cardinals, num) + if i > len(cardinals) - 1: + return None + if i > 0 and cardinals[i] > num: + val = cardinals[i - 1] + else: + val = cardinals[i] + + multiple = num // val + + if val == num: + cardinal = 
rules.get((num, gender, category)) + else: + cardinal = rules.get((val, None, None), []) + + multiple_rule = None + + if multiple > 1: + multiple_val = rules.get((multiple, None, None)) + if multiple_val: + multiple_rule = multiple_val[0] + elif multiple == 1 and lang in self.cardinal_rules_ones_sorted: + ones_rules = self.cardinal_rules_ones_sorted[lang] + j = bisect.bisect_right(ones_rules, val) + if j > 0 and ones_rules[j - 1] <= num: + multiple_rule = self.cardinal_rules_ones[lang][ones_rules[j - 1]] + + use_multiple = multiple > 1 + + is_left_multiply = False + did_left_multiply = False + + if not use_multiple: + rule = cardinal[0] if cardinal else None + else: + for rule in cardinal: + left_multiply = rule.get('left') == 'multiply' + if left_multiply: + if not multiple_rule: + left_multiply_rules.append(rule) + is_left_multiply = True + last_rule = rule + rule = None + break + else: + rule = None + + if rule is not None: + left_add = last_rule.get('left') == 'add' + right_add = last_rule.get('right') == 'add' + + if multiple_rule: + if right_add and cardinal_part: + cardinal_part.append(last_rule.get('left_separator', default_separator)) + cardinal_part.append(multiple_rule['name']) + cardinal_part.append(rule.get('left_separator', default_separator)) + + if right_add: + if not multiple_rule and cardinal_part: + right_separator = last_rule.get('right_separator', default_separator) + cardinal_part.append(right_separator) + cardinal_part.append(rule['name']) + elif left_add and cardinal_part: + last = cardinal_part.pop() + cardinal_part.append(rule['name']) + left_separator = last_rule.get('left_separator', default_separator) + cardinal_part.append(left_separator) + cardinal_part.append(last) + elif not left_add and not right_add: + cardinal_part.append(rule['name']) + + last_rule = rule + + if left_multiply_rules and 'right' not in rule and 'left' not in rule: + left_multiply_rule = left_multiply_rules.pop() + left_separator = 
left_multiply_rule.get('left_separator', default_separator) + cardinal_part.append(left_separator) + cardinal_part.append(left_multiply_rule['name']) + did_left_multiply = True + last_rule = left_multiply_rule + + if not is_left_multiply and not did_left_multiply: + num -= (multiple * val) + elif not did_left_multiply: + remainder = num % val + num /= val + else: + num = remainder + did_left_multiply = False + + return six.u('').join(cardinal_part) + + def roman_numeral(self, num): + numeral = self.spellout_cardinal(num, 'la') + if numeral is None: + return None + return numeral.upper() + +numeric_expressions = NumericExpressions() diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index dbf010bd..3265cc97 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -399,6 +399,12 @@ class OSMAddressFormatter(object): num_floors = self.num_floors(building_components) num_basements = self.num_floors(building_components, key='building:levels:underground') + building_tags = self.normalize_address_components(tags) + + for k, v in six.iteritems(building_tags): + if k not in revised_tags and k in (AddressFormatter.HOUSE_NUMBER, AddressFormatter.ROAD, AddressFormatter.HOUSE): + revised_tags[k] = v + subdivision_components = self.subdivision_components(latitude, longitude) if subdivision_components: zone = self.zone(subdivision_components)