diff --git a/README.md b/README.md index 517eee6a..2955270c 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Here's a short list of some less straightforward normalizations in various langu | One-hundred twenty E 96th St | 120 east 96th street | | C/ Ocho, P.I. 4 | calle 8 polígono industrial 4 | | V XX Settembre, 20 | via 20 settembre 20 | -| Quatre vignt douze R. de l'Église | 92 rue de l' église | +| Quatre vingt douze R. de l'Église | 92 rue de l' église | | ул Каретный Ряд, д 4, строение 7 | улица каретныи ряд дом 4 строение 7 | | ул Каретный Ряд, д 4, строение 7 | ulitsa karetnyy ryad dom 4 stroyeniye 7 | | Marktstrasse 14 | markt straße 14 | @@ -50,7 +50,7 @@ Here's an example using the Python bindings for succinctness (most of the higher ```python from postal.expand import expand_address -expansions = expand_address('Quatre-vignt-douze Ave des Champs-Élysées') +expansions = expand_address('Quatre-vingt-douze Ave des Champs-Élysées') assert '92 avenue des champs-elysees' in set(expansions) ``` @@ -70,7 +70,7 @@ int main(int argc, char **argv) { size_t num_expansions; normalize_options_t options = get_libpostal_default_options(); - char **expansions = expand_address("Quatre-vignt-douze Ave des Champs-Élysées", options, &num_expansions); + char **expansions = expand_address("Quatre-vingt-douze Ave des Champs-Élysées", options, &num_expansions); for (size_t i = 0; i < num_expansions; i++) { printf("%s\n", expansions[i]); @@ -220,7 +220,7 @@ After building libpostal: ``` cd src/ -./libpostal "Quatre vignt douze Ave des Champs-Élysées" +./libpostal "Quatre vingt douze Ave des Champs-Élysées" ``` If you have a text file or stream with one address per line, the command-line interface also accepts input from stdin: @@ -310,7 +310,7 @@ The dictionaries are also used to abbreviate canonical phrases like "Calle" => " (performed on both the language classifier and the address parser training sets) - **Numeric expression parsing** ("twenty first" => 21st, -"quatre-vignt-douze" => 92, again using data provided in CLDR), supports > 30 +"quatre-vingt-douze" => 92, again using data provided in CLDR), supports > 30 languages. Handles languages with concatenated expressions e.g. milleottocento => 1800. Optionally normalizes Roman numerals regardless of the language (IX => 9) which occur in the names of many monarchs, popes, etc. diff --git a/resources/numex/fr.json b/resources/numex/fr.json index 7dd32243..ec47958e 100644 --- a/resources/numex/fr.json +++ b/resources/numex/fr.json @@ -173,28 +173,28 @@ "right": "add" }, { - "name": "quatre vignts", + "name": "quatre vingts", "value": 80, "type": "cardinal", "radix": 20, "right": "add" }, { - "name": "quatrevignts", + "name": "quatrevingts", "value": 80, "type": "cardinal", "radix": 20, "right": "add" }, { - "name": "quatre vignt", + "name": "quatre vingt", "value": 80, "type": "cardinal", "radix": 20, "right": "add" }, { - "name": "quatrevignt", + "name": "quatrevingt", "value": 80, "type": "cardinal", "radix": 20, @@ -814,23 +814,23 @@ "category": "plural" }, { - "name": "vigntieme", + "name": "vingtieme", "value": 20, "type": "ordinal" }, { - "name": "vigntième", + "name": "vingtième", "value": 20, "type": "ordinal" }, { - "name": "vigntiemes", + "name": "vingtiemes", "value": 20, "type": "ordinal", "category": "plural" }, { - "name": "vigntièmes", + "name": "vingtièmes", "value": 20, "type": "ordinal", "category": "plural" @@ -946,45 +946,45 @@ "category": "plural" }, { - "name": "quatre vigntieme", + "name": "quatre vingtieme", "value": 80, "type": "ordinal" }, { - "name": "quatre vigntième", + "name": "quatre vingtième", "value": 80, "type": "ordinal" }, { - "name": "quatre vigntiemes", + "name": "quatre vingtiemes", "value": 80, "type": "ordinal", "category": "plural" }, { - "name": "quatre vigntièmes", + "name": "quatre vingtièmes", "value": 80, "type": "ordinal", "category": "plural" }, { - "name": "quatrevigntieme", + "name": "quatrevingtieme", "value": 80, "type": "ordinal" }, { - "name": "quatrevigntième", + "name": "quatrevingtième", "value": 80, "type": "ordinal" }, { - "name": "quatrevigntiemes", + "name": "quatrevingtiemes", "value": 80, "type": "ordinal", "category": "plural" }, { - "name": "quatrevigntièmes", + "name": "quatrevingtièmes", "value": 80, "type": "ordinal", "category": "plural" @@ -1188,4 +1188,4 @@ "stopwords": [ "et" ] -} \ No newline at end of file +} diff --git a/test/test_numex.c b/test/test_numex.c index f01708cb..5f0c7639 100644 --- a/test/test_numex.c +++ b/test/test_numex.c @@ -35,9 +35,9 @@ TEST test_numeric_expressions(void) { CHECK_CALL(test_numex("ten and four", "10 and 4", "en")); // French (Celtic-style) numbers - CHECK_CALL(test_numex("quatre-vignt-douze", "92", "fr")); - CHECK_CALL(test_numex("quatre vignt douze", "92", "fr")); - CHECK_CALL(test_numex("quatre vignts", "80", "fr")); + CHECK_CALL(test_numex("quatre-vingt-douze", "92", "fr")); + CHECK_CALL(test_numex("quatre vingt douze", "92", "fr")); + CHECK_CALL(test_numex("quatre vingts", "80", "fr")); CHECK_CALL(test_numex("soixante-et-onze", "71", "fr")); CHECK_CALL(test_numex("soixante-cinq", "65", "fr"));