[numex] Regenerate data file. utf8_is_hyphen takes a code point (it also compares the character itself against U+2212); all other utf8_is_* predicates take a Unicode category

This commit is contained in:
Al
2015-06-08 21:32:01 -04:00
parent c1d0afa52c
commit 81be8e771e
3 changed files with 19 additions and 16 deletions

View File

@@ -722,6 +722,8 @@ numex_rule_source_t numex_rules[] = {
{"octante", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 80LL}}, {"octante", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 80LL}},
{"quatre vignts", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}}, {"quatre vignts", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
{"quatrevignts", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}}, {"quatrevignts", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
{"quatre vignt", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
{"quatrevignt", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
{"nonante", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 90LL}}, {"nonante", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 90LL}},
{"cent", (numex_rule_t){NUMEX_LEFT_CONTEXT_MULTIPLY, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 100LL}}, {"cent", (numex_rule_t){NUMEX_LEFT_CONTEXT_MULTIPLY, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 100LL}},
{"cents", (numex_rule_t){NUMEX_LEFT_CONTEXT_MULTIPLY, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 100LL}}, {"cents", (numex_rule_t){NUMEX_LEFT_CONTEXT_MULTIPLY, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 100LL}},
@@ -1805,6 +1807,7 @@ ordinal_indicator_t ordinal_indicator_rules[] = {
{"1", GENDER_FEMININE, CATEGORY_DEFAULT, "era"}, {"1", GENDER_FEMININE, CATEGORY_DEFAULT, "era"},
{"0", GENDER_FEMININE, CATEGORY_DEFAULT, "a"}, {"0", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
{"3", GENDER_FEMININE, CATEGORY_DEFAULT, "a"}, {"3", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
{"3", GENDER_FEMININE, CATEGORY_DEFAULT, "ra"},
{"2", GENDER_FEMININE, CATEGORY_DEFAULT, "a"}, {"2", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
{"5", GENDER_FEMININE, CATEGORY_DEFAULT, "a"}, {"5", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
{"4", GENDER_FEMININE, CATEGORY_DEFAULT, "a"}, {"4", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
@@ -2182,16 +2185,16 @@ numex_language_source_t numex_languages[] = {
{"da", 141, 42, 38, 0}, {"da", 141, 42, 38, 0},
{"de", 183, 194, 38, 0}, {"de", 183, 194, 38, 0},
{"en", 377, 70, 38, 13}, {"en", 377, 70, 38, 13},
{"es", 447, 133, 51, 28}, {"es", 447, 133, 51, 29},
{"fi", 580, 107, 79, 19}, {"fi", 580, 107, 80, 19},
{"fr", 687, 192, 98, 80}, {"fr", 687, 194, 99, 80},
{"it", 879, 163, 178, 20}, {"it", 881, 163, 179, 20},
{"ja", 1042, 18, 198, 0}, {"ja", 1044, 18, 199, 0},
{"la", 1060, 31, 198, 0}, {"la", 1062, 31, 199, 0},
{"nl", 1091, 68, 198, 184}, {"nl", 1093, 68, 199, 184},
{"pl", 1159, 82, 382, 0}, {"pl", 1161, 82, 383, 0},
{"pt", 1241, 170, 382, 20}, {"pt", 1243, 170, 383, 20},
{"ru", 1411, 192, 402, 20}, {"ru", 1413, 192, 403, 20},
{"sv", 1603, 94, 422, 20}, {"sv", 1605, 94, 423, 20},
{"zh", 1697, 33, 442, 0} {"zh", 1699, 33, 443, 0}
}; };

View File

@@ -142,8 +142,7 @@ error_free_output:
return NULL; return NULL;
} }
inline bool utf8_is_letter(int32_t ch) { inline bool utf8_is_letter(int cat) {
int cat = utf8proc_category(ch);
return cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LU \ return cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LU \
|| cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LO \ || cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LO \
|| cat == UTF8PROC_CATEGORY_LM; || cat == UTF8PROC_CATEGORY_LM;
@@ -160,7 +159,8 @@ inline bool utf8_is_letter_or_number(int cat) {
|| cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO; || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO;
} }
inline bool utf8_is_hyphen(int cat) { inline bool utf8_is_hyphen(int32_t ch) {
int cat = utf8proc_category(ch);
return cat == UTF8PROC_CATEGORY_PD || ch == 0x2212; return cat == UTF8PROC_CATEGORY_PD || ch == 0x2212;
} }

View File

@@ -30,10 +30,10 @@ uint string_translate(char *str, size_t len, char *word_chars, char *word_repls,
char *utf8_reversed_string(const char *s); // returns a copy, caller frees char *utf8_reversed_string(const char *s); // returns a copy, caller frees
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
bool utf8_is_hyphen(int32_t ch);
bool utf8_is_letter(int cat); bool utf8_is_letter(int cat);
bool utf8_is_number(int cat); bool utf8_is_number(int cat);
bool utf8_is_letter_or_number(int cat); bool utf8_is_letter_or_number(int cat);
bool utf8_is_hyphen(int cat);
bool utf8_is_punctuation(int cat); bool utf8_is_punctuation(int cat);
bool utf8_is_symbol(int cat); bool utf8_is_symbol(int cat);
bool utf8_is_separator(int cat); bool utf8_is_separator(int cat);