[numex] Regenerate data file. utf8_is_hyphen now takes a character (code point), since it also checks for U+2212 directly; all other utf8_is_* methods take a Unicode category.
This commit is contained in:
@@ -722,6 +722,8 @@ numex_rule_source_t numex_rules[] = {
|
||||
{"octante", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 80LL}},
|
||||
{"quatre vignts", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
|
||||
{"quatrevignts", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
|
||||
{"quatre vignt", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
|
||||
{"quatrevignt", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 20, 80LL}},
|
||||
{"nonante", (numex_rule_t){NUMEX_LEFT_CONTEXT_NONE, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 90LL}},
|
||||
{"cent", (numex_rule_t){NUMEX_LEFT_CONTEXT_MULTIPLY, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 100LL}},
|
||||
{"cents", (numex_rule_t){NUMEX_LEFT_CONTEXT_MULTIPLY, NUMEX_RIGHT_CONTEXT_ADD, NUMEX_CARDINAL_RULE, GENDER_NONE, CATEGORY_DEFAULT, 10, 100LL}},
|
||||
@@ -1805,6 +1807,7 @@ ordinal_indicator_t ordinal_indicator_rules[] = {
|
||||
{"1", GENDER_FEMININE, CATEGORY_DEFAULT, "era"},
|
||||
{"0", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
|
||||
{"3", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
|
||||
{"3", GENDER_FEMININE, CATEGORY_DEFAULT, "ra"},
|
||||
{"2", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
|
||||
{"5", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
|
||||
{"4", GENDER_FEMININE, CATEGORY_DEFAULT, "a"},
|
||||
@@ -2182,16 +2185,16 @@ numex_language_source_t numex_languages[] = {
|
||||
{"da", 141, 42, 38, 0},
|
||||
{"de", 183, 194, 38, 0},
|
||||
{"en", 377, 70, 38, 13},
|
||||
{"es", 447, 133, 51, 28},
|
||||
{"fi", 580, 107, 79, 19},
|
||||
{"fr", 687, 192, 98, 80},
|
||||
{"it", 879, 163, 178, 20},
|
||||
{"ja", 1042, 18, 198, 0},
|
||||
{"la", 1060, 31, 198, 0},
|
||||
{"nl", 1091, 68, 198, 184},
|
||||
{"pl", 1159, 82, 382, 0},
|
||||
{"pt", 1241, 170, 382, 20},
|
||||
{"ru", 1411, 192, 402, 20},
|
||||
{"sv", 1603, 94, 422, 20},
|
||||
{"zh", 1697, 33, 442, 0}
|
||||
{"es", 447, 133, 51, 29},
|
||||
{"fi", 580, 107, 80, 19},
|
||||
{"fr", 687, 194, 99, 80},
|
||||
{"it", 881, 163, 179, 20},
|
||||
{"ja", 1044, 18, 199, 0},
|
||||
{"la", 1062, 31, 199, 0},
|
||||
{"nl", 1093, 68, 199, 184},
|
||||
{"pl", 1161, 82, 383, 0},
|
||||
{"pt", 1243, 170, 383, 20},
|
||||
{"ru", 1413, 192, 403, 20},
|
||||
{"sv", 1605, 94, 423, 20},
|
||||
{"zh", 1699, 33, 443, 0}
|
||||
};
|
||||
|
||||
@@ -142,8 +142,7 @@ error_free_output:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline bool utf8_is_letter(int32_t ch) {
|
||||
int cat = utf8proc_category(ch);
|
||||
inline bool utf8_is_letter(int cat) {
|
||||
return cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LU \
|
||||
|| cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LO \
|
||||
|| cat == UTF8PROC_CATEGORY_LM;
|
||||
@@ -160,7 +159,8 @@ inline bool utf8_is_letter_or_number(int cat) {
|
||||
|| cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO;
|
||||
}
|
||||
|
||||
inline bool utf8_is_hyphen(int cat) {
|
||||
inline bool utf8_is_hyphen(int32_t ch) {
|
||||
int cat = utf8proc_category(ch);
|
||||
return cat == UTF8PROC_CATEGORY_PD || ch == 0x2212;
|
||||
}
|
||||
|
||||
|
||||
@@ -30,10 +30,10 @@ uint string_translate(char *str, size_t len, char *word_chars, char *word_repls,
|
||||
|
||||
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
|
||||
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
|
||||
bool utf8_is_hyphen(int32_t ch);
|
||||
bool utf8_is_letter(int cat);
|
||||
bool utf8_is_number(int cat);
|
||||
bool utf8_is_letter_or_number(int cat);
|
||||
bool utf8_is_hyphen(int cat);
|
||||
bool utf8_is_punctuation(int cat);
|
||||
bool utf8_is_symbol(int cat);
|
||||
bool utf8_is_separator(int cat);
|
||||
|
||||
Reference in New Issue
Block a user