[utils] string_utils category functions take a category instead of a codepoint

This commit is contained in:
Al
2015-06-06 20:41:07 -04:00
parent fc250724e1
commit 06835d5c37
2 changed files with 13 additions and 19 deletions

View File

@@ -149,39 +149,33 @@ inline bool utf8_is_letter(int32_t ch) {
|| cat == UTF8PROC_CATEGORY_LM;
}
inline bool utf8_is_number(int32_t ch) {
int cat = utf8proc_category(ch);
/*
 * Reports whether cat is one of the Unicode number categories:
 * Nd (decimal digit), Nl (letter number) or No (other number).
 *
 * cat is a general-category value, compared against the
 * UTF8PROC_CATEGORY_* constants; it is not a codepoint.
 */
inline bool utf8_is_number(int cat) {
    switch (cat) {
    case UTF8PROC_CATEGORY_ND:
    case UTF8PROC_CATEGORY_NL:
    case UTF8PROC_CATEGORY_NO:
        return true;
    default:
        return false;
    }
}
inline bool utf8_is_letter_or_number(int32_t ch) {
int cat = utf8proc_category(ch);
/*
 * Reports whether cat is a letter category (Ll, Lu, Lt, Lo, Lm) or a
 * number category (Nd, Nl, No).
 *
 * cat is a general-category value, compared against the
 * UTF8PROC_CATEGORY_* constants; it is not a codepoint.
 */
inline bool utf8_is_letter_or_number(int cat) {
    switch (cat) {
    /* letters */
    case UTF8PROC_CATEGORY_LL:
    case UTF8PROC_CATEGORY_LU:
    case UTF8PROC_CATEGORY_LT:
    case UTF8PROC_CATEGORY_LO:
    case UTF8PROC_CATEGORY_LM:
    /* numbers */
    case UTF8PROC_CATEGORY_ND:
    case UTF8PROC_CATEGORY_NL:
    case UTF8PROC_CATEGORY_NO:
        return true;
    default:
        return false;
    }
}
inline bool utf8_is_hyphen(int32_t ch) {
int cat = utf8proc_category(ch);
/*
 * Reports whether cat is the dash-punctuation category (Pd).
 *
 * Only a general category is passed in, so individual codepoints cannot
 * be singled out here. In particular U+2212 MINUS SIGN belongs to
 * category Sm and therefore does not match; a caller that wants it
 * treated as a hyphen has to compare the codepoint itself.
 */
inline bool utf8_is_hyphen(int cat) {
    return cat == UTF8PROC_CATEGORY_PD;
}
inline bool utf8_is_punctuation(int32_t ch) {
int cat = utf8proc_category(ch);
/*
 * Reports whether cat is one of the punctuation categories checked
 * here: Pd (dash), Pe (close), Pf (final quote), Pi (initial quote),
 * Po (other) or Ps (open). Pc (connector punctuation) is not in the
 * list.
 *
 * cat is a general-category value, compared against the
 * UTF8PROC_CATEGORY_* constants; it is not a codepoint.
 */
inline bool utf8_is_punctuation(int cat) {
    switch (cat) {
    case UTF8PROC_CATEGORY_PD:
    case UTF8PROC_CATEGORY_PE:
    case UTF8PROC_CATEGORY_PF:
    case UTF8PROC_CATEGORY_PI:
    case UTF8PROC_CATEGORY_PO:
    case UTF8PROC_CATEGORY_PS:
        return true;
    default:
        return false;
    }
}
inline bool utf8_is_symbol(int32_t ch) {
int cat = utf8proc_category(ch);
/*
 * Reports whether cat is one of the Unicode symbol categories:
 * Sk (modifier), Sc (currency), Sm (math) or So (other).
 *
 * cat is a general-category value, compared against the
 * UTF8PROC_CATEGORY_* constants; it is not a codepoint.
 */
inline bool utf8_is_symbol(int cat) {
    switch (cat) {
    case UTF8PROC_CATEGORY_SK:
    case UTF8PROC_CATEGORY_SC:
    case UTF8PROC_CATEGORY_SM:
    case UTF8PROC_CATEGORY_SO:
        return true;
    default:
        return false;
    }
}
inline bool utf8_is_separator(int32_t ch) {
int cat = utf8proc_category(ch);
/*
 * Reports whether cat is one of the Unicode separator categories:
 * Zs (space), Zl (line) or Zp (paragraph).
 *
 * cat is a general-category value, compared against the
 * UTF8PROC_CATEGORY_* constants; it is not a codepoint.
 */
inline bool utf8_is_separator(int cat) {
    switch (cat) {
    case UTF8PROC_CATEGORY_ZS:
    case UTF8PROC_CATEGORY_ZL:
    case UTF8PROC_CATEGORY_ZP:
        return true;
    default:
        return false;
    }
}

View File

@@ -30,13 +30,13 @@ uint string_translate(char *str, size_t len, char *word_chars, char *word_repls,
char *utf8_reversed_string(const char *s); // returns a copy, caller frees
ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
bool utf8_is_letter(int32_t ch);
bool utf8_is_number(int32_t ch);
bool utf8_is_letter_or_number(int32_t ch);
bool utf8_is_hyphen(int32_t ch);
bool utf8_is_punctuation(int32_t ch);
bool utf8_is_symbol(int32_t ch);
bool utf8_is_separator(int32_t ch);
/*
 * Unicode general-category predicates. Each is declared with a category
 * value (`cat`) as its argument rather than a codepoint; the
 * implementations compare it against UTF8PROC_CATEGORY_* constants.
 * Presumably the caller obtains the value from utf8proc_category() --
 * verify at the call sites.
 *
 * NOTE(review): the diff context for the implementation file still
 * shows `utf8_is_letter(int32_t ch)`; confirm that definition agrees
 * with the prototype below.
 */
bool utf8_is_letter(int cat);
bool utf8_is_number(int cat);
bool utf8_is_letter_or_number(int cat);
bool utf8_is_hyphen(int cat);
bool utf8_is_punctuation(int cat);
bool utf8_is_symbol(int cat);
bool utf8_is_separator(int cat);
size_t string_ltrim(char *str);
size_t string_rtrim(char *str);