From 06835d5c37ee4e399bec8f9f29b948578bf3ef67 Mon Sep 17 00:00:00 2001
From: Al
Date: Sat, 6 Jun 2015 20:41:07 -0400
Subject: [PATCH] [utils] string_utils category functions take a category
 instead of a codepoint

---
Review notes (text between "---" and the diffstat is ignored by git am):

 * utf8_is_hyphen() keeps its int32_t code point parameter. Its body tests
   `ch == 0x2212` (U+2212 MINUS SIGN, general category Sm rather than Pd),
   which cannot be expressed on a category value; converting the parameter
   to `int cat` left `ch` undeclared and broke the build.
 * NOTE(review): string_utils.h now declares utf8_is_letter(int cat), but the
   signature line of its definition lies outside the string_utils.c hunk
   (the hunk heading still shows `utf8_is_letter(int32_t ch)`). Confirm the
   definition is converted as well, otherwise callers passing a category
   will have it looked up again as a code point.
 * NOTE(review): indentation of the context lines was reconstructed and the
   blob "index" lines were dropped because the post-image changed; verify
   against the tree or apply with `git apply --ignore-whitespace`.

 src/string_utils.c | 15 +++++----------
 src/string_utils.h | 12 ++++++------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/src/string_utils.c b/src/string_utils.c
--- a/src/string_utils.c
+++ b/src/string_utils.c
@@ -149,39 +149,34 @@ inline bool utf8_is_letter(int32_t ch) {
             || cat == UTF8PROC_CATEGORY_LM;
 }
 
-inline bool utf8_is_number(int32_t ch) {
-    int cat = utf8proc_category(ch);
+inline bool utf8_is_number(int cat) {
     return cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO;
 }
 
-inline bool utf8_is_letter_or_number(int32_t ch) {
-    int cat = utf8proc_category(ch);
+inline bool utf8_is_letter_or_number(int cat) {
     return cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LU \
             || cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LO \
             || cat == UTF8PROC_CATEGORY_LM || cat == UTF8PROC_CATEGORY_ND \
             || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO;
 }
 
 inline bool utf8_is_hyphen(int32_t ch) {
     int cat = utf8proc_category(ch);
     return cat == UTF8PROC_CATEGORY_PD || ch == 0x2212;
 }
 
-inline bool utf8_is_punctuation(int32_t ch) {
-    int cat = utf8proc_category(ch);
+inline bool utf8_is_punctuation(int cat) {
     return cat == UTF8PROC_CATEGORY_PD || cat == UTF8PROC_CATEGORY_PE \
             || cat == UTF8PROC_CATEGORY_PF || cat == UTF8PROC_CATEGORY_PI \
             || cat == UTF8PROC_CATEGORY_PO || cat == UTF8PROC_CATEGORY_PS;
 }
 
-inline bool utf8_is_symbol(int32_t ch) {
-    int cat = utf8proc_category(ch);
+inline bool utf8_is_symbol(int cat) {
     return cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_SC \
             || cat == UTF8PROC_CATEGORY_SM || cat == UTF8PROC_CATEGORY_SO;
 }
 
-inline bool utf8_is_separator(int32_t ch) {
-    int cat = utf8proc_category(ch);
+inline bool utf8_is_separator(int cat) {
     return cat == UTF8PROC_CATEGORY_ZS || cat == UTF8PROC_CATEGORY_ZL || cat == UTF8PROC_CATEGORY_ZP;
 }
 
diff --git a/src/string_utils.h b/src/string_utils.h
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -30,13 +30,13 @@ uint string_translate(char *str, size_t len, char *word_chars, char *word_repls,
 char *utf8_reversed_string(const char *s); // returns a copy, caller frees
 ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst);
 
-bool utf8_is_letter(int32_t ch);
-bool utf8_is_number(int32_t ch);
-bool utf8_is_letter_or_number(int32_t ch);
+bool utf8_is_letter(int cat);
+bool utf8_is_number(int cat);
+bool utf8_is_letter_or_number(int cat);
 bool utf8_is_hyphen(int32_t ch);
-bool utf8_is_punctuation(int32_t ch);
-bool utf8_is_symbol(int32_t ch);
-bool utf8_is_separator(int32_t ch);
+bool utf8_is_punctuation(int cat);
+bool utf8_is_symbol(int cat);
+bool utf8_is_separator(int cat);
 
 size_t string_ltrim(char *str);
 size_t string_rtrim(char *str);