From 528dd05983a3bdf897a0de19949f2901fd75358d Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 4 Jun 2015 14:49:12 -0400 Subject: [PATCH] [numex] Adding utf8_is_letter_or_number --- src/string_utils.c | 8 ++++++++ src/string_utils.h | 1 + 2 files changed, 9 insertions(+) diff --git a/src/string_utils.c b/src/string_utils.c index bd63b056..1c5db686 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -154,6 +154,14 @@ inline bool utf8_is_number(int32_t ch) { return cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO; } +inline bool utf8_is_letter_or_number(int32_t ch) { + int cat = utf8proc_category(ch); + return cat == UTF8PROC_CATEGORY_LL || cat == UTF8PROC_CATEGORY_LU \ + || cat == UTF8PROC_CATEGORY_LT || cat == UTF8PROC_CATEGORY_LO \ + || cat == UTF8PROC_CATEGORY_LM || cat == UTF8PROC_CATEGORY_ND \ + || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO; +} + inline bool utf8_is_hyphen(int32_t ch) { int cat = utf8proc_category(ch); return cat == UTF8PROC_CATEGORY_PD || c == 0x2212; diff --git a/src/string_utils.h b/src/string_utils.h index d1bc634c..5163a44b 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -32,6 +32,7 @@ char *utf8_reversed_string(const char *s); // returns a copy, caller frees ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *dst); bool utf8_is_letter(int32_t ch); bool utf8_is_number(int32_t ch); +bool utf8_is_letter_or_number(int32_t ch); bool utf8_is_hyphen(int32_t ch); bool utf8_is_punctuation(int32_t ch); bool utf8_is_symbol(int32_t ch);