[utils] adding utf8_len function for strings, and utf8_is_digit
This commit is contained in:
@@ -293,6 +293,10 @@ inline bool utf8_is_letter(int cat) {
|
|||||||
|| cat == UTF8PROC_CATEGORY_LM;
|
|| cat == UTF8PROC_CATEGORY_LM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool utf8_is_digit(int cat) {
|
||||||
|
return cat == UTF8PROC_CATEGORY_ND;
|
||||||
|
}
|
||||||
|
|
||||||
/*
 * utf8_is_number: true when cat is any of UTF8PROC_CATEGORY_ND,
 * UTF8PROC_CATEGORY_NL or UTF8PROC_CATEGORY_NO.
 *
 * NOTE(review): every line of this function appears twice below, separated by
 * bar markers. This looks like unchanged context shown in both columns of a
 * side-by-side diff rather than two definitions -- confirm against the real
 * source file.
 */
inline bool utf8_is_number(int cat) {
|
inline bool utf8_is_number(int cat) {
|
||||||
return cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO;
|
return cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_NL || cat == UTF8PROC_CATEGORY_NO;
|
||||||
}
|
}
|
||||||
@@ -336,6 +340,34 @@ inline bool utf8_is_whitespace(int32_t ch) {
|
|||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Count the UTF-8 encoded characters (code points) in the first len bytes
 * of str.
 *
 * str: UTF-8 encoded buffer; may be NULL.
 * len: maximum number of bytes of str to examine.
 *
 * Returns -1 if str is NULL, otherwise the number of complete, valid
 * characters decoded. Counting stops at the first of:
 *   - len bytes consumed,
 *   - a NUL character (the string is shorter than len),
 *   - an invalid sequence, or one that len cuts in the middle.
 */
ssize_t utf8_len(const char *str, size_t len) {
    if (str == NULL) return -1;

    const uint8_t *ptr = (const uint8_t *)str;
    size_t remaining = len;
    ssize_t num_utf8_chars = 0;

    while (remaining > 0) {
        int32_t ch = 0;

        /* A UTF-8 sequence is at most 4 bytes. Clamping keeps the decoder
           inside the caller's buffer and avoids a size_t -> ssize_t
           overflow for very large len. */
        ssize_t avail = (ssize_t)(remaining < 4 ? remaining : 4);
        ssize_t char_len = utf8proc_iterate(ptr, avail, &ch);

        /* utf8proc_iterate returns a negative code on malformed or truncated
           input; advancing by that value would walk backwards. */
        if (char_len <= 0) break;

        /* NUL terminator reached before len bytes: end of string. */
        if (ch == 0) break;

        ptr += char_len;
        remaining -= (size_t)char_len;

        /* One character per decoded sequence, not one per byte. */
        num_utf8_chars++;
    }

    return num_utf8_chars;
}
|
||||||
|
|
||||||
|
|
||||||
int utf8_compare_len(const char *str1, const char *str2, size_t len) {
|
int utf8_compare_len(const char *str1, const char *str2, size_t len) {
|
||||||
if (len == 0) return 0;
|
if (len == 0) return 0;
|
||||||
|
|
||||||
|
|||||||
@@ -83,6 +83,8 @@ size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
|
|||||||
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||||
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
||||||
|
|
||||||
|
/*
 * Length of the UTF-8 string str, examining at most len bytes.
 * Returns -1 when str is NULL and 0 when len is 0.
 * NOTE(review): intended unit is presumably characters rather than bytes --
 * confirm against the definition.
 */
ssize_t utf8_len(const char *str, size_t len);
|
||||||
|
|
||||||
bool utf8_is_hyphen(int32_t ch);
|
bool utf8_is_hyphen(int32_t ch);
|
||||||
bool utf8_is_letter(int cat);
|
bool utf8_is_letter(int cat);
|
||||||
bool utf8_is_number(int cat);
|
bool utf8_is_number(int cat);
|
||||||
|
|||||||
Reference in New Issue
Block a user