From 1b2696b3b5703bb38d4e8165abb5854bd170cfb1 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 15 Mar 2017 13:04:39 -0400 Subject: [PATCH] [utils] adding string_is_digit function, similar to Python\'s (i.e. counts if it's in the Nd unicode category) --- src/string_utils.c | 26 ++++++++++++++++++++++++++ src/string_utils.h | 1 + 2 files changed, 27 insertions(+) diff --git a/src/string_utils.c b/src/string_utils.c index b8333a84..a2ccc6a1 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -482,6 +482,32 @@ inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2)); } +bool string_is_digit(char *str, size_t len) { + uint8_t *ptr = (uint8_t *)str; + size_t idx = 0; + + bool ignorable = true; + + while (idx < len) { + int32_t ch; + ssize_t char_len = utf8proc_iterate(ptr, len, &ch); + + if (char_len <= 0) break; + if (ch == 0) break; + if (!(utf8proc_codepoint_valid(ch))) return false; + + int cat = utf8proc_category(ch); + if (cat != UTF8PROC_CATEGORY_ND) { + return false; + } + + ptr += char_len; + idx += char_len; + } + + return true; +} + bool string_is_ignorable(char *str, size_t len) { uint8_t *ptr = (uint8_t *)str; size_t idx = 0; diff --git a/src/string_utils.h b/src/string_utils.h index d3268bc0..0e7dd235 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -92,6 +92,7 @@ bool utf8_is_symbol(int cat); bool utf8_is_separator(int cat); bool utf8_is_whitespace(int32_t ch); +bool string_is_digit(char *str, size_t len); bool string_is_ignorable(char *str, size_t len); ssize_t string_next_hyphen_index(char *str, size_t len);