[utils] adding string_is_digit function, similar to Python's (i.e. a character counts as a digit if it is in the Nd Unicode category)

2017-03-15 13:04:39 -04:00
parent 1a1f0a44d2
commit 1b2696b3b5
2 changed files with 27 additions and 0 deletions
--- a/src/string_utils.c
+++ b/src/string_utils.c
@@ -482,6 +482,32 @@ inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char
    return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
 }
 /*
  * Report whether a UTF-8 string is made up solely of decimal digits,
  * i.e. code points whose category is UTF8PROC_CATEGORY_ND.
  *
  * str: UTF-8 bytes to inspect (not modified).
  * len: number of bytes of str to examine.
  *
  * Returns false as soon as a decoded code point is either rejected by
  * utf8proc_codepoint_valid() or is not in category Nd. Returns true
  * otherwise. Note that this includes the empty string, and that scanning
  * stops early (still returning true) at a NUL code point or when
  * utf8proc_iterate() reports a non-positive length.
  */
 bool string_is_digit(char *str, size_t len) {
    uint8_t *ptr = (uint8_t *)str;
    size_t idx = 0;
    while (idx < len) {
        int32_t ch;
        // Hand the decoder only the bytes still remaining after ptr. Passing
        // the full len here would let it read beyond the end of the buffer
        // once ptr has advanced.
        ssize_t char_len = utf8proc_iterate(ptr, (ssize_t)(len - idx), &ch);
        // Nothing consumed or a decode error: stop scanning.
        if (char_len <= 0) break;
        // Treat an embedded NUL as the end of the string.
        if (ch == 0) break;
        if (!(utf8proc_codepoint_valid(ch))) return false;
        int cat = utf8proc_category(ch);
        if (cat != UTF8PROC_CATEGORY_ND) {
            return false;
        }
        ptr += char_len;
        idx += char_len;
    }
    return true;
 }
 bool string_is_ignorable(char *str, size_t len) {
    uint8_t *ptr = (uint8_t *)str;
    size_t idx = 0;
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -92,6 +92,7 @@ bool utf8_is_symbol(int cat);
 bool utf8_is_separator(int cat);
 bool utf8_is_whitespace(int32_t ch);
 /*
  * Returns true when every code point decoded from the first len bytes of
  * the UTF-8 string str is in the Unicode decimal-digit category (Nd).
  * An empty string yields true; scanning stops at a NUL code point or at
  * bytes that cannot be decoded.
  */
 bool string_is_digit(char *str, size_t len);
 bool string_is_ignorable(char *str, size_t len);
 ssize_t string_next_hyphen_index(char *str, size_t len);