From 09fbb02042882bdfa54a0ae23cffa6785c4bf6e3 Mon Sep 17 00:00:00 2001
From: Al
Date: Sat, 14 Oct 2017 01:36:56 -0400
Subject: [PATCH] [utils] adding utf8_equal_ignore_separators to string utils

---
 src/string_utils.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 src/string_utils.h |  3 +++
 2 files changed, 80 insertions(+)

diff --git a/src/string_utils.c b/src/string_utils.c
index b2dc2bbd..9d27cc37 100644
--- a/src/string_utils.c
+++ b/src/string_utils.c
@@ -513,6 +513,83 @@ inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char
     return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2));
 }
 
+/*
+ * Returns the next code point in *ptr that is neither a hyphen nor a
+ * separator, reading at most *remaining bytes and stopping at NUL. *ptr and
+ * *remaining are advanced past everything consumed.
+ *
+ * The result is > 0 for a code point, 0 at the end of the string or of the
+ * byte budget, and in [-256, -1] for a byte that is not valid UTF-8. Such a
+ * byte is consumed on its own and only compares equal to the same raw byte.
+ */
+static int32_t utf8_next_non_separator(const uint8_t **ptr, size_t *remaining) {
+    while (*remaining > 0) {
+        int32_t ch = 0;
+        // Bounded read: a truncated sequence must not run past the budget
+        ssize_t limit = *remaining < 4 ? (ssize_t)*remaining : 4;
+        ssize_t char_len = utf8proc_iterate(*ptr, limit, &ch);
+
+        if (char_len < 0) {
+            // Negative values are error codes, never a length to advance by
+            ch = -1 - (int32_t)**ptr;
+            (*ptr)++;
+            (*remaining)--;
+            return ch;
+        }
+
+        if (char_len == 0 || ch == 0) break;
+
+        *ptr += char_len;
+        *remaining -= (size_t)char_len;
+
+        if (!utf8_is_hyphen(ch) && !utf8_is_separator(utf8proc_category(ch))) {
+            return ch;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Compares str1 and str2 code point by code point while skipping hyphens
+ * (utf8_is_hyphen) and separators (utf8_is_separator on the utf8proc
+ * category), so that e.g. "a-b" matches "ab".
+ *
+ * At most len bytes of each string are examined and a NUL byte ends either
+ * string early. Returns false when len == 0.
+ */
+bool utf8_equal_ignore_separators_len(const char *str1, const char *str2, size_t len) {
+    if (len == 0) return false;
+
+    const uint8_t *ptr1 = (const uint8_t *)str1;
+    const uint8_t *ptr2 = (const uint8_t *)str2;
+
+    size_t remaining1 = len;
+    size_t remaining2 = len;
+
+    while (1) {
+        int32_t c1 = utf8_next_non_separator(&ptr1, &remaining1);
+        int32_t c2 = utf8_next_non_separator(&ptr2, &remaining2);
+
+        if (c1 != c2) return false;
+        // Both strings ended together, so every code point matched
+        if (c1 == 0) return true;
+    }
+}
+
+/*
+ * NUL-terminated variant of utf8_equal_ignore_separators_len. The byte limit
+ * is the longer of the two lengths, so two empty strings give len == 0 and
+ * compare as not equal.
+ */
+inline bool utf8_equal_ignore_separators(const char *str1, const char *str2) {
+    size_t len1 = strlen(str1);
+    size_t len2 = strlen(str2);
+    size_t len = len1 > len2 ? len1 : len2;
+
+    return utf8_equal_ignore_separators_len(str1, str2, len);
+}
+
 bool string_is_digit(char *str, size_t len) {
     uint8_t *ptr = (uint8_t *)str;
     size_t idx = 0;
diff --git a/src/string_utils.h b/src/string_utils.h
index 0cf0382c..852f1813 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -83,6 +83,9 @@ size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
 size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
 size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
 
+bool utf8_equal_ignore_separators(const char *str1, const char *str2);
+bool utf8_equal_ignore_separators_len(const char *str1, const char *str2, size_t len);
+
 ssize_t utf8_len(const char *str, size_t len);
 
 bool utf8_is_hyphen(int32_t ch);