diff --git a/src/string_utils.c b/src/string_utils.c index c9da7792..81c852c2 100644 --- a/src/string_utils.c +++ b/src/string_utils.c @@ -1,5 +1,4 @@ #include - #include "string_utils.h" #define INVALID_INDEX(i, n) ((i) < 0 || (i) >= (n)) @@ -219,8 +218,16 @@ int utf8_compare_len(const char *str1, const char *str2, size_t len) { return (int) c1 - c2; } +/* Compares two NUL-terminated UTF-8 strings by delegating to utf8_compare_len over their full extent. */ +inline int utf8_compare(const char *str1, const char *str2) { + size_t len1 = strlen(str1); + size_t len2 = strlen(str2); + /* Bound by the longer string: bounding by strlen(str1) alone would let a strict prefix (or an empty str1) compare equal. */ + return utf8_compare_len(str1, str2, len1 >= len2 ? len1 : len2); +} -int utf8_compare_len_ignore_separators(const char *str1, const char *str2, size_t len) { + +size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len) { if (len == 0) return 0; int32_t c1, c2; @@ -231,6 +238,8 @@ int utf8_compare_len_ignore_separators(const char *str1, const char *str2, size_ size_t remaining = len; + size_t match_len = 0; + while (1) { len1 = utf8proc_iterate(ptr1, -1, &c1); len2 = utf8proc_iterate(ptr2, -1, &c2); @@ -241,11 +250,13 @@ int utf8_compare_len_ignore_separators(const char *str1, const char *str2, size_ ptr1 += len1; ptr2 += len2; remaining -= len1; + match_len += len1; } else if (utf8_is_hyphen(c1) || utf8_is_separator(utf8proc_category(c1))) { ptr1 += len1; - remaining -= len1; + match_len += len1; } else if (utf8_is_hyphen(c2) || utf8_is_separator(utf8proc_category(c2))) { ptr2 += len2; + remaining -= len2; } else { break; } @@ -254,36 +265,12 @@ int utf8_compare_len_ignore_separators(const char *str1, const char *str2, size_ } - return (int) c1 - c2; + return match_len; } -int utf8_compare_ignore_separators(const char *str1, const char *str2) { - int32_t c1, c2; - ssize_t len1, len2; - - uint8_t *ptr1 = (uint8_t *)str1; - uint8_t *ptr2 = (uint8_t *)str2; - - while (1) { - len1 = utf8proc_iterate(ptr1, -1, &c1); - len2 = utf8proc_iterate(ptr2, -1, &c2); - - if (c1 == 0 || c2 == 0) break; - - if (c1 == c2) { - ptr1 += len1; - ptr2 += len2; - } else if (utf8_is_hyphen(c1) || utf8_is_separator(utf8proc_category(c1))) { - ptr1 += 
len1; - } else if (utf8_is_hyphen(c2) || utf8_is_separator(utf8proc_category(c2))) { - ptr2 += len2; - } else { - break; - } - - } - - return (int) c1 - c2; +/* Convenience wrapper: calls utf8_common_prefix_len_ignore_separators with len = strlen(str2). */ +inline size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2) { + return utf8_common_prefix_len_ignore_separators(str1, str2, strlen(str2)); } diff --git a/src/string_utils.h b/src/string_utils.h index 7eec1fc0..3d1f1799 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -50,8 +50,8 @@ ssize_t utf8proc_iterate_reversed(const uint8_t *str, ssize_t start, int32_t *ds char *utf8_lower(const char *s); // returns a copy, caller frees int utf8_compare(const char *str1, const char *str2); int utf8_compare_len(const char *str1, const char *str2, size_t len); -int utf8_compare_ignore_separators(const char *str1, const char *str2); -int utf8_compare_len_ignore_separators(const char *str1, const char *str2, size_t len); +size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2); +size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len); bool utf8_is_hyphen(int32_t ch); bool utf8_is_letter(int cat);