From 591891951d9d12892ad03655af0edeb9229451ea Mon Sep 17 00:00:00 2001
From: Al
Date: Fri, 23 Feb 2018 01:22:58 -0500
Subject: [PATCH] [utils] adding utf8 case insensitive comparison

---
Review notes (not part of the commit message):
 - utf8_compare_len_option() now folds c1/c2 with utf8proc_tolower() before
   the final subtraction when case_insensitive is set. Previously a scan that
   stopped right after a folded match (e.g. "A" vs "a") returned the raw code
   point difference instead of 0.
 - utf8_compare_case_insensitive() keeps its len parameter so that it still
   matches the declaration added to string_utils.h, but the value is not read.

 src/string_utils.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 src/string_utils.h |  2 ++
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/src/string_utils.c b/src/string_utils.c
index 950e3004..9febcf92 100644
--- a/src/string_utils.c
+++ b/src/string_utils.c
@@ -452,7 +452,18 @@ size_t unicode_common_suffix(uint32_array *u1_array, uint32_array *u2_array) {
 
 
 
-int utf8_compare_len(const char *str1, const char *str2, size_t len) {
+
+/*
+ * Compare two UTF-8 strings one code point at a time.
+ *
+ * len bounds the comparison (presumably a byte budget: each matched pair
+ * reduces it by the amount str1's pointer advances); a len of 0 returns 0.
+ * When case_insensitive is true, code points that are equal after
+ * utf8proc_tolower() also count as a match.
+ *
+ * Returns the difference between the last pair of code points decoded,
+ * folded with utf8proc_tolower() first when case_insensitive is set.
+ */
+int utf8_compare_len_option(const char *str1, const char *str2, size_t len, bool case_insensitive) {
     if (len == 0) return 0;
 
     int32_t c1, c2;
@@ -469,7 +480,7 @@ int utf8_compare_len(const char *str1, const char *str2, size_t len) {
 
         if (c1 == 0 || c2 == 0) break;
 
-        if (c1 == c2) {
+        if (c1 == c2 || (case_insensitive && utf8proc_tolower(c1) == utf8proc_tolower(c2))) {
             ptr1 += len1;
             ptr2 += len2;
             remaining -= len1;
@@ -484,8 +495,39 @@ int utf8_compare_len(const char *str1, const char *str2, size_t len) {
-    return (int) c1 - c2;
+    if (case_insensitive) {
+        /* A pair matched by folding must compare equal, so fold before subtracting. */
+        c1 = utf8proc_tolower(c1);
+        c2 = utf8proc_tolower(c2);
+    }
+
+    return (int) c1 - c2;
 }
 
+/* Case-sensitive comparison bounded by the caller-supplied len. */
+inline int utf8_compare_len(const char *str1, const char *str2, size_t len) {
+    return utf8_compare_len_option(str1, str2, len, false);
+}
+
+/* Case-sensitive comparison; the bound is the longer string's byte length. */
 inline int utf8_compare(const char *str1, const char *str2) {
-    return utf8_compare_len(str1, str2, strlen(str1));
+    size_t len1 = strlen(str1);
+    size_t len2 = strlen(str2);
+    size_t max_len = len1 >= len2 ? len1 : len2;
+    return utf8_compare_len_option(str1, str2, max_len, false);
+}
+
+/* Case-insensitive comparison bounded by the caller-supplied len. */
+inline int utf8_compare_len_case_insensitive(const char *str1, const char *str2, size_t len) {
+    return utf8_compare_len_option(str1, str2, len, true);
+}
+
+/*
+ * Case-insensitive comparison; the bound is the longer string's byte length.
+ * The len parameter is not read.
+ */
+inline int utf8_compare_case_insensitive(const char *str1, const char *str2, size_t len) {
+    size_t len1 = strlen(str1);
+    size_t len2 = strlen(str2);
+    size_t max_len = len1 >= len2 ? len1 : len2;
+    return utf8_compare_len_option(str1, str2, max_len, true);
 }
 
 
diff --git a/src/string_utils.h b/src/string_utils.h
index 30c583b6..915f9e39 100644
--- a/src/string_utils.h
+++ b/src/string_utils.h
@@ -78,6 +78,8 @@ char *utf8_upper(const char *s);
 
 int utf8_compare(const char *str1, const char *str2);
 int utf8_compare_len(const char *str1, const char *str2, size_t len);
+int utf8_compare_case_insensitive(const char *str1, const char *str2, size_t len);
+int utf8_compare_len_case_insensitive(const char *str1, const char *str2, size_t len);
 size_t utf8_common_prefix(const char *str1, const char *str2);
 size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
 size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);