[fix] Trie prefix search tail comparison
This commit is contained in:
@@ -252,14 +252,18 @@ inline int utf8_compare(const char *str1, const char *str2) {
|
|||||||
return utf8_compare_len(str1, str2, strlen(str1));
|
return utf8_compare_len(str1, str2, strlen(str1));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t utf8_common_prefix(const char *str1, const char *str2) {
|
|
||||||
|
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len) {
|
||||||
size_t common_prefix = 0;
|
size_t common_prefix = 0;
|
||||||
|
|
||||||
|
if (len == 0) return common_prefix;
|
||||||
|
|
||||||
int32_t c1 = 0;
|
int32_t c1 = 0;
|
||||||
int32_t c2 = 0;
|
int32_t c2 = 0;
|
||||||
|
|
||||||
size_t len1 = strlen(str1);
|
size_t remaining = len;
|
||||||
size_t len2 = strlen(str2);
|
|
||||||
|
ssize_t len1, len2;
|
||||||
|
|
||||||
uint8_t *ptr1 = (uint8_t *)str1;
|
uint8_t *ptr1 = (uint8_t *)str1;
|
||||||
uint8_t *ptr2 = (uint8_t *)str2;
|
uint8_t *ptr2 = (uint8_t *)str2;
|
||||||
@@ -273,6 +277,9 @@ size_t utf8_common_prefix(const char *str1, const char *str2) {
|
|||||||
ptr1 += len1;
|
ptr1 += len1;
|
||||||
ptr2 += len2;
|
ptr2 += len2;
|
||||||
common_prefix += len1;
|
common_prefix += len1;
|
||||||
|
if (common_prefix >= len) {
|
||||||
|
return common_prefix;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -281,6 +288,15 @@ size_t utf8_common_prefix(const char *str1, const char *str2) {
|
|||||||
return common_prefix;
|
return common_prefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t utf8_common_prefix(const char *str1, const char *str2) {
|
||||||
|
size_t len1 = strlen(str1);
|
||||||
|
size_t len2 = strlen(str2);
|
||||||
|
|
||||||
|
size_t len = len1 <= len2 ? len1 : len2;
|
||||||
|
|
||||||
|
return utf8_common_prefix_len(str1, str2, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len) {
|
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len) {
|
||||||
if (len == 0) return 0;
|
if (len == 0) return 0;
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ char *utf8_lower(const char *s); // returns a copy, caller frees
|
|||||||
int utf8_compare(const char *str1, const char *str2);
|
int utf8_compare(const char *str1, const char *str2);
|
||||||
int utf8_compare_len(const char *str1, const char *str2, size_t len);
|
int utf8_compare_len(const char *str1, const char *str2, size_t len);
|
||||||
size_t utf8_common_prefix(const char *str1, const char *str2);
|
size_t utf8_common_prefix(const char *str1, const char *str2);
|
||||||
|
size_t utf8_common_prefix_len(const char *str1, const char *str2, size_t len);
|
||||||
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
size_t utf8_common_prefix_ignore_separators(const char *str1, const char *str2);
|
||||||
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
size_t utf8_common_prefix_len_ignore_separators(const char *str1, const char *str2, size_t len);
|
||||||
|
|
||||||
|
|||||||
@@ -696,10 +696,11 @@ phrase_t trie_search_prefixes_from_index(trie_t *self, char *word, size_t len, u
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
match_len += utf8_common_prefix_len_ignore_separators((char *)ptr + char_len, (char *)current_tail + tail_pos, current_tail_len - tail_pos);
|
size_t tail_match_len = utf8_common_prefix_len((char *)ptr + char_len, (char *)current_tail + tail_pos, current_tail_len - tail_pos);
|
||||||
|
match_len += tail_match_len;
|
||||||
log_debug("match_len=%zu\n", match_len);
|
log_debug("match_len=%zu\n", match_len);
|
||||||
|
|
||||||
if (match_len >= current_tail_len) {
|
if (tail_match_len == current_tail_len - tail_pos) {
|
||||||
if (first_char) phrase_start = idx;
|
if (first_char) phrase_start = idx;
|
||||||
phrase_len = (uint32_t)(idx + match_len) - phrase_start;
|
phrase_len = (uint32_t)(idx + match_len) - phrase_start;
|
||||||
|
|
||||||
@@ -717,7 +718,7 @@ phrase_t trie_search_prefixes_from_index(trie_t *self, char *word, size_t len, u
|
|||||||
if (terminal_node.check == node_id) {
|
if (terminal_node.check == node_id) {
|
||||||
log_debug("Transition to NUL byte matched\n");
|
log_debug("Transition to NUL byte matched\n");
|
||||||
if (terminal_node.base < 0) {
|
if (terminal_node.base < 0) {
|
||||||
phrase_len = idx + char_len - phrase_start;
|
phrase_len = (uint32_t)(idx + char_len) - phrase_start;
|
||||||
data_node = trie_get_data_node(self, terminal_node);
|
data_node = trie_get_data_node(self, terminal_node);
|
||||||
value = data_node.data;
|
value = data_node.data;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user