From bdb51a244e72099b7b96329b4e311bda4490d6ee Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 29 Dec 2016 16:15:33 -0500 Subject: [PATCH] [phrases] fix case in trie search when searching for tokens in a string tail. If we're on the last token in a sequence and the token matches the tail, check that the tail is complete, and if so return the match before exiting the loop. Affects multiword phrases that tend to appear toward the end of a sequence (long country names like "United States of America", etc.) --- src/trie_search.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/trie_search.c b/src/trie_search.c index c8e2922e..bb5dc262 100644 --- a/src/trie_search.c +++ b/src/trie_search.c @@ -198,11 +198,12 @@ int trie_node_search_tail_tokens(trie_t *self, trie_node_t node, char *str, toke if (!(*tail_ptr)) { log_debug("tail matches!\n"); - return token_index-1; + return token_index - 1; } log_debug("Searching tail: %s\n", tail_ptr); - for (int i = token_index; i < tokens->n; i++) { + size_t num_tokens = tokens->n; + for (int i = token_index; i < num_tokens; i++) { token_t token = tokens->a[i]; char *ptr = str + token.offset; @@ -210,7 +211,7 @@ int trie_node_search_tail_tokens(trie_t *self, trie_node_t node, char *str, toke if (!(*tail_ptr)) { log_debug("tail matches!\n"); - return i-1; + return i - 1; } if (token.type == WHITESPACE && *tail_ptr == ' ') continue; @@ -224,6 +225,10 @@ int trie_node_search_tail_tokens(trie_t *self, trie_node_t node, char *str, toke if (strncmp((char *)tail_ptr, ptr, token_length) == 0) { tail_ptr += token_length; + + if (i == num_tokens - 1 && !(*tail_ptr)) { + return i; + } } else { return -1; }