[phrases] adding token_phrase_memberships to trie_search for reuse
This commit is contained in:
@@ -778,6 +778,34 @@ inline phrase_t trie_search_prefixes(trie_t *self, char *word, size_t len) {
|
|||||||
return trie_search_prefixes_from_index_get_prefix_char(self, word, len, ROOT_NODE_ID);
|
return trie_search_prefixes_from_index_get_prefix_char(self, word, len, ROOT_NODE_ID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Appends one membership entry per token to phrase_memberships.
 *
 * Walks the phrases in array order with a single token cursor. Tokens that
 * fall before the next phrase (or after the last one, up to len) get
 * NULL_PHRASE_MEMBERSHIP; tokens inside a phrase get that phrase's index
 * within phrases.
 *
 * Entries are appended; phrase_memberships is not cleared first.
 *
 * @param phrases             phrases to map onto tokens
 *                            (NOTE(review): presumably sorted by start and
 *                            non-overlapping - confirm against callers)
 * @param phrase_memberships  output array receiving one entry per token
 * @param len                 token count used to pad the tail with
 *                            NULL_PHRASE_MEMBERSHIP
 * @return false if either pointer is NULL, true otherwise
 */
bool token_phrase_memberships(phrase_array *phrases, int64_array *phrase_memberships, size_t len) {
    if (phrases == NULL || phrase_memberships == NULL) {
        return false;
    }

    /* Index of the next token that still needs a membership entry. */
    int64_t i = 0;

    for (size_t j = 0; j < phrases->n; j++) {
        phrase_t phrase = phrases->a[j];

        /* Widen before adding so start + len cannot wrap in a narrower type. */
        int64_t phrase_start = (int64_t)phrase.start;
        int64_t phrase_end = phrase_start + (int64_t)phrase.len;

        /* Tokens between the previous phrase and this one belong to no phrase. */
        for (; i < phrase_start; i++) {
            int64_array_push(phrase_memberships, NULL_PHRASE_MEMBERSHIP);
            log_debug("token i=%lld, null phrase membership\n", (long long)i);
        }

        /*
         * Continue from the cursor instead of rewinding to phrase_start: if a
         * phrase begins before the cursor, tokens that already have an entry
         * are not emitted a second time, keeping one entry per token.
         */
        for (; i < phrase_end; i++) {
            log_debug("token i=%lld, phrase membership=%lld\n", (long long)i, (long long)j);
            int64_array_push(phrase_memberships, (int64_t)j);
        }
    }

    /* Remaining tokens after the last phrase belong to no phrase. */
    for (; (uint64_t)i < (uint64_t)len; i++) {
        log_debug("token i=%lld, null phrase membership\n", (long long)i);
        int64_array_push(phrase_memberships, NULL_PHRASE_MEMBERSHIP);
    }

    return true;
}
|
||||||
|
|
||||||
inline char *cstring_array_get_phrase(cstring_array *str, char_array *phrase_tokens, phrase_t phrase) {
|
inline char *cstring_array_get_phrase(cstring_array *str, char_array *phrase_tokens, phrase_t phrase) {
|
||||||
char_array_clear(phrase_tokens);
|
char_array_clear(phrase_tokens);
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ typedef struct phrase {
|
|||||||
VECTOR_INIT(phrase_array, phrase_t)
|
VECTOR_INIT(phrase_array, phrase_t)
|
||||||
|
|
||||||
#define NULL_PHRASE (phrase_t){0, 0, 0}
|
#define NULL_PHRASE (phrase_t){0, 0, 0}
|
||||||
|
/* Membership value for a token that belongs to no phrase. Parenthesized so the negative literal expands as a single unit in any expression. */
#define NULL_PHRASE_MEMBERSHIP (-1)
|
||||||
|
|
||||||
phrase_array *trie_search(trie_t *self, char *text);
|
phrase_array *trie_search(trie_t *self, char *text);
|
||||||
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
|
bool trie_search_from_index(trie_t *self, char *text, uint32_t start_node_id, phrase_array **phrases);
|
||||||
@@ -40,6 +41,8 @@ phrase_t trie_search_prefixes_from_index(trie_t *self, char *word, size_t len, u
|
|||||||
phrase_t trie_search_prefixes_from_index_get_prefix_char(trie_t *self, char *word, size_t len, uint32_t start_node_id);
|
phrase_t trie_search_prefixes_from_index_get_prefix_char(trie_t *self, char *word, size_t len, uint32_t start_node_id);
|
||||||
phrase_t trie_search_prefixes(trie_t *self, char *word, size_t len);
|
phrase_t trie_search_prefixes(trie_t *self, char *word, size_t len);
|
||||||
|
|
||||||
|
bool token_phrase_memberships(phrase_array *phrases, int64_array *phrase_memberships, size_t len);
|
||||||
|
|
||||||
char *cstring_array_get_phrase(cstring_array *str, char_array *phrase_tokens, phrase_t phrase);
|
char *cstring_array_get_phrase(cstring_array *str, char_array *phrase_tokens, phrase_t phrase);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user