[fix] giving constant trie node names more specificity

This commit is contained in:
Al
2015-05-18 14:24:39 -04:00
parent c66f6f0fbe
commit eecee39904
3 changed files with 35 additions and 35 deletions

View File

@@ -106,7 +106,7 @@ inline bool trie_node_is_free(trie_node_t node) {
} }
inline trie_node_t trie_get_node(trie_t *self, uint32_t index) { inline trie_node_t trie_get_node(trie_t *self, uint32_t index) {
if ((index >= self->nodes->n) || index < ROOT_ID) return self->null_node; if ((index >= self->nodes->n) || index < ROOT_NODE_ID) return self->null_node;
return self->nodes->a[index]; return self->nodes->a[index];
} }
@@ -122,7 +122,7 @@ inline void trie_set_check(trie_t *self, uint32_t index, int32_t check) {
inline trie_node_t trie_get_root(trie_t *self) { inline trie_node_t trie_get_root(trie_t *self) {
return self->nodes->a[ROOT_ID]; return self->nodes->a[ROOT_NODE_ID];
} }
inline trie_node_t trie_get_free_list(trie_t *self) { inline trie_node_t trie_get_free_list(trie_t *self) {
@@ -233,7 +233,7 @@ static void trie_prune_up_to(trie_t *self, uint32_t p, uint32_t s) {
} }
static void trie_prune(trie_t *self, uint32_t s) { static void trie_prune(trie_t *self, uint32_t s) {
trie_prune_up_to(self, ROOT_ID, s); trie_prune_up_to(self, ROOT_NODE_ID, s);
} }
static void trie_get_transition_chars(trie_t *self, uint32_t node_id, unsigned char *transitions, uint32_t *num_transitions) { static void trie_get_transition_chars(trie_t *self, uint32_t node_id, unsigned char *transitions, uint32_t *num_transitions) {
@@ -621,8 +621,8 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data)
unsigned char *ptr = (unsigned char *)key; unsigned char *ptr = (unsigned char *)key;
uint32_t last_node_id = node_id; uint32_t last_node_id = node_id;
trie_node_t last_node = trie_get_node(self, node_id); trie_node_t last_node = trie_get_node(self, node_id);
if (last_node.base == NULL_ID) { if (last_node.base == NULL_NODE_ID) {
log_debug("last_node.base == NULL_ID, node_id = %d\n", node_id); log_debug("last_node.base == NULL_NODE_ID, node_id = %d\n", node_id);
return false; return false;
} }
@@ -636,7 +636,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data)
node_id = trie_get_transition_index(self, last_node, *ptr); node_id = trie_get_transition_index(self, last_node, *ptr);
log_debug("node_id=%d, last_node.base=%d, last_node.check=%d, char_index=%d\n", node_id, last_node.base, last_node.check, trie_get_char_index(self, *ptr)); log_debug("node_id=%d, last_node.base=%d, last_node.check=%d, char_index=%d\n", node_id, last_node.base, last_node.check, trie_get_char_index(self, *ptr));
if (node_id != NULL_ID) { if (node_id != NULL_NODE_ID) {
trie_make_room_for(self, node_id); trie_make_room_for(self, node_id);
} }
@@ -660,7 +660,7 @@ bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data)
bool trie_add(trie_t *self, char *key, uint32_t data) { bool trie_add(trie_t *self, char *key, uint32_t data) {
if (strlen(key) == 0) return false; if (strlen(key) == 0) return false;
return trie_add_at_index(self, ROOT_ID, key, data); return trie_add_at_index(self, ROOT_NODE_ID, key, data);
} }
bool trie_add_suffix(trie_t *self, char *key, uint32_t data) { bool trie_add_suffix(trie_t *self, char *key, uint32_t data) {
@@ -669,8 +669,8 @@ bool trie_add_suffix(trie_t *self, char *key, uint32_t data) {
uint32_t node_id = trie_get_transition_index(self, root, '\0'); uint32_t node_id = trie_get_transition_index(self, root, '\0');
trie_node_t node = trie_get_node(self, node_id); trie_node_t node = trie_get_node(self, node_id);
if (node.check != ROOT_ID) { if (node.check != ROOT_NODE_ID) {
node_id = trie_add_transition(self, ROOT_ID, '\0'); node_id = trie_add_transition(self, ROOT_NODE_ID, '\0');
} }
char *suffix = utf8_reversed_string(key); char *suffix = utf8_reversed_string(key);
@@ -681,15 +681,15 @@ bool trie_add_suffix(trie_t *self, char *key, uint32_t data) {
uint32_t trie_get_prefix_from_index(trie_t *self, char *key, size_t len, uint32_t i) { uint32_t trie_get_prefix_from_index(trie_t *self, char *key, size_t len, uint32_t i) {
if (key == NULL) return NULL_ID; if (key == NULL) return NULL_NODE_ID;
unsigned char *ptr = (unsigned char *)key; unsigned char *ptr = (unsigned char *)key;
uint32_t node_id = i; uint32_t node_id = i;
trie_node_t node = trie_get_node(self, i); trie_node_t node = trie_get_node(self, i);
if (node.base == NULL_ID) return NULL_ID; if (node.base == NULL_NODE_ID) return NULL_NODE_ID;
uint32_t next_id = NULL_ID; uint32_t next_id = NULL_NODE_ID;
// Include NUL-byte. It may be stored if this phrase is a prefix of a longer one // Include NUL-byte. It may be stored if this phrase is a prefix of a longer one
@@ -698,7 +698,7 @@ uint32_t trie_get_prefix_from_index(trie_t *self, char *key, size_t len, uint32_
node = trie_get_node(self, next_id); node = trie_get_node(self, next_id);
if (node.check != node_id) { if (node.check != node_id) {
return NULL_ID; return NULL_NODE_ID;
} }
} }
@@ -707,21 +707,21 @@ uint32_t trie_get_prefix_from_index(trie_t *self, char *key, size_t len, uint32_
} }
uint32_t trie_get_prefix_len(trie_t *self, char *key, size_t len) { uint32_t trie_get_prefix_len(trie_t *self, char *key, size_t len) {
return trie_get_prefix_from_index(self, key, len, ROOT_ID); return trie_get_prefix_from_index(self, key, len, ROOT_NODE_ID);
} }
uint32_t trie_get_prefix(trie_t *self, char *key) { uint32_t trie_get_prefix(trie_t *self, char *key) {
return trie_get_prefix_from_index(self, key, strlen(key), ROOT_ID); return trie_get_prefix_from_index(self, key, strlen(key), ROOT_NODE_ID);
} }
uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) { uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
if (word == NULL) return NULL_ID; if (word == NULL) return NULL_NODE_ID;
unsigned char *ptr = (unsigned char *)word; unsigned char *ptr = (unsigned char *)word;
uint32_t node_id = i; uint32_t node_id = i;
trie_node_t node = trie_get_node(self, i); trie_node_t node = trie_get_node(self, i);
if (node.base == NULL_ID) return NULL_ID; if (node.base == NULL_NODE_ID) return NULL_NODE_ID;
uint32_t next_id; uint32_t next_id;
@@ -732,7 +732,7 @@ uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
node = trie_get_node(self, next_id); node = trie_get_node(self, next_id);
if (node.check != node_id) { if (node.check != node_id) {
return NULL_ID; return NULL_NODE_ID;
} }
if (node.check == node_id && node.base < 0) { if (node.check == node_id && node.base < 0) {
@@ -749,7 +749,7 @@ uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
if (tail_match == 0) { if (tail_match == 0) {
return next_id; return next_id;
} else { } else {
return NULL_ID; return NULL_NODE_ID;
} }
} }
@@ -761,12 +761,12 @@ uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
} }
uint32_t trie_get_len(trie_t *self, char *word, size_t len) { uint32_t trie_get_len(trie_t *self, char *word, size_t len) {
return trie_get_from_index(self, word, len, ROOT_ID); return trie_get_from_index(self, word, len, ROOT_NODE_ID);
} }
uint32_t trie_get(trie_t *self, char *word) { uint32_t trie_get(trie_t *self, char *word) {
size_t word_len = strlen(word); size_t word_len = strlen(word);
return trie_get_from_index(self, word, word_len, ROOT_ID); return trie_get_from_index(self, word, word_len, ROOT_NODE_ID);
} }
/* /*

View File

@@ -31,9 +31,9 @@ extern "C" {
#include "string_utils.h" #include "string_utils.h"
#define TRIE_SIGNATURE 0xABABABAB #define TRIE_SIGNATURE 0xABABABAB
#define NULL_ID 0 #define NULL_NODE_ID 0
#define FREE_LIST_ID 1 #define FREE_LIST_ID 1
#define ROOT_ID 2 #define ROOT_NODE_ID 2
#define TRIE_POOL_BEGIN 3 #define TRIE_POOL_BEGIN 3
#define DEFAULT_NODE_ARRAY_SIZE 32 #define DEFAULT_NODE_ARRAY_SIZE 32

View File

@@ -20,7 +20,7 @@ phrase_array *trie_search(trie_t *self, char *text) {
const uint8_t *fail_ptr = ptr; const uint8_t *fail_ptr = ptr;
trie_node_t node = trie_get_root(self), last_node = node; trie_node_t node = trie_get_root(self), last_node = node;
uint32_t node_id = ROOT_ID; uint32_t node_id = ROOT_NODE_ID;
uint32_t next_id; uint32_t next_id;
bool match = false; bool match = false;
@@ -78,7 +78,7 @@ phrase_array *trie_search(trie_t *self, char *text) {
} }
fail_ptr = ptr; fail_ptr = ptr;
last_node = node = trie_get_root(self); last_node = node = trie_get_root(self);
node_id = ROOT_ID; node_id = ROOT_NODE_ID;
phrase_start = phrase_len = 0; phrase_start = phrase_len = 0;
last_state = state; last_state = state;
match = false; match = false;
@@ -209,7 +209,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
phrase_array *phrases = phrase_array_new(); phrase_array *phrases = phrase_array_new();
trie_node_t node = trie_get_root(self), last_node = node; trie_node_t node = trie_get_root(self), last_node = node;
uint32_t node_id = ROOT_ID, last_node_id = ROOT_ID; uint32_t node_id = ROOT_NODE_ID, last_node_id = ROOT_NODE_ID;
uint32_t data; uint32_t data;
@@ -244,7 +244,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
if (node.check != last_node_id && last_node.base >= 0) { if (node.check != last_node_id && last_node.base >= 0) {
log_debug("Fell off trie. last_node_id=%d and node.check=%d\n", last_node_id, node.check); log_debug("Fell off trie. last_node_id=%d and node.check=%d\n", last_node_id, node.check);
node = trie_get_root(self); node = trie_get_root(self);
node_id = ROOT_ID; node_id = ROOT_NODE_ID;
break; break;
} else if (node.base < 0) { } else if (node.base < 0) {
log_debug("Searching tail at index %d\n", i); log_debug("Searching tail at index %d\n", i);
@@ -268,7 +268,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
int tail_search_result = trie_node_search_tail_tokens(self, node, str, tokens, ptr_len, i+1); int tail_search_result = trie_node_search_tail_tokens(self, node, str, tokens, ptr_len, i+1);
if (tail_search_result == -1) { if (tail_search_result == -1) {
node = trie_get_root(self); node = trie_get_root(self);
node_id = ROOT_ID; node_id = ROOT_NODE_ID;
break; break;
} else { } else {
phrase_len = tail_search_result - phrase_start + 1; phrase_len = tail_search_result - phrase_start + 1;
@@ -279,7 +279,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
} else { } else {
node = trie_get_root(self); node = trie_get_root(self);
node_id = ROOT_ID; node_id = ROOT_NODE_ID;
break; break;
} }
} }
@@ -295,7 +295,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
last_match_index = -1; last_match_index = -1;
phrase_start = 0; phrase_start = 0;
last_node = trie_get_root(self); last_node = trie_get_root(self);
last_node_id = ROOT_ID; last_node_id = ROOT_NODE_ID;
continue; continue;
} else if (last_state == SEARCH_STATE_PARTIAL_MATCH) { } else if (last_state == SEARCH_STATE_PARTIAL_MATCH) {
log_debug("last_state == SEARCH_STATE_PARTIAL_MATCH\n"); log_debug("last_state == SEARCH_STATE_PARTIAL_MATCH\n");
@@ -307,7 +307,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
log_debug("Plain token=%.*s\n", token.len, str + token.offset); log_debug("Plain token=%.*s\n", token.len, str + token.offset);
} }
last_node = trie_get_root(self); last_node = trie_get_root(self);
last_node_id = ROOT_ID; last_node_id = ROOT_NODE_ID;
} else { } else {
state = SEARCH_STATE_PARTIAL_MATCH; state = SEARCH_STATE_PARTIAL_MATCH;
@@ -346,7 +346,7 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
phrase_array_push(phrases, (phrase_t){phrase_start, last_match_index - phrase_start + 1, data}); phrase_array_push(phrases, (phrase_t){phrase_start, last_match_index - phrase_start + 1, data});
last_match_index = -1; last_match_index = -1;
last_node = node = trie_get_root(self); last_node = node = trie_get_root(self);
last_node_id = node_id = ROOT_ID; last_node_id = node_id = ROOT_NODE_ID;
state = SEARCH_STATE_BEGIN; state = SEARCH_STATE_BEGIN;
} else { } else {
log_debug("Has continuation, node_id=%d\n", continuation_id); log_debug("Has continuation, node_id=%d\n", continuation_id);
@@ -365,12 +365,12 @@ phrase_array *trie_search_tokens(trie_t *self, char *str, token_array *tokens) {
} }
phrase_t trie_search_suffixes(trie_t *self, char *word) { phrase_t trie_search_suffixes(trie_t *self, char *word) {
uint32_t node_id = ROOT_ID, last_node_id = ROOT_ID; uint32_t node_id = ROOT_NODE_ID, last_node_id = ROOT_NODE_ID;
trie_node_t last_node = trie_get_root(self); trie_node_t last_node = trie_get_root(self);
node_id = trie_get_transition_index(self, last_node, '\0'); node_id = trie_get_transition_index(self, last_node, '\0');
trie_node_t node = trie_get_node(self, node_id); trie_node_t node = trie_get_node(self, node_id);
if (node.check != ROOT_ID) { if (node.check != ROOT_NODE_ID) {
return (phrase_t){0, 0, 0}; return (phrase_t){0, 0, 0};
} else { } else {
last_node = node; last_node = node;
@@ -467,7 +467,7 @@ phrase_t trie_search_suffixes(trie_t *self, char *word) {
} }
phrase_t trie_search_prefixes(trie_t *self, char *word) { phrase_t trie_search_prefixes(trie_t *self, char *word) {
uint32_t node_id = ROOT_ID, last_node_id = node_id; uint32_t node_id = ROOT_NODE_ID, last_node_id = node_id;
trie_node_t node = trie_get_root(self), last_node = node; trie_node_t node = trie_get_root(self), last_node = node;
uint32_t value = 0, phrase_start = 0, phrase_len = 0; uint32_t value = 0, phrase_start = 0, phrase_len = 0;