[phrases] New trie_add_nodes_only method for concatenating strings to the trie, plus boolean return values on trie_add_* APIs

This commit is contained in:
Al
2015-04-27 01:01:43 -04:00
parent 8bc77372ef
commit 8fb9bacfa6
2 changed files with 53 additions and 16 deletions

View File

@@ -609,12 +609,47 @@ void trie_print(trie_t *self) {
}
void trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data) {
uint32_t trie_add_nodes_only(trie_t *self, uint32_t node_id, char *key, size_t len) {
unsigned char *ptr = (unsigned char *)key;
uint32_t last_node_id = node_id;
trie_node_t last_node = trie_get_node(self, node_id);
if (last_node.base == NULL_ID) return NULL_ID;
uint32_t original_node_id = node_id;
trie_node_t node;
// Walks node until prefix reached, including the trailing \0
for (int i = 0; i < len; ptr++, i++, last_node_id = node_id, last_node = node) {
log_debug("--- char=%c\n", *ptr);
node_id = trie_get_transition_index(self, last_node, *ptr);
log_debug("node_id=%d\n", node_id);
if (node_id != NULL_ID) {
trie_make_room_for(self, node_id);
}
node = trie_get_node(self, node_id);
log_debug("node.check=%d, last_node_id=%d, node.base=%d\n", node.check, last_node_id, node.base);
if (node.check < 0 || (node.check != last_node_id)) {
node_id = trie_add_transition(self, last_node_id, *ptr);
if (node_id == TRIE_INDEX_ERROR) {
trie_prune_up_to(self, original_node_id, last_node_id);
return NULL_ID;
}
}
}
return node_id;
}
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data) {
int num_chars = strlen(key);
unsigned char *ptr = (unsigned char *)key;
uint32_t last_node_id = node_id;
trie_node_t last_node = trie_get_node(self, node_id);
if (last_node.base == NULL_ID) return;
if (last_node.base == NULL_ID) return false;
trie_node_t node;
@@ -634,25 +669,25 @@ void trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data)
if (node.check < 0 || (node.check != last_node_id)) {
log_debug("last_node_id=%d, ptr=%s, tail_pos=%zu\n", last_node_id, ptr, self->tail->n);
trie_separate_tail(self, last_node_id, ptr, data);
return;
return true;
} else if (node.base < 0 && node.check == last_node_id) {
log_debug("Case 3 insertion\n");
trie_tail_merge(self, node_id, ptr + 1, data);
return;
return true;
}
}
return;
return true;
}
void trie_add(trie_t *self, char *key, uint32_t data) {
if (strlen(key) == 0) return;
trie_add_at_index(self, ROOT_ID, key, data);
bool trie_add(trie_t *self, char *key, uint32_t data) {
if (strlen(key) == 0) return false;
return trie_add_at_index(self, ROOT_ID, key, data);
}
void trie_add_suffix(trie_t *self, char *key, uint32_t data) {
if (strlen(key) == 0) return;
bool trie_add_suffix(trie_t *self, char *key, uint32_t data) {
if (strlen(key) == 0) return false;
trie_node_t root = trie_get_root(self);
uint32_t node_id = trie_get_transition_index(self, root, '\0');
@@ -662,8 +697,9 @@ void trie_add_suffix(trie_t *self, char *key, uint32_t data) {
}
char *suffix = utf8_reversed_string(key);
trie_add_at_index(self, node_id, suffix, data);
bool success = trie_add_at_index(self, node_id, suffix, data);
free(suffix);
return success;
}
uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
@@ -680,7 +716,7 @@ uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
/* Include NUL-byte if we're looking for whole phrases.
* It may be stored if this phrase is a prefix of a longer one */
for (int i = 0; i < len; i++, ptr++, node_id = next_id) {
for (int i = 0; i < len + 1; i++, ptr++, node_id = next_id) {
next_id = trie_get_transition_index(self, node, *ptr);
node = trie_get_node(self, next_id);
@@ -700,7 +736,7 @@ uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i) {
int tail_match = strncmp((char *)current_tail, query_tail, query_tail_len);
if (tail_match == 0) {
return next_id;
return data_node.data;
} else {
return NULL_ID;
}

View File

@@ -91,9 +91,10 @@ void trie_set_tail(trie_t *self, unsigned char *tail, int32_t tail_pos);
int32_t trie_separate_tail(trie_t *self, uint32_t from_index, unsigned char *tail, uint32_t data);
void trie_tail_merge(trie_t *self, uint32_t old_node_id, unsigned char *suffix, uint32_t data);
void trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data);
void trie_add(trie_t *self, char *key, uint32_t data);
void trie_add_suffix(trie_t *self, char *key, uint32_t data);
uint32_t trie_add_nodes_only(trie_t *self, uint32_t node_id, char *key, size_t len);
bool trie_add_at_index(trie_t *self, uint32_t node_id, char *key, uint32_t data);
bool trie_add(trie_t *self, char *key, uint32_t data);
bool trie_add_suffix(trie_t *self, char *key, uint32_t data);
uint32_t trie_get_from_index(trie_t *self, char *word, size_t len, uint32_t i);
uint32_t trie_get_len(trie_t *self, char *word, size_t len);