[phrases] trie I/O using the uint APIs, fixes to trie_get_prefix_from_index

This commit is contained in:
Al
2015-05-27 16:06:35 -04:00
parent 897c29ccb8
commit ad8e92182c

View File

@@ -695,15 +695,15 @@ inline trie_data_node_t trie_get_data_node(trie_t *self, trie_node_t node) {
return data_node; return data_node;
} }
trie_prefix_result_t trie_get_prefix_from_index(trie_t *self, char *key, size_t len, uint32_t i, size_t tail_pos) { trie_prefix_result_t trie_get_prefix_from_index(trie_t *self, char *key, size_t len, uint32_t start_index, size_t tail_pos) {
if (key == NULL) { if (key == NULL) {
return NULL_PREFIX_RESULT; return NULL_PREFIX_RESULT;
} }
unsigned char *ptr = (unsigned char *)key; unsigned char *ptr = (unsigned char *)key;
uint32_t node_id = i; uint32_t node_id = start_index;
trie_node_t node = trie_get_node(self, i); trie_node_t node = trie_get_node(self, node_id);
if (node.base == NULL_NODE_ID) { if (node.base == NULL_NODE_ID) {
return NULL_PREFIX_RESULT; return NULL_PREFIX_RESULT;
} }
@@ -712,9 +712,11 @@ trie_prefix_result_t trie_get_prefix_from_index(trie_t *self, char *key, size_t
bool original_node_no_tail = node.base >= 0; bool original_node_no_tail = node.base >= 0;
int i = 0;
if (node.base >= 0) { if (node.base >= 0) {
// Include NUL-byte. It may be stored if this phrase is a prefix of a longer one // Include NUL-byte. It may be stored if this phrase is a prefix of a longer one
for (int i = 0; i < len; i++, ptr++, node_id = next_id) { for (i = 0; i < len; i++, ptr++, node_id = next_id) {
next_id = trie_get_transition_index(self, node, *ptr); next_id = trie_get_transition_index(self, node, *ptr);
node = trie_get_node(self, next_id); node = trie_get_node(self, next_id);
@@ -726,13 +728,14 @@ trie_prefix_result_t trie_get_prefix_from_index(trie_t *self, char *key, size_t
} }
} else { } else {
next_id = node_id; next_id = node_id;
node = trie_get_node(self, node_id);
} }
if (node.base < 0) { if (node.base < 0) {
trie_data_node_t data_node = trie_get_data_node(self, node); trie_data_node_t data_node = trie_get_data_node(self, node);
char *query_tail = *ptr && original_node_no_tail ? (char *)ptr + 1 : (char *)ptr; char *query_tail = (*ptr && original_node_no_tail) ? (char *)ptr + 1 : (char *)ptr;
size_t query_len = strlen(query_tail); size_t query_len = (*ptr && original_node_no_tail) ? len - i - 1 : len - i;
if (data_node.tail != 0 && trie_compare_tail(self, query_tail, query_len, data_node.tail + tail_pos)) { if (data_node.tail != 0 && trie_compare_tail(self, query_tail, query_len, data_node.tail + tail_pos)) {
return (trie_prefix_result_t){next_id, query_len}; return (trie_prefix_result_t){next_id, query_len};
@@ -740,7 +743,8 @@ trie_prefix_result_t trie_get_prefix_from_index(trie_t *self, char *key, size_t
return NULL_PREFIX_RESULT; return NULL_PREFIX_RESULT;
} }
} else {
return (trie_prefix_result_t){next_id, 0};
} }
return NULL_PREFIX_RESULT; return NULL_PREFIX_RESULT;
@@ -829,13 +833,13 @@ I/O methods
*/ */
bool trie_write(trie_t *self, FILE *file) { bool trie_write(trie_t *self, FILE *file) {
if (!file_write_int32(file, (int32_t)TRIE_SIGNATURE)) if (!file_write_uint32(file, TRIE_SIGNATURE))
return false; return false;
if (!file_write_int32(file, (int32_t)self->alphabet_size)) if (!file_write_uint32(file, (uint32_t)self->alphabet_size))
return false; return false;
if (!file_write_chars(file, (char *)self->alphabet, self->alphabet_size)) if (!file_write_chars(file, (char *)self->alphabet, self->alphabet_size))
return false; return false;
if (!file_write_int32(file, (int32_t)self->nodes->n)) if (!file_write_uint32(file, (uint32_t)self->nodes->n))
return false; return false;
int i; int i;
@@ -843,25 +847,25 @@ bool trie_write(trie_t *self, FILE *file) {
for (i = 0; i < self->nodes->n; i++) { for (i = 0; i < self->nodes->n; i++) {
node = self->nodes->a[i]; node = self->nodes->a[i];
if (!file_write_int32(file, node.base) || if (!file_write_uint32(file, (uint32_t)node.base) ||
!file_write_int32(file, node.check)) { !file_write_uint32(file, (uint32_t)node.check)) {
return false; return false;
} }
} }
if (!file_write_int32(file, (int32_t)self->data->n)) if (!file_write_uint32(file, (uint32_t)self->data->n))
return false; return false;
trie_data_node_t data_node; trie_data_node_t data_node;
for (i = 0; i < self->data->n; i++) { for (i = 0; i < self->data->n; i++) {
data_node = self->data->a[i]; data_node = self->data->a[i];
if (!file_write_int32(file, (int32_t)data_node.tail) || if (!file_write_uint32(file, data_node.tail) ||
!file_write_int32(file, (int32_t)data_node.data)) { !file_write_uint32(file, data_node.data)) {
return false; return false;
} }
} }
if (!file_write_int32(file, (int32_t)self->tail->n)) if (!file_write_uint32(file, (uint32_t)self->tail->n))
return false; return false;
if (!file_write_chars(file, (char *)self->tail->a, self->tail->n)) if (!file_write_chars(file, (char *)self->tail->a, self->tail->n))
@@ -894,7 +898,7 @@ trie_t *trie_read(FILE *file) {
uint32_t signature; uint32_t signature;
if (!file_read_int32(file, (int32_t *)&signature)) if (!file_read_uint32(file, &signature))
goto exit_file_read; goto exit_file_read;
if (signature != TRIE_SIGNATURE) if (signature != TRIE_SIGNATURE)
@@ -902,7 +906,7 @@ trie_t *trie_read(FILE *file) {
uint32_t alphabet_size; uint32_t alphabet_size;
if (!file_read_int32(file, (int32_t *)&alphabet_size)) if (!file_read_uint32(file, &alphabet_size))
goto exit_file_read; goto exit_file_read;
log_debug("alphabet_size=%d\n", alphabet_size); log_debug("alphabet_size=%d\n", alphabet_size);
@@ -918,7 +922,7 @@ trie_t *trie_read(FILE *file) {
uint32_t num_nodes; uint32_t num_nodes;
if (!file_read_int32(file, (int32_t *)&num_nodes)) if (!file_read_uint32(file, &num_nodes))
goto exit_trie_created; goto exit_trie_created;
log_debug("num_nodes=%d\n", num_nodes); log_debug("num_nodes=%d\n", num_nodes);
@@ -928,8 +932,8 @@ trie_t *trie_read(FILE *file) {
int32_t check; int32_t check;
trie_node_t node; trie_node_t node;
for (i = 0; i < num_nodes; i++) { for (i = 0; i < num_nodes; i++) {
if (!file_read_int32(file, (int32_t *)&base) || if (!file_read_uint32(file, (uint32_t *)&base) ||
!file_read_int32(file, (int32_t *)&check)) !file_read_uint32(file, (uint32_t *)&check))
goto exit_trie_created; goto exit_trie_created;
node.base = base; node.base = base;
@@ -938,7 +942,7 @@ trie_t *trie_read(FILE *file) {
} }
uint32_t num_data_nodes; uint32_t num_data_nodes;
if (!file_read_int32(file, (int32_t *)&num_data_nodes)) if (!file_read_uint32(file, &num_data_nodes))
goto exit_trie_created; goto exit_trie_created;
trie_data_array_resize(trie->data, num_data_nodes); trie_data_array_resize(trie->data, num_data_nodes);
@@ -949,8 +953,8 @@ trie_t *trie_read(FILE *file) {
trie_data_node_t data_node; trie_data_node_t data_node;
for (i = 0; i < num_data_nodes; i++) { for (i = 0; i < num_data_nodes; i++) {
if (!file_read_int32(file, (int32_t *)&tail_ptr) || if (!file_read_uint32(file, &tail_ptr) ||
!file_read_int32(file, (int32_t *)&data)) !file_read_uint32(file, &data))
goto exit_trie_created; goto exit_trie_created;
data_node.tail = tail_ptr; data_node.tail = tail_ptr;
data_node.data = data; data_node.data = data;
@@ -958,7 +962,7 @@ trie_t *trie_read(FILE *file) {
} }
uint32_t tail_len; uint32_t tail_len;
if (!file_read_int32(file, (int32_t *)&tail_len)) if (!file_read_uint32(file, &tail_len))
goto exit_trie_created; goto exit_trie_created;
uchar_array_resize(trie->tail, tail_len); uchar_array_resize(trie->tail, tail_len);