From f61d9931579955d9fe658b6eda89f92badb07965 Mon Sep 17 00:00:00 2001
From: Al
Date: Wed, 22 Jul 2015 03:51:14 -0400
Subject: [PATCH] [expansion] removing the self param from address_dictionary
 methods, adding search_address_dictionaries method which searches a string
 for phrases in a particular language

---
 src/address_dictionary.c         | 62 ++++++++++++++++++++++++-------
 src/address_dictionary.h         |  7 +++--
 src/address_dictionary_builder.c |  2 +-
 3 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/src/address_dictionary.c b/src/address_dictionary.c
index 4a05ec99..2651c179 100644
--- a/src/address_dictionary.c
+++ b/src/address_dictionary.c
@@ -12,17 +12,28 @@ address_dictionary_t *get_address_dictionary(void) {
     return address_dict;
 }
 
-address_expansion_array *address_dictionary_get_expansions(address_dictionary_t *self, char *key) {
-    if (self == NULL || self->expansions == NULL) return NULL;
-    khiter_t k = kh_get(str_expansions, self->expansions, key);
-    return k != kh_end(self->expansions) ? kh_value(self->expansions, k) : NULL;
+/* Look up key in the global dictionary's expansions hash. Returns NULL if
+ * address_dict (or its expansions table) is NULL or key is not present. */
+address_expansion_array *address_dictionary_get_expansions(char *key) {
+    if (address_dict == NULL || address_dict->expansions == NULL) return NULL;
+    khiter_t k = kh_get(str_expansions, address_dict->expansions, key);
+    return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
 }
 
-bool address_dictionary_add_expansion(address_dictionary_t *self, char *key, char *canonical, char *language, uint16_t dictionary_id, uint16_t address_components) {
+/* Return the canonical string stored at index, or NULL if address_dict (or
+ * its canonical array) is NULL or index is out of range. Valid indices are
+ * 0..num_strings-1: add_expansion gives each new string the index equal to
+ * the count before insertion, so index == num_strings must be rejected. */
+char *address_dictionary_get_canonical(uint32_t index) {
+    if (address_dict == NULL || address_dict->canonical == NULL || index >= cstring_array_num_strings(address_dict->canonical)) return NULL;
+    return cstring_array_get_string(address_dict->canonical, index);
+}
+
+bool address_dictionary_add_expansion(char *key, char *canonical, char *language, uint16_t dictionary_id, uint16_t address_components) {
     int ret;
 
     log_debug("key=%s\n", key);
-    address_expansion_array *expansions = address_dictionary_get_expansions(self, key);
+    address_expansion_array *expansions = address_dictionary_get_expansions(key);
 
     int32_t canonical_index;
 
@@ -33,8 +44,8 @@ bool address_dictionary_add_expansion(address_dictionary_t *self, char *key, cha
         canonical_index = -1;
         value.canonical = 1;
     } else {
-        canonical_index = (int32_t) cstring_array_num_strings(self->canonical);
-        cstring_array_add_string(self->canonical, canonical);
+        canonical_index = (int32_t) cstring_array_num_strings(address_dict->canonical);
+        cstring_array_add_string(address_dict->canonical, canonical);
         value.canonical = 0;
     }
 
@@ -48,21 +59,21 @@ bool address_dictionary_add_expansion(address_dictionary_t *self, char *key, cha
     if (expansions == NULL) {
         expansions = address_expansion_array_new_size(1);
         address_expansion_array_push(expansions, expansion);
-        khiter_t k = kh_put(str_expansions, self->expansions, strdup(key), &ret);
-        kh_value(self->expansions, k) = expansions;
+        khiter_t k = kh_put(str_expansions, address_dict->expansions, strdup(key), &ret);
+        kh_value(address_dict->expansions, k) = expansions;
 
         value.count = 1;
         value.components = address_components;
 
         log_debug("value.count=%d, value.components=%d\n", value.count, value.components);
 
-        trie_add(self->trie, key, value.value);
+        trie_add(address_dict->trie, key, value.value);
     } else {
-        uint32_t node_id = trie_get(self->trie, key);
+        uint32_t node_id = trie_get(address_dict->trie, key);
         log_debug("node_id=%d\n", node_id);
         if (node_id != NULL_NODE_ID) {
 
-            if (!trie_get_data_at_index(self->trie, node_id, &value.value)) {
+            if (!trie_get_data_at_index(address_dict->trie, node_id, &value.value)) {
                 log_warn("get_data_at_index returned false\n");
                 return false;
             }
@@ -76,7 +87,7 @@ bool address_dictionary_add_expansion(address_dictionary_t *self, char *key, cha
             value.count++;
             value.components |= address_components;
 
-            if (!trie_set_data_at_index(self->trie, node_id, value.value)) {
+            if (!trie_set_data_at_index(address_dict->trie, node_id, value.value)) {
                 log_warn("set_data_at_index returned false for node_id=%d and value=%d\n", node_id, value.value);
                 return false;
             }
@@ -89,6 +100,29 @@ bool address_dictionary_add_expansion(address_dictionary_t *self, char *key, cha
 
 }
 
+/* Search str for dictionary phrases filed under language lang.
+ * Walks the trie along lang followed by NAMESPACE_SEPARATOR_CHAR, then runs
+ * trie_search_from_index on str starting at the resulting node. Returns NULL
+ * if address_dict or its trie is NULL, if str or lang is NULL, or if either
+ * prefix lookup ends at NULL_NODE_ID. */
+phrase_array *search_address_dictionaries(char *str, char *lang) {
+    if (address_dict == NULL || address_dict->trie == NULL || str == NULL || lang == NULL) return NULL;
+
+    trie_prefix_result_t prefix = trie_get_prefix(address_dict->trie, lang);
+
+    if (prefix.node_id == NULL_NODE_ID) {
+        return NULL;
+    }
+
+    prefix = trie_get_prefix_from_index(address_dict->trie, NAMESPACE_SEPARATOR_CHAR, NAMESPACE_SEPARATOR_CHAR_LEN, prefix.node_id, prefix.tail_pos);
+
+    if (prefix.node_id == NULL_NODE_ID) {
+        return NULL;
+    }
+
+    return trie_search_from_index(address_dict->trie, str, prefix.node_id);
+}
+
 bool address_dictionary_init(void) {
     if (address_dict != NULL) return false;
 
diff --git a/src/address_dictionary.h b/src/address_dictionary.h
index 5254fdf4..221273b6 100644
--- a/src/address_dictionary.h
+++ b/src/address_dictionary.h
@@ -17,6 +17,7 @@ extern "C" {
 #include "file_utils.h"
 #include "gazetteers.h"
 #include "trie.h"
+#include "trie_search.h"
 
 #define DEFAULT_ADDRESS_EXPANSION_PATH LIBPOSTAL_DATA_DIR PATH_SEPARATOR "address_expansions" PATH_SEPARATOR "address_dictionary.dat"
 
@@ -50,8 +51,10 @@ address_dictionary_t *get_address_dictionary(void);
 
 bool address_dictionary_init(void);
 
-address_expansion_array *address_dictionary_get_expansions(address_dictionary_t *self, char *key);
-bool address_dictionary_add_expansion(address_dictionary_t *self, char *key, char *canonical, char *language, uint16_t dictionary_id, uint16_t address_components);
+phrase_array *search_address_dictionaries(char *str, char *lang);
+address_expansion_array *address_dictionary_get_expansions(char *key);
+char *address_dictionary_get_canonical(uint32_t index);
+bool address_dictionary_add_expansion(char *key, char *canonical, char *language, uint16_t dictionary_id, uint16_t address_components);
 
 void address_dictionary_destroy(address_dictionary_t *self);
 
diff --git a/src/address_dictionary_builder.c b/src/address_dictionary_builder.c
index cb96cb00..9a351687 100644
--- a/src/address_dictionary_builder.c
+++ b/src/address_dictionary_builder.c
@@ -64,7 +64,7 @@ int main(int argc, char **argv) {
             char_array_cat(key, expansion_rule.phrase);
             char *token = char_array_get_string(key);
 
-            if (!address_dictionary_add_expansion(address_dict, token, canonical, language, dictionary_id, address_components)) {
+            if (!address_dictionary_add_expansion(token, canonical, language, dictionary_id, address_components)) {
                 log_error("Could not add expansion {%s, %s}\n", language, expansion_rule.phrase);
                 exit(EXIT_FAILURE);
             }