[expansion] Changes to address_expansion struct to allow for multiple dictionaries per record. Only adding unique canonical strings to the string array
This commit is contained in:
@@ -18,12 +18,24 @@ address_expansion_array *address_dictionary_get_expansions(char *key) {
|
|||||||
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
return k != kh_end(address_dict->expansions) ? kh_value(address_dict->expansions, k) : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t address_dictionary_next_canonical_index(void) {
|
||||||
|
if (address_dict == NULL || address_dict->canonical == NULL) return -1;
|
||||||
|
return (int32_t)cstring_array_num_strings(address_dict->canonical);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
bool address_dictionary_add_canonical(char *canonical) {
|
||||||
|
if (address_dict == NULL || address_dict->canonical == NULL) return false;
|
||||||
|
cstring_array_add_string(address_dict->canonical, canonical);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
char *address_dictionary_get_canonical(uint32_t index) {
|
char *address_dictionary_get_canonical(uint32_t index) {
|
||||||
if (address_dict == NULL || address_dict->canonical == NULL || index > cstring_array_num_strings(address_dict->canonical)) return NULL;
|
if (address_dict == NULL || address_dict->canonical == NULL || index > cstring_array_num_strings(address_dict->canonical)) return NULL;
|
||||||
return cstring_array_get_string(address_dict->canonical, index);
|
return cstring_array_get_string(address_dict->canonical, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool address_dictionary_add_expansion(char *key, char *canonical, char *language, uint16_t dictionary_id, uint16_t address_components) {
|
bool address_dictionary_add_expansion(char *key, address_expansion_t expansion) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
log_debug("key=%s\n", key);
|
log_debug("key=%s\n", key);
|
||||||
@@ -33,22 +45,7 @@ bool address_dictionary_add_expansion(char *key, char *canonical, char *language
|
|||||||
|
|
||||||
expansion_value_t value;
|
expansion_value_t value;
|
||||||
value.value = 0;
|
value.value = 0;
|
||||||
|
value.canonical = expansion.canonical_index == -1;
|
||||||
if (canonical == NULL) {
|
|
||||||
canonical_index = -1;
|
|
||||||
value.canonical = 1;
|
|
||||||
} else {
|
|
||||||
canonical_index = (int32_t) cstring_array_num_strings(address_dict->canonical);
|
|
||||||
cstring_array_add_string(address_dict->canonical, canonical);
|
|
||||||
value.canonical = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
address_expansion_t expansion;
|
|
||||||
|
|
||||||
expansion.canonical_index = canonical_index;
|
|
||||||
strcpy(expansion.language, language);
|
|
||||||
expansion.dictionary_id = dictionary_id;
|
|
||||||
expansion.address_components = address_components;
|
|
||||||
|
|
||||||
if (expansions == NULL) {
|
if (expansions == NULL) {
|
||||||
expansions = address_expansion_array_new_size(1);
|
expansions = address_expansion_array_new_size(1);
|
||||||
@@ -57,16 +54,14 @@ bool address_dictionary_add_expansion(char *key, char *canonical, char *language
|
|||||||
kh_value(address_dict->expansions, k) = expansions;
|
kh_value(address_dict->expansions, k) = expansions;
|
||||||
|
|
||||||
value.count = 1;
|
value.count = 1;
|
||||||
value.components = address_components;
|
value.components = expansion.address_components;
|
||||||
log_debug("value.count=%d, value.components=%d\n", value.count, value.components);
|
log_debug("value.count=%d, value.components=%d\n", value.count, value.components);
|
||||||
|
|
||||||
trie_add(address_dict->trie, key, value.value);
|
trie_add(address_dict->trie, key, value.value);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
uint32_t node_id = trie_get(address_dict->trie, key);
|
uint32_t node_id = trie_get(address_dict->trie, key);
|
||||||
log_debug("node_id=%d\n", node_id);
|
log_debug("node_id=%d\n", node_id);
|
||||||
if (node_id != NULL_NODE_ID) {
|
if (node_id != NULL_NODE_ID) {
|
||||||
|
|
||||||
if (!trie_get_data_at_index(address_dict->trie, node_id, &value.value)) {
|
if (!trie_get_data_at_index(address_dict->trie, node_id, &value.value)) {
|
||||||
log_warn("get_data_at_index returned false\n");
|
log_warn("get_data_at_index returned false\n");
|
||||||
return false;
|
return false;
|
||||||
@@ -79,7 +74,7 @@ bool address_dictionary_add_expansion(char *key, char *canonical, char *language
|
|||||||
}
|
}
|
||||||
|
|
||||||
value.count++;
|
value.count++;
|
||||||
value.components |= address_components;
|
value.components |= expansion.address_components;
|
||||||
|
|
||||||
if (!trie_set_data_at_index(address_dict->trie, node_id, value.value)) {
|
if (!trie_set_data_at_index(address_dict->trie, node_id, value.value)) {
|
||||||
log_warn("set_data_at_index returned false for node_id=%d and value=%d\n", node_id, value.value);
|
log_warn("set_data_at_index returned false for node_id=%d and value=%d\n", node_id, value.value);
|
||||||
@@ -187,10 +182,16 @@ static bool address_expansion_read(FILE *f, address_expansion_t *expansion) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!file_read_uint16(f, &expansion->dictionary_id)) {
|
if (!file_read_uint32(f, (uint32_t *)&expansion->num_dictionaries)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < expansion->num_dictionaries; i++) {
|
||||||
|
if (!file_read_uint16(f, (uint16_t *)expansion->dictionary_ids + i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!file_read_uint16(f, &expansion->address_components)) {
|
if (!file_read_uint16(f, &expansion->address_components)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -207,12 +208,21 @@ static bool address_expansion_write(FILE *f, address_expansion_t expansion) {
|
|||||||
if (!file_write_uint32(f, (uint32_t)expansion.canonical_index) ||
|
if (!file_write_uint32(f, (uint32_t)expansion.canonical_index) ||
|
||||||
!file_write_uint32(f, language_len) ||
|
!file_write_uint32(f, language_len) ||
|
||||||
!file_write_chars(f, expansion.language, language_len) ||
|
!file_write_chars(f, expansion.language, language_len) ||
|
||||||
!file_write_uint16(f, expansion.dictionary_id) ||
|
!file_write_uint32(f, expansion.num_dictionaries)
|
||||||
!file_write_uint16(f, expansion.address_components)
|
|
||||||
) {
|
) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < expansion.num_dictionaries; i++) {
|
||||||
|
if (!file_write_uint16(f, expansion.dictionary_ids[i])) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!file_write_uint16(f, expansion.address_components)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,8 @@ typedef union expansion_value {
|
|||||||
typedef struct address_expansion {
|
typedef struct address_expansion {
|
||||||
int32_t canonical_index;
|
int32_t canonical_index;
|
||||||
char language[MAX_LANGUAGE_LEN];
|
char language[MAX_LANGUAGE_LEN];
|
||||||
uint16_t dictionary_id;
|
uint32_t num_dictionaries;
|
||||||
|
uint16_t dictionary_ids[MAX_DICTIONARY_TYPES];
|
||||||
uint16_t address_components;
|
uint16_t address_components;
|
||||||
} address_expansion_t;
|
} address_expansion_t;
|
||||||
|
|
||||||
@@ -56,7 +57,9 @@ bool address_dictionary_init(void);
|
|||||||
phrase_array *search_address_dictionaries(char *str, char *lang);
|
phrase_array *search_address_dictionaries(char *str, char *lang);
|
||||||
address_expansion_array *address_dictionary_get_expansions(char *key);
|
address_expansion_array *address_dictionary_get_expansions(char *key);
|
||||||
char *address_dictionary_get_canonical(uint32_t index);
|
char *address_dictionary_get_canonical(uint32_t index);
|
||||||
bool address_dictionary_add_expansion(char *key, char *canonical, char *language, uint16_t dictionary_id, uint16_t address_components);
|
int32_t address_dictionary_next_canonical_index(void);
|
||||||
|
bool address_dictionary_add_canonical(char *canonical);
|
||||||
|
bool address_dictionary_add_expansion(char *key, address_expansion_t expansion);
|
||||||
|
|
||||||
void address_dictionary_destroy(address_dictionary_t *self);
|
void address_dictionary_destroy(address_dictionary_t *self);
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
khash_t(int_int) *dictionary_components = kh_init(int_int);
|
khash_t(int_int) *dictionary_components = kh_init(int_int);
|
||||||
|
|
||||||
|
khash_t(str_int) *canonical_indices = kh_init(str_int);
|
||||||
|
|
||||||
khiter_t k;
|
khiter_t k;
|
||||||
|
|
||||||
for (int g = 0; g < NUM_DICTIONARY_TYPES; g++) {
|
for (int g = 0; g < NUM_DICTIONARY_TYPES; g++) {
|
||||||
@@ -41,26 +43,57 @@ int main(int argc, char **argv) {
|
|||||||
address_language_index_t lang_index = expansion_languages[i];
|
address_language_index_t lang_index = expansion_languages[i];
|
||||||
char *language = lang_index.language;
|
char *language = lang_index.language;
|
||||||
|
|
||||||
|
log_info("Doing language: %s\n", language);
|
||||||
|
|
||||||
for (int j = lang_index.index; j < lang_index.index + lang_index.len; j++) {
|
for (int j = lang_index.index; j < lang_index.index + lang_index.len; j++) {
|
||||||
address_expansion_rule_t expansion_rule = expansion_rules[j];
|
address_expansion_rule_t expansion_rule = expansion_rules[j];
|
||||||
uint16_t dictionary_id = (uint16_t) expansion_rule.dictionary;
|
|
||||||
|
|
||||||
k = kh_get(int_int, dictionary_components, (uint32_t)dictionary_id);
|
uint16_t address_components = 0;
|
||||||
if (k == kh_end(dictionary_components)) {
|
|
||||||
log_error("Invalid dictionary_type: %d\n", dictionary_id);
|
address_expansion_t expansion;
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
|
strcpy(expansion.language, language);
|
||||||
|
expansion.num_dictionaries = expansion_rule.num_dictionaries;
|
||||||
|
|
||||||
|
for (int d = 0; d < expansion_rule.num_dictionaries; d++) {
|
||||||
|
uint16_t dictionary_id = (uint16_t) expansion_rule.dictionaries[d];
|
||||||
|
|
||||||
|
expansion.dictionary_ids[d] = dictionary_id;
|
||||||
|
|
||||||
|
k = kh_get(int_int, dictionary_components, (uint32_t)dictionary_id);
|
||||||
|
if (k == kh_end(dictionary_components)) {
|
||||||
|
log_error("Invalid dictionary_type: %d\n", dictionary_id);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
address_components |= (uint16_t) kh_value(dictionary_components, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t address_components = (uint16_t) kh_value(dictionary_components, k);
|
expansion.address_components = address_components;
|
||||||
|
|
||||||
char *canonical = NULL;
|
char *canonical = NULL;
|
||||||
if (expansion_rule.canonical_index != -1) {
|
if (expansion_rule.canonical_index != -1) {
|
||||||
canonical = canonical_strings[expansion_rule.canonical_index];
|
canonical = canonical_strings[expansion_rule.canonical_index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (canonical == NULL) {
|
||||||
|
expansion.canonical_index = -1;
|
||||||
|
} else {
|
||||||
|
k = kh_get(str_int, canonical_indices, canonical);
|
||||||
|
if (k != kh_end(canonical_indices)) {
|
||||||
|
expansion.canonical_index = kh_value(canonical_indices, k);
|
||||||
|
} else {
|
||||||
|
expansion.canonical_index = address_dictionary_next_canonical_index();
|
||||||
|
if (!address_dictionary_add_canonical(canonical)) {
|
||||||
|
log_error("Error adding canonical string: %s\n", canonical);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Add the phrase itself to the base namespace for existence checks
|
// Add the phrase itself to the base namespace for existence checks
|
||||||
|
|
||||||
if (!address_dictionary_add_expansion(expansion_rule.phrase, canonical, language, dictionary_id, address_components)) {
|
if (!address_dictionary_add_expansion(expansion_rule.phrase, expansion)) {
|
||||||
log_error("Could not add expansion {%s}\n", expansion_rule.phrase);
|
log_error("Could not add expansion {%s}\n", expansion_rule.phrase);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
@@ -73,7 +106,7 @@ int main(int argc, char **argv) {
|
|||||||
char_array_cat(key, expansion_rule.phrase);
|
char_array_cat(key, expansion_rule.phrase);
|
||||||
char *token = char_array_get_string(key);
|
char *token = char_array_get_string(key);
|
||||||
|
|
||||||
if (!address_dictionary_add_expansion(token, canonical, language, dictionary_id, address_components)) {
|
if (!address_dictionary_add_expansion(token, expansion)) {
|
||||||
log_error("Could not add language expansion {%s, %s}\n", language, expansion_rule.phrase);
|
log_error("Could not add language expansion {%s, %s}\n", language, expansion_rule.phrase);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
@@ -86,5 +119,7 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
kh_destroy(int_int, dictionary_components);
|
kh_destroy(int_int, dictionary_components);
|
||||||
|
|
||||||
|
kh_destroy(str_int, canonical_indices);
|
||||||
|
|
||||||
address_dictionary_module_teardown();
|
address_dictionary_module_teardown();
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user