[parser] remove geodb and fix small memory leak in address_parser_train
This commit is contained in:
@@ -284,14 +284,11 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log_info("phrase=%s\n", phrase);
|
|
||||||
|
|
||||||
cstring_array_add_string(phrases, phrase);
|
cstring_array_add_string(phrases, phrase);
|
||||||
cstring_array_add_string(phrase_labels, prev_label);
|
cstring_array_add_string(phrase_labels, prev_label);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == num_strings - 1 && !same_as_previous_label && prev_label != NULL) {
|
if (i == num_strings - 1 && !same_as_previous_label && prev_label != NULL) {
|
||||||
log_info("phrase=%s\n", normalized);
|
|
||||||
cstring_array_add_string(phrases, normalized);
|
cstring_array_add_string(phrases, normalized);
|
||||||
cstring_array_add_string(phrase_labels, label);
|
cstring_array_add_string(phrase_labels, label);
|
||||||
}
|
}
|
||||||
@@ -368,7 +365,6 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
for (int p_i = 0; p_i < sizeof(phrases) / sizeof(char *); p_i++) {
|
for (int p_i = 0; p_i < sizeof(phrases) / sizeof(char *); p_i++) {
|
||||||
phrase = phrases[p_i];
|
phrase = phrases[p_i];
|
||||||
if (phrase == NULL) continue;
|
if (phrase == NULL) continue;
|
||||||
log_info("adding: %s\n", phrase);
|
|
||||||
|
|
||||||
k = kh_get(phrase_stats, phrase_stats, phrase);
|
k = kh_get(phrase_stats, phrase_stats, phrase);
|
||||||
|
|
||||||
@@ -429,7 +425,6 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (normalized_phrase != NULL) {
|
if (normalized_phrase != NULL) {
|
||||||
log_info("freeing\n");
|
|
||||||
free(normalized_phrase);
|
free(normalized_phrase);
|
||||||
normalized_phrase = NULL;
|
normalized_phrase = NULL;
|
||||||
}
|
}
|
||||||
@@ -443,7 +438,7 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("Done with vocab, total size=%d\n", vocab_size);
|
log_info("Done with vocab, total size=%d\n", vocab_size);
|
||||||
|
|
||||||
for (k = kh_begin(vocab); k != kh_end(vocab); ++k) {
|
for (k = kh_begin(vocab); k != kh_end(vocab); ++k) {
|
||||||
token = (char *)kh_key(vocab, k);
|
token = (char *)kh_key(vocab, k);
|
||||||
@@ -457,8 +452,13 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_info("Creating phrases trie\n");
|
||||||
|
|
||||||
|
|
||||||
phrase_counts_trie = trie_new_from_hash(phrase_counts);
|
phrase_counts_trie = trie_new_from_hash(phrase_counts);
|
||||||
|
|
||||||
|
log_info("Calculating phrase types\n");
|
||||||
|
|
||||||
kh_foreach(phrase_stats, token, stats, {
|
kh_foreach(phrase_stats, token, stats, {
|
||||||
class_counts = stats.class_counts;
|
class_counts = stats.class_counts;
|
||||||
int most_common = -1;
|
int most_common = -1;
|
||||||
@@ -494,6 +494,8 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
|
|
||||||
parser->model = NULL;
|
parser->model = NULL;
|
||||||
|
|
||||||
|
log_info("Creating vocab trie\n");
|
||||||
|
|
||||||
parser->vocab = trie_new_from_hash(vocab);
|
parser->vocab = trie_new_from_hash(vocab);
|
||||||
if (parser->vocab == NULL) {
|
if (parser->vocab == NULL) {
|
||||||
log_error("Error initializing vocabulary\n");
|
log_error("Error initializing vocabulary\n");
|
||||||
@@ -502,6 +504,8 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
goto exit_hashes_allocated;
|
goto exit_hashes_allocated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_info("Creating phrase_types trie\n");
|
||||||
|
|
||||||
parser->phrase_types = trie_new_from_hash(phrase_types);
|
parser->phrase_types = trie_new_from_hash(phrase_types);
|
||||||
if (parser->phrase_types == NULL) {
|
if (parser->phrase_types == NULL) {
|
||||||
log_error("Error converting phrase_types to trie\n");
|
log_error("Error converting phrase_types to trie\n");
|
||||||
@@ -510,11 +514,14 @@ address_parser_t *address_parser_init(char *filename) {
|
|||||||
goto exit_hashes_allocated;
|
goto exit_hashes_allocated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_info("Freeing memory from initialization\n");
|
||||||
|
|
||||||
exit_hashes_allocated:
|
exit_hashes_allocated:
|
||||||
// Free memory for hashtables, etc.
|
// Free memory for hashtables, etc.
|
||||||
|
|
||||||
char_array_destroy(token_builder);
|
char_array_destroy(token_builder);
|
||||||
char_array_destroy(postcode_token_builder);
|
char_array_destroy(postcode_token_builder);
|
||||||
|
char_array_destroy(sub_token_builder);
|
||||||
char_array_destroy(phrase_builder);
|
char_array_destroy(phrase_builder);
|
||||||
cstring_array_destroy(phrases);
|
cstring_array_destroy(phrases);
|
||||||
cstring_array_destroy(phrase_labels);
|
cstring_array_destroy(phrase_labels);
|
||||||
@@ -569,8 +576,6 @@ bool address_parser_train_epoch(address_parser_t *self, averaged_perceptron_trai
|
|||||||
|
|
||||||
address_parser_context_t *context = address_parser_context_new();
|
address_parser_context_t *context = address_parser_context_new();
|
||||||
|
|
||||||
bool success = false;
|
|
||||||
|
|
||||||
size_t examples = 0;
|
size_t examples = 0;
|
||||||
size_t errors = trainer->num_errors;
|
size_t errors = trainer->num_errors;
|
||||||
|
|
||||||
@@ -608,13 +613,11 @@ bool address_parser_train_epoch(address_parser_t *self, averaged_perceptron_trai
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
success = true;
|
|
||||||
|
|
||||||
exit_epoch_training_started:
|
exit_epoch_training_started:
|
||||||
address_parser_data_set_destroy(data_set);
|
address_parser_data_set_destroy(data_set);
|
||||||
address_parser_context_destroy(context);
|
address_parser_context_destroy(context);
|
||||||
|
|
||||||
return success;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool address_parser_train(address_parser_t *self, char *filename, uint32_t num_iterations) {
|
bool address_parser_train(address_parser_t *self, char *filename, uint32_t num_iterations) {
|
||||||
@@ -726,13 +729,6 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
log_info("transliteration module loaded\n");
|
log_info("transliteration module loaded\n");
|
||||||
|
|
||||||
if (!geodb_module_setup(NULL)) {
|
|
||||||
log_error("Could not load geodb dictionaries\n");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
log_info("geodb module loaded\n");
|
|
||||||
|
|
||||||
address_parser_t *parser = address_parser_init(filename);
|
address_parser_t *parser = address_parser_init(filename);
|
||||||
|
|
||||||
if (parser == NULL) {
|
if (parser == NULL) {
|
||||||
@@ -757,6 +753,5 @@ int main(int argc, char **argv) {
|
|||||||
address_parser_destroy(parser);
|
address_parser_destroy(parser);
|
||||||
|
|
||||||
address_dictionary_module_teardown();
|
address_dictionary_module_teardown();
|
||||||
geodb_module_teardown();
|
|
||||||
log_debug("Done\n");
|
log_debug("Done\n");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user