From 8742574257f5c2d647dcc746d08240d27a210a45 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 4 Apr 2017 20:40:55 -0400 Subject: [PATCH] [parser] storing address_parser_context on the parser struct itself so it doesn't have to be allocated every time --- src/address_parser.c | 17 ++++++++++++++--- src/address_parser.h | 3 ++- src/address_parser_train.c | 10 ++++++++-- src/libpostal.c | 6 +----- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/address_parser.c b/src/address_parser.c index 14d6afaf..b8c935e4 100644 --- a/src/address_parser.c +++ b/src/address_parser.c @@ -287,6 +287,11 @@ bool address_parser_load(char *dir) { fclose(postal_codes_file); + parser->context = address_parser_context_new(); + if (parser->context == NULL) { + goto exit_address_parser_created; + } + char_array_destroy(path); return true; @@ -305,6 +310,10 @@ void address_parser_destroy(address_parser_t *self) { crf_destroy(self->model.crf); } + if (self->context != NULL) { + address_parser_context_destroy(self->context); + } + if (self->vocab != NULL) { trie_destroy(self->vocab); } @@ -1642,15 +1651,17 @@ libpostal_address_parser_response_t *address_parser_response_new(void) { return response; } -libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context) { - if (address == NULL || context == NULL) return NULL; +libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country) { + if (address == NULL) return NULL; address_parser_t *parser = get_address_parser(); - if (parser == NULL) { + if (parser == NULL || parser->context == NULL) { log_error("parser is not setup, call libpostal_setup_address_parser()\n"); return NULL; } + address_parser_context_t *context = parser->context; + char *normalized = address_parser_normalize_string(address); bool is_normalized = normalized != NULL; if (!is_normalized) { diff --git a/src/address_parser.h b/src/address_parser.h index 3504cba6..2518a9ef 100644 --- a/src/address_parser.h +++ b/src/address_parser.h @@ -200,6 +200,7 @@ typedef struct address_parser { averaged_perceptron_t *ap; crf_t *crf; } model; + address_parser_context_t *context; trie_t *vocab; trie_t *phrases; address_parser_types_array *phrase_types; @@ -214,7 +215,7 @@ address_parser_t *address_parser_new_options(parser_options_t options); address_parser_t *get_address_parser(void); bool address_parser_load(char *dir); -libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country, address_parser_context_t *context); +libpostal_address_parser_response_t *address_parser_parse(char *address, char *language, char *country); void address_parser_destroy(address_parser_t *self); char *address_parser_normalize_string(char *str); diff --git a/src/address_parser_train.c b/src/address_parser_train.c index b5d84d2e..5b0b8893 100644 --- a/src/address_parser_train.c +++ b/src/address_parser_train.c @@ -328,6 +328,13 @@ address_parser_t *address_parser_init(char *filename) { return NULL; } + address_parser_context_t *context = address_parser_context_new(); + if (context == NULL) { + log_error("Error allocating context\n"); + return NULL; + } + parser->context = context; + khash_t(str_uint32) *vocab = kh_init(str_uint32); if (vocab == NULL) { log_error("Could not allocate vocab\n"); @@ -1043,7 +1050,7 @@ bool address_parser_train_epoch(address_parser_t *self, void *trainer, char *fil return false; } - address_parser_context_t *context = address_parser_context_new(); + address_parser_context_t *context = self->context; size_t examples = 0; uint64_t errors = address_parser_train_num_errors(self, trainer); @@ -1087,7 +1094,6 @@ bool address_parser_train_epoch(address_parser_t *self, void *trainer, char *fil exit_epoch_training_started: address_parser_data_set_destroy(data_set); - address_parser_context_destroy(context); return true; } diff --git a/src/libpostal.c b/src/libpostal.c index 11e10a2c..92a10dfd 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -1021,18 +1021,14 @@ inline libpostal_address_parser_options_t libpostal_get_address_parser_default_o } libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) { - address_parser_context_t *context = address_parser_context_new(); - libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country, context); + libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country); if (parsed == NULL) { log_error("Parser returned NULL\n"); - address_parser_context_destroy(context); libpostal_address_parser_response_destroy(parsed); return NULL; } - address_parser_context_destroy(context); - return parsed; }