From a8d6cc4053917d3b6048ff97da1aa1c29f69e301 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 12 Dec 2015 03:54:51 -0500 Subject: [PATCH] [api] Moving parse_address definition into libpostal.h --- src/address_parser.h | 13 ++++--------- src/libpostal.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/address_parser.h b/src/address_parser.h index bb1bdb26..c096b965 100644 --- a/src/address_parser.h +++ b/src/address_parser.h @@ -48,7 +48,7 @@ with the general error-driven averaged perceptron. #include "averaged_perceptron.h" #include "averaged_perceptron_tagger.h" -#include "bloom.h" +#include "libpostal.h" #include "libpostal_config.h" #include "collections.h" #include "normalize.h" @@ -71,7 +71,6 @@ with the general error-driven averaged perceptron. #define SEPARATOR_LABEL "sep" #define FIELD_SEPARATOR_LABEL "fsep" - #define ADDRESS_COMPONENT_HOUSE 1 << 0 #define ADDRESS_COMPONENT_HOUSE_NUMBER 1 << 1 #define ADDRESS_COMPONENT_ROAD 1 << 4 @@ -95,7 +94,7 @@ enum { ADDRESS_PARSER_POSTAL_CODE, ADDRESS_PARSER_COUNTRY, NUM_ADDRESS_PARSER_TYPES -} address_parser_types; +} address_parser_components; typedef union address_parser_types { uint32_t value; @@ -111,6 +110,8 @@ typedef struct address_parser_context { char *country; cstring_array *features; char_array *phrase; + char_array *component_phrase; + char_array *geodb_phrase; uint32_array *separators; cstring_array *normalized; phrase_array *address_dictionary_phrases; @@ -125,12 +126,6 @@ typedef struct address_parser_context { tokenized_string_t *tokenized_str; } address_parser_context_t; -typedef struct address_parser_response { - size_t num_components; - char **components; - char **labels; -} address_parser_response_t; - // Can add other gazetteers as well typedef struct address_parser { averaged_perceptron_t *model; diff --git a/src/libpostal.h b/src/libpostal.h index 1b2a274b..17ab9468 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -12,6 +12,9 @@ extern "C" { #define MAX_LANGUAGE_LEN 4 +/* +Address dictionaries +*/ // Bit set, should be able to keep it at a short (uint16_t) #define ADDRESS_NONE 0 #define ADDRESS_ANY (1 << 0) @@ -84,6 +87,31 @@ static normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = { char **expand_address(char *input, normalize_options_t options, uint64_t *n); +/* +Address parser +*/ + +typedef struct address_parser_response { + size_t num_components; + char **components; + char **labels; +} address_parser_response_t; + +typedef struct address_parser_options { + char *language; + char *country; +} address_parser_options_t; + +static address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS = { + .language = NULL, + .country = NULL +}; + +address_parser_response_t *parse_address(char *address, address_parser_options_t options); + +// Setup/teardown methods + + bool libpostal_setup(void); void libpostal_teardown(void);