diff --git a/src/gazetteers.h b/src/gazetteers.h index fea59f89..d9567851 100644 --- a/src/gazetteers.h +++ b/src/gazetteers.h @@ -10,25 +10,9 @@ extern "C" { #include #include "klib/kvec.h" +#include "libpostal.h" #include "vector.h" -// Bit set, should be able to keep it at a short (uint16_t) -#define ADDRESS_ANY 1 << 0 -#define ADDRESS_NAME 1 << 1 -#define ADDRESS_HOUSE_NUMBER 1 << 2 -#define ADDRESS_STREET 1 << 3 -#define ADDRESS_UNIT 1 << 4 - -#define ADDRESS_LOCALITY 1 << 7 -#define ADDRESS_ADMIN1 1 << 8 -#define ADDRESS_ADMIN2 1 << 9 -#define ADDRESS_ADMIN3 1 << 10 -#define ADDRESS_ADMIN4 1 << 11 -#define ADDRESS_ADMIN_OTHER 1 << 12 -#define ADDRESS_COUNTRY 1 << 13 -#define ADDRESS_POSTAL_CODE 1 << 14 -#define ADDRESS_NEIGHBORHOOD 1 << 15 - typedef enum dictionary_type { DICTIONARY_ANY = 1, DICTIONARY_SYNONYM = 2, diff --git a/src/libpostal.c b/src/libpostal.c index a2db7cc0..85e8a332 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -5,10 +5,12 @@ #include "address_dictionary.h" #include "collections.h" +#include "constants.h" #include "geodb.h" #include "numex.h" #include "normalize.h" #include "scanner.h" +#include "string_utils.h" #include "transliterate.h" typedef struct phrase_language { @@ -441,7 +443,7 @@ void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_strings -cstring_array *expand_address(char *input, normalize_options_t options) { +char **expand_address(char *input, normalize_options_t options, uint64_t *n) { options.address_components |= ADDRESS_ANY; uint64_t normalize_string_options = 0; @@ -456,7 +458,7 @@ cstring_array *expand_address(char *input, normalize_options_t options) { string_tree_t *tree = normalize_string(input, normalize_string_options); - cstring_array *strings = cstring_array_new_size(len); + cstring_array *strings = cstring_array_new_size(len * 2); char_array *temp_string = char_array_new_size(len); khash_t(str_set) *unique_strings = kh_init(str_set); @@ -512,7 +514,9 @@ cstring_array 
*expand_address(char *input, normalize_options_t options) { char_array_destroy(temp_string); string_tree_destroy(tree); - return strings; + *n = cstring_array_num_strings(strings); + + return cstring_array_to_strings(strings); } diff --git a/src/libpostal.h b/src/libpostal.h index da1d007f..3ee6d337 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -3,14 +3,31 @@ #include #include +#include -#include "constants.h" -#include "string_utils.h" +#define MAX_LANGUAGE_LEN 4 + +// Bit set of address components, fits in a short (uint16_t); each value is parenthesized so it expands safely inside larger expressions such as ~ADDRESS_NAME +#define ADDRESS_ANY (1 << 0) +#define ADDRESS_NAME (1 << 1) +#define ADDRESS_HOUSE_NUMBER (1 << 2) +#define ADDRESS_STREET (1 << 3) +#define ADDRESS_UNIT (1 << 4) + +#define ADDRESS_LOCALITY (1 << 7) +#define ADDRESS_ADMIN1 (1 << 8) +#define ADDRESS_ADMIN2 (1 << 9) +#define ADDRESS_ADMIN3 (1 << 10) +#define ADDRESS_ADMIN4 (1 << 11) +#define ADDRESS_ADMIN_OTHER (1 << 12) +#define ADDRESS_COUNTRY (1 << 13) +#define ADDRESS_POSTAL_CODE (1 << 14) +#define ADDRESS_NEIGHBORHOOD (1 << 15) typedef struct normalize_options { // List of language codes int num_languages; - char *languages[MAX_LANGUAGE_LEN]; + char **languages; uint16_t address_components; // String options @@ -35,7 +52,7 @@ typedef struct normalize_options { } normalize_options_t; -cstring_array *expand_address(char *input, normalize_options_t options); +char **expand_address(char *input, normalize_options_t options, uint64_t *n); bool libpostal_setup(void); void libpostal_teardown(void);