diff --git a/src/address_parser_cli.c b/src/address_parser_cli.c index 4cf9b70a..1564cee6 100644 --- a/src/address_parser_cli.c +++ b/src/address_parser_cli.c @@ -109,7 +109,7 @@ int main(int argc, char **argv) { } address_parser_response_t *parsed; - address_parser_options_t options = LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS; + address_parser_options_t options = get_libpostal_address_parser_default_options(); if ((parsed = parse_address(input, options))) { printf("\n"); diff --git a/src/bench.c b/src/bench.c index ec165cbf..a3bda385 100644 --- a/src/bench.c +++ b/src/bench.c @@ -38,7 +38,7 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } - normalize_options_t options = LIBPOSTAL_DEFAULT_OPTIONS; + normalize_options_t options = get_libpostal_default_options(); options.num_languages = 1; options.languages = languages; diff --git a/src/libpostal.c b/src/libpostal.c index 1f59b3a5..7c56cc9c 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -32,6 +32,35 @@ KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language) #define DEFAULT_KEY_LEN 32 + +static normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = { + .languages = NULL, + .num_languages = 0, + .address_components = ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT, + .latin_ascii = 1, + .transliterate = 1, + .strip_accents = 1, + .decompose = 1, + .lowercase = 1, + .trim_string = 1, + .drop_parentheticals = 1, + .replace_numeric_hyphens = 0, + .delete_numeric_hyphens = 0, + .split_alpha_from_numeric = 1, + .replace_word_hyphens = 1, + .delete_word_hyphens = 1, + .delete_final_periods = 1, + .delete_acronym_periods = 1, + .drop_english_possessives = 1, + .delete_apostrophes = 1, + .expand_numex = 1, + .roman_numerals = 1 +}; + +inline normalize_options_t get_libpostal_default_options(void) { + return LIBPOSTAL_DEFAULT_OPTIONS; +} + static inline uint64_t get_normalize_token_options(normalize_options_t options) { uint64_t normalize_token_options = 0; @@ -978,7 +1007,14 @@ void address_parser_response_destroy(address_parser_response_t *self) { free(self); } +static address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS = { + .language = NULL, + .country = NULL +}; +inline address_parser_options_t get_libpostal_address_parser_default_options(void) { + return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS; +} address_parser_response_t *parse_address(char *address, address_parser_options_t options) { address_parser_context_t *context = address_parser_context_new(); diff --git a/src/libpostal.h b/src/libpostal.h index f48838a1..a817df0d 100644 --- a/src/libpostal.h +++ b/src/libpostal.h @@ -62,29 +62,7 @@ typedef struct normalize_options { } normalize_options_t; -static normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = { - NULL, // languages - 0, // num_languages - ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT, // address_components - 1, // latin_ascii - 1, // transliterate - 1, // strip_accents - 1, // decompose - 1, // lowercase - 1, // trim_string - 1, // drop_parentheticals - 0, // replace_numeric_hyphens - 0, // delete_numeric_hyphens - 1, // split_alpha_from_numeric - 1, // replace_word_hyphens - 1, // delete_word_hyphens - 1, // delete_final_periods - 1, // delete_acronym_periods - 1, // drop_english_possessives - 1, // delete_apostrophes - 1, // expand_numex - 1 // roman_numerals -}; +normalize_options_t get_libpostal_default_options(void); char **expand_address(char *input, normalize_options_t options, size_t *n); @@ -103,17 +81,12 @@ typedef struct address_parser_options { char *country; } address_parser_options_t; -static address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS = { - NULL, // language - NULL // country -}; - void address_parser_response_destroy(address_parser_response_t *self); +address_parser_options_t get_libpostal_address_parser_default_options(void); + address_parser_response_t *parse_address(char *address, address_parser_options_t options); - - // Setup/teardown methods diff --git a/src/main.c b/src/main.c index f237cdfd..22be1a76 100644 --- a/src/main.c +++ b/src/main.c @@ -79,7 +79,7 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } - normalize_options_t options = LIBPOSTAL_DEFAULT_OPTIONS; + normalize_options_t options = get_libpostal_default_options(); if (languages != NULL) { options.languages = languages->a; diff --git a/test/test_expand.c b/test/test_expand.c index d3ab7d2e..bed8ff69 100644 --- a/test/test_expand.c +++ b/test/test_expand.c @@ -66,16 +66,20 @@ static greatest_test_res test_expansion_contains_with_languages(char *input, cha TEST test_expansions(void) { - CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", LIBPOSTAL_DEFAULT_OPTIONS, 1, "en")); - CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", LIBPOSTAL_DEFAULT_OPTIONS, 1, "de")); - CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", LIBPOSTAL_DEFAULT_OPTIONS, 1, "nl")); - CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", LIBPOSTAL_DEFAULT_OPTIONS, 1, "th")); + normalize_options_t options = get_libpostal_default_options(); + + CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en")); + CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de")); + CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl")); + CHECK_CALL(test_expansion_contains_with_languages("มงแตร", "มงแตร", options, 1, "th")); PASS(); } TEST test_expansions_language_classifier(void) { - CHECK_CALL(test_expansion_contains_with_languages("V XX Sett", "via 20 settembre", LIBPOSTAL_DEFAULT_OPTIONS, 0, NULL)); - CHECK_CALL(test_expansion_contains_with_languages("C/ Ocho", "calle 8", LIBPOSTAL_DEFAULT_OPTIONS, 0, NULL)); + normalize_options_t options = get_libpostal_default_options(); + + CHECK_CALL(test_expansion_contains_with_languages("V XX Sett", "via 20 settembre", options, 0, NULL)); + CHECK_CALL(test_expansion_contains_with_languages("C/ Ocho", "calle 8", options, 0, NULL)); PASS(); }