From 2f5f226faa2bb07cc1a2c008f2d19219e74d698b Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 15 Jul 2016 13:16:22 -0400 Subject: [PATCH 1/3] [fix] Add original string to normalizations if all options were set to false --- src/normalize.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/normalize.c b/src/normalize.c index 515da8ed..9276bf4a 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -100,6 +100,8 @@ void add_latin_alternatives(string_tree_t *tree, char *str, size_t len, uint64_t } free(transliterated); transliterated = NULL; + } else { + string_tree_add_string(tree, str); } if (prev_string != NULL) { From ce78064988cc4c98d9c3e4e3b1b6ba11e052b8a2 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 15 Jul 2016 13:17:33 -0400 Subject: [PATCH 2/3] [fix] NULL checks --- src/libpostal.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libpostal.c b/src/libpostal.c index 5d4ddf36..ddb619c4 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -888,6 +888,9 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_ char_array_clear(temp_string); string_tree_iterator_foreach_token(iter, token, { log_debug("token=%s\n", token); + if (token == NULL) { + continue; + } char_array_append(temp_string, token); }) char_array_terminate(temp_string); @@ -933,9 +936,7 @@ char **expand_address(char *input, normalize_options_t options, size_t *n) { options.languages = lang_response->languages; } } - string_tree_t *tree = normalize_string_languages(input, normalize_string_options, options.num_languages, options.languages); - cstring_array *strings = cstring_array_new_size(len * 2); char_array *temp_string = char_array_new_size(len); @@ -959,6 +960,7 @@ char **expand_address(char *input, normalize_options_t options, size_t *n) { bool is_first = true; string_tree_iterator_foreach_token(iter, segment, { + if (segment == NULL) continue; if (!is_first) { char_array_append(temp_string, " "); } From 3e8ab0ba3fd26b54589d52a1f3782ae86d9fe4b2 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 15 Jul 2016 13:26:14 -0400 Subject: [PATCH 3/3] [test] New expansion test with all options set to false --- test/test_expand.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/test_expand.c b/test/test_expand.c index b057c42c..049803ff 100644 --- a/test/test_expand.c +++ b/test/test_expand.c @@ -84,6 +84,31 @@ TEST test_expansions_language_classifier(void) { PASS(); } +TEST test_expansions_no_options(void) { + normalize_options_t options = get_libpostal_default_options(); + options.lowercase = false; + options.latin_ascii = false; + options.transliterate = false; + options.strip_accents = false; + options.decompose = false; + options.trim_string = false; + options.drop_parentheticals = false; + options.replace_numeric_hyphens = false; + options.delete_numeric_hyphens = false; + options.split_alpha_from_numeric = false; + options.replace_word_hyphens = false; + options.delete_word_hyphens = false; + options.delete_final_periods = false; + options.delete_acronym_periods = false; + options.drop_english_possessives = false; + options.delete_apostrophes = false; + options.expand_numex = false; + options.roman_numerals = false; + + CHECK_CALL(test_expansion_contains_with_languages("120 E 96th St New York", "120 E 96th St New York", options, 0, NULL)); + PASS(); +} + SUITE(libpostal_expansion_tests) { @@ -94,6 +119,7 @@ SUITE(libpostal_expansion_tests) { RUN_TEST(test_expansions); RUN_TEST(test_expansions_language_classifier); + RUN_TEST(test_expansions_no_options); libpostal_teardown(); libpostal_teardown_language_classifier();