From 071aee0e853d1577dba2d89775e138a3e4d2e5aa Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 2 Jan 2018 03:49:52 -0500 Subject: [PATCH] [fix] in root expansions, removing phrases that are invalid for the given components if there are other ignorable components --- src/expand.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/expand.c b/src/expand.c index b0d62e3c..2268acc6 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1106,6 +1106,7 @@ string_tree_t *add_string_alternatives_phrase_option(char *str, libpostal_normal if (delete_phrases) { bool is_ignorable = address_expansion_is_ignorable_for_components(expansion, options.address_components); bool is_canonical = expansion.canonical_index == NULL_CANONICAL_INDEX; + bool is_valid_for_components = address_expansion_is_valid_for_components(expansion, options.address_components); log_debug("is_ignorable = %d, is_canonical = %d, is_ambiguous = %d, current_phrase_have_ambiguous = %d, current_phrase_have_unambiguous = %d, have_strictly_ignorable = %d, current_phrase_have_ignorable=%d, current_phrase_have_possible_root=%d\n", is_ignorable, is_canonical, is_ambiguous, current_phrase_have_ambiguous, current_phrase_have_unambiguous, have_strictly_ignorable, current_phrase_have_ignorable, current_phrase_have_possible_root); @@ -1141,6 +1142,10 @@ string_tree_t *add_string_alternatives_phrase_option(char *str, libpostal_normal log_debug("current_phrase_have_ambiguous && have_non_phrase_tokens\n"); log_debug("current_phrase_ignorable = %d\n", current_phrase_ignorable); + } else if (!is_valid_for_components) { + log_debug("!is_valid_for_components\n"); + current_phrase_ignorable = current_phrase_have_ignorable; + log_debug("current_phrase_ignorable = %d\n", current_phrase_ignorable); } else { log_debug("none of the above\n"); }