[fix] in root expansions, remove phrases that are invalid for the given components when there are other ignorable components

This commit is contained in:
Al
2018-01-02 03:49:52 -05:00
parent d8a0a344cd
commit 071aee0e85

View File

@@ -1106,6 +1106,7 @@ string_tree_t *add_string_alternatives_phrase_option(char *str, libpostal_normal
if (delete_phrases) { if (delete_phrases) {
bool is_ignorable = address_expansion_is_ignorable_for_components(expansion, options.address_components); bool is_ignorable = address_expansion_is_ignorable_for_components(expansion, options.address_components);
bool is_canonical = expansion.canonical_index == NULL_CANONICAL_INDEX; bool is_canonical = expansion.canonical_index == NULL_CANONICAL_INDEX;
bool is_valid_for_components = address_expansion_is_valid_for_components(expansion, options.address_components);
log_debug("is_ignorable = %d, is_canonical = %d, is_ambiguous = %d, current_phrase_have_ambiguous = %d, current_phrase_have_unambiguous = %d, have_strictly_ignorable = %d, current_phrase_have_ignorable=%d, current_phrase_have_possible_root=%d\n", is_ignorable, is_canonical, is_ambiguous, current_phrase_have_ambiguous, current_phrase_have_unambiguous, have_strictly_ignorable, current_phrase_have_ignorable, current_phrase_have_possible_root); log_debug("is_ignorable = %d, is_canonical = %d, is_ambiguous = %d, current_phrase_have_ambiguous = %d, current_phrase_have_unambiguous = %d, have_strictly_ignorable = %d, current_phrase_have_ignorable=%d, current_phrase_have_possible_root=%d\n", is_ignorable, is_canonical, is_ambiguous, current_phrase_have_ambiguous, current_phrase_have_unambiguous, have_strictly_ignorable, current_phrase_have_ignorable, current_phrase_have_possible_root);
@@ -1141,6 +1142,10 @@ string_tree_t *add_string_alternatives_phrase_option(char *str, libpostal_normal
log_debug("current_phrase_have_ambiguous && have_non_phrase_tokens\n"); log_debug("current_phrase_have_ambiguous && have_non_phrase_tokens\n");
log_debug("current_phrase_ignorable = %d\n", current_phrase_ignorable); log_debug("current_phrase_ignorable = %d\n", current_phrase_ignorable);
} else if (!is_valid_for_components) {
log_debug("!is_valid_for_components\n");
current_phrase_ignorable = current_phrase_have_ignorable;
log_debug("current_phrase_ignorable = %d\n", current_phrase_ignorable);
} else { } else {
log_debug("none of the above\n"); log_debug("none of the above\n");
} }