From 0d8d3961084cc9f770e66cc527ef65bf952589f1 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 7 Dec 2015 18:09:20 -0500 Subject: [PATCH] [expansion] Fixing cases like ML King where a global (all languages) expansion subsumes the specific language expansion (like English) --- src/libpostal.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/libpostal.c b/src/libpostal.c index f1b2f8eb..f7236db0 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -23,7 +23,7 @@ typedef struct phrase_language { VECTOR_INIT(phrase_language_array, phrase_language_t) -#define ks_lt_phrase_language(a, b) ((a).phrase.start < (b).phrase.start) +#define ks_lt_phrase_language(a, b) ((a).phrase.start < (b).phrase.start || ((a).phrase.start == (b).phrase.start && (a).phrase.len > (b).phrase.len)) KSORT_INIT(phrase_language_array, phrase_language_t, ks_lt_phrase_language) @@ -167,6 +167,12 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) { for (int i = 0; i < phrases->n; i++) { phrase_lang = phrases->a[i]; + + phrase_t phrase = phrase_lang.phrase; + if (phrase.start < start) { + continue; + } + char_array_clear(key); char_array_cat(key, phrase_lang.language); @@ -174,8 +180,6 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) { size_t namespace_len = key->n; - phrase_t phrase = phrase_lang.phrase; - end = phrase.start; for (int j = start; j < end; j++) { @@ -196,7 +200,7 @@ string_tree_t *add_string_alternatives(char *str, normalize_options_t options) { token_t token; - if (value.components & options.address_components) { + if ((value.components & options.address_components) > 0) { key->n = namespace_len; for (int j = phrase.start; j < phrase.start + phrase.len; j++) { token = tokens->a[j];