diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 9d233b78..3ae093c7 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -229,24 +229,27 @@ class AddressExpander(object): join_phrase = six.u(' ') if whitespace else six.u('') if num_phrases > 0: - current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len] - - current_phrase = join_phrase.join([t for t, c in current_phrase_tokens]) - # Handles cases like addr:city="Harlem" when Harlem is a neighborhood - tags = components.get(current_phrase, set()) - if tags and tag not in tags: - return None - # Return phrase with original capitalization return join_phrase.join([t for t, c in tokens[:total_tokens]]) elif num_phrases == 0 and total_tokens > 0: phrase = join_phrase.join([t for t, c in phrase_tokens]) if tag not in components.get(phrase, set()): return None + elif num_phrases == 0: + current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len] + current_phrase = join_phrase.join([t for t, c in current_phrase_tokens]) current_phrase_start = total_tokens current_phrase_len = len(phrase_tokens) + current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len] + + current_phrase = join_phrase.join([t for t, c in current_phrase_tokens]) + # Handles cases like addr:city="Harlem" when Harlem is a neighborhood + tags = components.get(current_phrase, set()) + if tags and tag not in tags: + return None + total_tokens += len(phrase_tokens) num_phrases += 1 else: