[fix] look up the first phrase in the components dictionary and bail out (return None) if it matches a component other than the specified tag
This commit is contained in:
@@ -229,24 +229,27 @@ class AddressExpander(object):
|
||||
join_phrase = six.u(' ') if whitespace else six.u('')
|
||||
|
||||
if num_phrases > 0:
|
||||
current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len]
|
||||
|
||||
current_phrase = join_phrase.join([t for t, c in current_phrase_tokens])
|
||||
# Handles cases like addr:city="Harlem" when Harlem is a neighborhood
|
||||
tags = components.get(current_phrase, set())
|
||||
if tags and tag not in tags:
|
||||
return None
|
||||
|
||||
# Return phrase with original capitalization
|
||||
return join_phrase.join([t for t, c in tokens[:total_tokens]])
|
||||
elif num_phrases == 0 and total_tokens > 0:
|
||||
phrase = join_phrase.join([t for t, c in phrase_tokens])
|
||||
if tag not in components.get(phrase, set()):
|
||||
return None
|
||||
elif num_phrases == 0:
|
||||
current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len]
|
||||
current_phrase = join_phrase.join([t for t, c in current_phrase_tokens])
|
||||
|
||||
current_phrase_start = total_tokens
|
||||
current_phrase_len = len(phrase_tokens)
|
||||
|
||||
current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len]
|
||||
|
||||
current_phrase = join_phrase.join([t for t, c in current_phrase_tokens])
|
||||
# Handles cases like addr:city="Harlem" when Harlem is a neighborhood
|
||||
tags = components.get(current_phrase, set())
|
||||
if tags and tag not in tags:
|
||||
return None
|
||||
|
||||
total_tokens += len(phrase_tokens)
|
||||
num_phrases += 1
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user