[fix] check the first phrase for components and bail if it matches something other than the specified tag

This commit is contained in:
Al
2016-05-05 12:46:01 -04:00
parent 8370a41ec0
commit e5fdd915d0

View File

@@ -229,6 +229,19 @@ class AddressExpander(object):
join_phrase = six.u(' ') if whitespace else six.u('')
if num_phrases > 0:
# Return phrase with original capitalization
return join_phrase.join([t for t, c in tokens[:total_tokens]])
elif num_phrases == 0 and total_tokens > 0:
phrase = join_phrase.join([t for t, c in phrase_tokens])
if tag not in components.get(phrase, set()):
return None
elif num_phrases == 0:
current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len]
current_phrase = join_phrase.join([t for t, c in current_phrase_tokens])
current_phrase_start = total_tokens
current_phrase_len = len(phrase_tokens)
current_phrase_tokens = tokens_lower[current_phrase_start:current_phrase_start + current_phrase_len]
current_phrase = join_phrase.join([t for t, c in current_phrase_tokens])
@@ -237,16 +250,6 @@ class AddressExpander(object):
if tags and tag not in tags:
return None
# Return phrase with original capitalization
return join_phrase.join([t for t, c in tokens[:total_tokens]])
elif num_phrases == 0 and total_tokens > 0:
phrase = join_phrase.join([t for t, c in phrase_tokens])
if tag not in components.get(phrase, set()):
return None
current_phrase_start = total_tokens
current_phrase_len = len(phrase_tokens)
total_tokens += len(phrase_tokens)
num_phrases += 1
else: