[fix] ignore punctuation in strip_components

This commit is contained in:
Al
2017-02-11 01:00:37 -05:00
parent f07d93df2c
commit f0dfd7850c

View File

@@ -430,17 +430,11 @@ class AddressComponents(object):
                 t, c = tokens
                 if stripped and c not in (token_types.IDEOGRAPHIC_CHAR, token_types.IDEOGRAPHIC_NUMBER):
                     stripped.append(u' ')
-                stripped.append(t)
+                if c not in PUNCTUATION_TOKEN_TYPES:
+                    stripped.append(t)
         name = u''.join(stripped)
-        if self.parens_regex.search(name):
-            name = self.parens_regex.sub(six.u(''), name).strip()
-        # If the name contains a comma, stop and only use the phrase before the comma
-        if ',' in name:
-            return name.split(',', 1)[0].strip()
         return name
     parens_regex = re.compile('\(.*?\)')