From f0dfd7850c8e1674c59207e2d4790fe140c42cad Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 11 Feb 2017 01:00:37 -0500 Subject: [PATCH] [fix] ignore punctuation in strip_components --- scripts/geodata/addresses/components.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 1d2a20aa..0766aa41 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -430,17 +430,11 @@ class AddressComponents(object): t, c = tokens if stripped and c not in (token_types.IDEOGRAPHIC_CHAR, token_types.IDEOGRAPHIC_NUMBER): stripped.append(u' ') - stripped.append(t) + if c not in PUNCTUATION_TOKEN_TYPES: + stripped.append(t) name = u''.join(stripped) - if self.parens_regex.search(name): - name = self.parens_regex.sub(six.u(''), name).strip() - - # If the name contains a comma, stop and only use the phrase before the comma - if ',' in name: - return name.split(',', 1)[0].strip() - return name parens_regex = re.compile('\(.*?\)')