[addresses] Strip parentheticals from admin boundary names, since cities in e.g. Switzerland sometimes appear in OSM with a parenthesized suffix, like "Oberwil (ZG)"

This commit is contained in:
Al
2017-01-08 03:43:22 -05:00
parent e10c156176
commit a6d94f998b

View File

@@ -418,6 +418,8 @@ class AddressComponents(object):
return names, components return names, components
# Matches one parenthesized segment; non-greedy so each "(...)" pair is matched separately.
parens_regex = re.compile(r'\(.*?\)')
def normalized_place_name(self, name, tag, osm_components, country=None, languages=None, phrase_from_component=False): def normalized_place_name(self, name, tag, osm_components, country=None, languages=None, phrase_from_component=False):
''' '''
Multiple place names Multiple place names
@@ -480,6 +482,9 @@ class AddressComponents(object):
else: else:
total_tokens += 1 total_tokens += 1
# Drop parenthesized qualifiers from the name, e.g. "Oberwil (ZG)" -> "Oberwil"
if self.parens_regex.search(name):
    # parens_regex is a class attribute, so inside a method it must be reached through self
    name = self.parens_regex.sub(six.u(''), name).strip()
# If the name contains a comma, stop and only use the phrase before the comma # If the name contains a comma, stop and only use the phrase before the comma
if ',' in name: if ',' in name:
return name.split(',', 1)[0].strip() return name.split(',', 1)[0].strip()