[osm] adding alt_place_names to the shared formatting class AddressComponents and making them classmethods

This commit is contained in:
Al
2016-10-20 20:41:22 -04:00
parent d9bc465c82
commit 00ebdfed7f
2 changed files with 43 additions and 39 deletions

View File

@@ -1227,14 +1227,17 @@ class AddressComponents(object):
# Matches a run of whitespace that sits between two word characters, i.e. the
# gap between adjacent words; used to join multi-word names with hyphens.
# Raw strings keep \w, \s and \- as regex escapes instead of invalid Python
# string escapes, which newer interpreters warn about. The compiled patterns
# are unchanged.
whitespace_regex = re.compile(r'(?<=[\w])[\s]+(?=[\w])')
# Matches one or more hyphens together with any whitespace on either side;
# used to turn hyphenated names back into space-separated words.
hyphen_regex = re.compile(r'[\s]*[\-]+[\s]*')
def dehyphenate_multiword_name(self, name):
    '''
    Return name with every run of hyphens (plus any whitespace touching it)
    collapsed to a single space, as matched by hyphen_regex.
    '''
    separator = six.u(' ')
    return self.hyphen_regex.sub(separator, name)
@classmethod
def dehyphenate_multiword_name(cls, name):
    '''
    Return name with every run of hyphens (plus any whitespace touching it)
    collapsed to a single space, as matched by the class-level hyphen_regex.
    '''
    separator = six.u(' ')
    return cls.hyphen_regex.sub(separator, name)
def hyphenate_multiword_name(self, name):
    '''
    Return name with each run of whitespace lying between two word
    characters (as matched by whitespace_regex) replaced by one hyphen.
    Leading and trailing whitespace is not touched by that pattern.
    '''
    joiner = six.u('-')
    return self.whitespace_regex.sub(joiner, name)
@classmethod
def hyphenate_multiword_name(cls, name):
    '''
    Return name with each run of whitespace lying between two word
    characters (as matched by the class-level whitespace_regex) replaced
    by one hyphen. Leading and trailing whitespace is not touched by that
    pattern.
    '''
    joiner = six.u('-')
    return cls.whitespace_regex.sub(joiner, name)
def strip_whitespace_and_hyphens(self, name):
    '''
    Return the first capture group of name_regex matched against the
    start of name.

    NOTE(review): name_regex is defined elsewhere on the class; judging by
    this method's name it presumably captures the text between leading and
    trailing whitespace/hyphens - confirm against its definition.

    Raises AttributeError if name_regex does not match, because match()
    then returns None.
    '''
    matched = self.name_regex.match(name)
    return matched.group(1)
@classmethod
def strip_whitespace_and_hyphens(cls, name):
    '''
    Return the first capture group of the class-level name_regex matched
    against the start of name.

    NOTE(review): name_regex is defined elsewhere on the class; judging by
    this method's name it presumably captures the text between leading and
    trailing whitespace/hyphens - confirm against its definition.

    Raises AttributeError if name_regex does not match, because match()
    then returns None.
    '''
    matched = cls.name_regex.match(name)
    return matched.group(1)
def name_hyphens(self, name, hyphenate_multiword_probability=None, remove_hyphen_probability=None):
'''
@@ -1259,6 +1262,37 @@ class AddressComponents(object):
return self.hyphenate_multiword_name(name)
return name
# Collect alternate spellings of a place name: abbreviated, de-hyphenated and
# hyphenated variants. Each variant is appended only when it differs from the
# string it was derived from; no de-duplication is performed on the result.
#
# name:     the place name to generate variants for.
# language: passed straight through to abbreviate().
# Returns the list of variants in the order they were appended (possibly empty).
#
# NOTE(review): indentation is not preserved in this view, so whether the later
# checks are nested under the earlier `if` statements cannot be determined
# here - confirm against the real file before restructuring.
# NOTE(review): abbreviate() and toponym_abbreviations_gazetteer are defined
# elsewhere; abbreviate_prob=1.0 presumably forces an abbreviation whenever
# one is available - confirm.
@classmethod
def alt_place_names(cls, name, language):
names = []
# Variant: abbreviate() applied to the name as given.
abbrev_name = abbreviate(toponym_abbreviations_gazetteer, name, language, abbreviate_prob=1.0)
if abbrev_name != name:
names.append(abbrev_name)
# Variant: hyphens replaced by spaces.
sans_hyphens = cls.dehyphenate_multiword_name(name)
if sans_hyphens != name:
names.append(sans_hyphens)
# Variant: abbreviate() applied to the de-hyphenated name.
abbrev_sans_hyphens = abbreviate(toponym_abbreviations_gazetteer, sans_hyphens, language, abbreviate_prob=1.0)
if abbrev_sans_hyphens != sans_hyphens:
names.append(abbrev_sans_hyphens)
# Variant: the abbreviated, de-hyphenated name re-joined with hyphens.
abbrev_hyphens = cls.hyphenate_multiword_name(abbrev_sans_hyphens)
if abbrev_hyphens != abbrev_sans_hyphens:
names.append(abbrev_hyphens)
# Variant: the original name with inter-word whitespace turned into hyphens.
with_hyphens = cls.hyphenate_multiword_name(name)
if with_hyphens != name:
names.append(with_hyphens)
# Variant: the abbreviated original name joined with hyphens; only relevant
# when abbreviation actually changed the name.
if abbrev_name != name:
abbrev_name_hyphens = cls.hyphenate_multiword_name(abbrev_name)
if abbrev_name_hyphens != abbrev_name:
names.append(abbrev_name_hyphens)
return names
def country_specific_cleanup(self, address_components, country):
if country == self.IRELAND:
return self.format_dublin_postal_district(address_components)

View File

@@ -473,36 +473,6 @@ class OSMAddressFormatter(object):
return postal_codes
# Collect alternate spellings of a place name: abbreviated, de-hyphenated and
# hyphenated variants. Each variant is appended only when it differs from the
# string it was derived from; no de-duplication is performed on the result.
# The hyphen handling is delegated to self.components.
#
# name:     the place name to generate variants for.
# language: passed straight through to abbreviate().
# Returns the list of variants in the order they were appended (possibly empty).
#
# NOTE(review): indentation is not preserved in this view, so whether the later
# checks are nested under the earlier `if` statements cannot be determined
# here - confirm against the real file before restructuring.
# NOTE(review): abbreviate() and toponym_abbreviations_gazetteer are defined
# elsewhere; abbreviate_prob=1.0 presumably forces an abbreviation whenever
# one is available - confirm.
def alt_place_names(self, name, language):
names = []
# Variant: abbreviate() applied to the name as given.
abbrev_name = abbreviate(toponym_abbreviations_gazetteer, name, language, abbreviate_prob=1.0)
if abbrev_name != name:
names.append(abbrev_name)
# Variant: result of dehyphenate_multiword_name() on the original name.
sans_hyphens = self.components.dehyphenate_multiword_name(name)
if sans_hyphens != name:
names.append(sans_hyphens)
# Variant: abbreviate() applied to the de-hyphenated name.
abbrev_sans_hyphens = abbreviate(toponym_abbreviations_gazetteer, sans_hyphens, language, abbreviate_prob=1.0)
if abbrev_sans_hyphens != sans_hyphens:
names.append(abbrev_sans_hyphens)
# Variant: the abbreviated, de-hyphenated name passed back through
# hyphenate_multiword_name().
abbrev_hyphens = self.components.hyphenate_multiword_name(abbrev_sans_hyphens)
if abbrev_hyphens != abbrev_sans_hyphens:
names.append(abbrev_hyphens)
# Variant: result of hyphenate_multiword_name() on the original name.
with_hyphens = self.components.hyphenate_multiword_name(name)
if with_hyphens != name:
names.append(with_hyphens)
# Variant: the abbreviated original name passed through
# hyphenate_multiword_name(); only relevant when abbreviation actually
# changed the name.
if abbrev_name != name:
abbrev_name_hyphens = self.components.hyphenate_multiword_name(abbrev_name)
if abbrev_name_hyphens != abbrev_name:
names.append(abbrev_name_hyphens)
return names
def node_place_tags(self, tags):
try:
latitude, longitude = latlon_to_decimal(tags['lat'], tags['lon'])
@@ -643,7 +613,7 @@ class OSMAddressFormatter(object):
name = self.components.strip_whitespace_and_hyphens(name)
alt_names = self.alt_place_names(name, None)
alt_names = self.components.alt_place_names(name, None)
for i in xrange(num_references if name_tag == 'name' else 1):
address_components = {component_name: name}
@@ -687,7 +657,7 @@ class OSMAddressFormatter(object):
name = self.components.strip_whitespace_and_hyphens(name)
alt_names = self.alt_place_names(name, language)
alt_names = self.components.alt_place_names(name, language)
for i in xrange(n):
address_components = {component_name: name}
@@ -722,7 +692,7 @@ class OSMAddressFormatter(object):
name = self.components.strip_whitespace_and_hyphens(name)
alt_names = self.alt_place_names(name, language)
alt_names = self.components.alt_place_names(name, language)
# Add half as many English records as the local language, every other language gets min_references / 2
for i in xrange(num_references / 2 if language == ENGLISH else min_references / 2):