[addresses] adding Calle to purely numeric Spanish street names in OSM as well
This commit is contained in:
@@ -68,6 +68,7 @@ MACAO = 'mo'
|
|||||||
|
|
||||||
JAPANESE_ROMAJI = 'ja_rm'
|
JAPANESE_ROMAJI = 'ja_rm'
|
||||||
ENGLISH = 'en'
|
ENGLISH = 'en'
|
||||||
|
SPANISH = 'es'
|
||||||
|
|
||||||
JAPANESE = 'ja'
|
JAPANESE = 'ja'
|
||||||
CHINESE = 'zh'
|
CHINESE = 'zh'
|
||||||
@@ -884,6 +885,24 @@ class AddressComponents(object):
|
|||||||
if genitive_probability is not None and random.random() < float(genitive_probability):
|
if genitive_probability is not None and random.random() < float(genitive_probability):
|
||||||
address_components[component] = self.genitive_name(address_components[component], language)
|
address_components[component] = self.genitive_name(address_components[component], language)
|
||||||
|
|
||||||
|
@classmethod
def spanish_street_name(cls, street):
    '''
    Prefix a purely numeric Spanish street name with "Calle".

    Officially most Spanish street names start with Calle, but the
    word is so common that it is frequently left out. In
    Spanish-speaking places with numbered streets, such as Mérida in
    Mexico, a GIS data set may therefore legitimately store a bare
    number like "27" as the street name. Training on
    "27/road 1/house_number" is undesirable because a numeric-only
    street is not typically written that way, yet dropping such
    streets would neglect entire cities that are predominantly a
    grid. The compromise is to add Calle (which may be abbreviated
    later).

    Returns the street unchanged when is_numeric(street) is false,
    otherwise the street prefixed with "Calle ".
    '''
    # Anything that is not purely numeric is passed through untouched.
    if not is_numeric(street):
        return street
    return six.u('Calle {}').format(street)
|
||||||
|
|
||||||
def abbreviated_state(self, state, country, language):
|
def abbreviated_state(self, state, country, language):
|
||||||
abbreviate_state_prob = float(nested_get(self.config, ('state', 'abbreviated_probability')))
|
abbreviate_state_prob = float(nested_get(self.config, ('state', 'abbreviated_probability')))
|
||||||
|
|
||||||
@@ -1672,8 +1691,14 @@ class AddressComponents(object):
|
|||||||
self.add_neighborhoods(address_components, neighborhoods, country, language, non_local_language=non_local_language,
|
self.add_neighborhoods(address_components, neighborhoods, country, language, non_local_language=non_local_language,
|
||||||
language_suffix=language_suffix)
|
language_suffix=language_suffix)
|
||||||
|
|
||||||
street = address_components.get(AddressFormatter.ROAD)
|
|
||||||
self.cleanup_street(address_components)
|
self.cleanup_street(address_components)
|
||||||
|
street = address_components.get(AddressFormatter.ROAD)
|
||||||
|
|
||||||
|
if language == SPANISH and street:
|
||||||
|
norm_street = self.spanish_street_name(street)
|
||||||
|
if norm_street:
|
||||||
|
address_components[AddressFormatter.ROAD] = norm_street
|
||||||
|
street = norm_street
|
||||||
|
|
||||||
self.cleanup_boundary_names(address_components)
|
self.cleanup_boundary_names(address_components)
|
||||||
|
|
||||||
|
|||||||
@@ -195,24 +195,6 @@ class OpenAddressesFormatter(object):
|
|||||||
pass
|
pass
|
||||||
return num
|
return num
|
||||||
|
|
||||||
@classmethod
def spanish_street_name(cls, street):
    '''
    Prefix a purely numeric Spanish street name with "Calle".

    Officially most Spanish street names start with Calle, but the
    word is so common that it is frequently left out. In
    Spanish-speaking places with numbered streets, such as Mérida in
    Mexico, a GIS data set may therefore legitimately store a bare
    number like "27" as the street name. Training on
    "27/road 1/house_number" is undesirable because a numeric-only
    street is not typically written that way, yet dropping such
    streets would neglect entire cities that are predominantly a
    grid. The compromise is to add Calle (which may be abbreviated
    later).

    Returns the street unchanged when is_numeric(street) is false,
    otherwise the street prefixed with "Calle ".
    '''
    # Anything that is not purely numeric is passed through untouched.
    if not is_numeric(street):
        return street
    return six.u('Calle {}').format(street)
|
|
||||||
|
|
||||||
def strip_unit_phrases_for_language(self, value, language):
|
def strip_unit_phrases_for_language(self, value, language):
|
||||||
if language in self.unit_type_regexes:
|
if language in self.unit_type_regexes:
|
||||||
return self.unit_type_regexes[language].sub(six.u(''), value)
|
return self.unit_type_regexes[language].sub(six.u(''), value)
|
||||||
@@ -300,7 +282,7 @@ class OpenAddressesFormatter(object):
|
|||||||
value = mapped_values[key].get(value, value)
|
value = mapped_values[key].get(value, value)
|
||||||
|
|
||||||
if key == AddressFormatter.ROAD and language == SPANISH:
|
if key == AddressFormatter.ROAD and language == SPANISH:
|
||||||
value = self.spanish_street_name(value)
|
value = self.components.spanish_street_name(value)
|
||||||
|
|
||||||
if key in AddressFormatter.BOUNDARY_COMPONENTS and key != AddressFormatter.POSTCODE:
|
if key in AddressFormatter.BOUNDARY_COMPONENTS and key != AddressFormatter.POSTCODE:
|
||||||
if add_osm_boundaries:
|
if add_osm_boundaries:
|
||||||
|
|||||||
Reference in New Issue
Block a user