[fix] derive whitespace and state inside normalized_place_name instead of taking them as parameters; pass all candidate languages to normalize_place_names
This commit is contained in:
@@ -188,7 +188,7 @@ class AddressExpander(object):
|
|||||||
names.add(v)
|
names.add(v)
|
||||||
return names
|
return names
|
||||||
|
|
||||||
def normalized_place_name(self, name, tag, osm_components, country=None, state=None, languages=None, whitespace=True):
|
def normalized_place_name(self, name, tag, osm_components, country=None, languages=None):
|
||||||
'''
|
'''
|
||||||
Multiple place names
|
Multiple place names
|
||||||
--------------------
|
--------------------
|
||||||
@@ -210,11 +210,11 @@ class AddressExpander(object):
|
|||||||
for cn in component_names:
|
for cn in component_names:
|
||||||
components[cn.lower()].add(normalized_key)
|
components[cn.lower()].add(normalized_key)
|
||||||
|
|
||||||
if country and languages and state:
|
if normalized_key == AddressFormatter.STATE:
|
||||||
for language in languages:
|
for language in languages:
|
||||||
state_code = state_abbreviations.get_abbreviation(country, language, state)
|
state_code = state_abbreviations.get_abbreviation(country, language, state)
|
||||||
if state_code:
|
if state_code:
|
||||||
names.add(state_code.upper())
|
names.add(state_code.upper())
|
||||||
|
|
||||||
phrase_filter = PhraseFilter([(n.lower(), '') for n in names])
|
phrase_filter = PhraseFilter([(n.lower(), '') for n in names])
|
||||||
|
|
||||||
@@ -230,6 +230,7 @@ class AddressExpander(object):
|
|||||||
|
|
||||||
for is_phrase, phrase_tokens, value in phrases:
|
for is_phrase, phrase_tokens, value in phrases:
|
||||||
if is_phrase:
|
if is_phrase:
|
||||||
|
whitespace = not any((c in token_types.IDEOGRAPHIC_CHAR, token_types.IDEOGRAPHIC_NUMBER) for t, c in current_phrase_tokens)
|
||||||
join_phrase = six.u(' ') if whitespace else six.u('')
|
join_phrase = six.u(' ') if whitespace else six.u('')
|
||||||
|
|
||||||
if num_phrases > 0:
|
if num_phrases > 0:
|
||||||
@@ -267,13 +268,12 @@ class AddressExpander(object):
|
|||||||
|
|
||||||
def normalize_place_names(self, address_components, osm_components, country=None, languages=None, whitespace=True):
|
def normalize_place_names(self, address_components, osm_components, country=None, languages=None, whitespace=True):
|
||||||
components = {}
|
components = {}
|
||||||
state = address_components.get(AddressFormatter.STATE, None)
|
|
||||||
|
|
||||||
for key in list(address_components):
|
for key in list(address_components):
|
||||||
name = address_components[key]
|
name = address_components[key]
|
||||||
if key in self.BOUNDARY_COMPONENTS:
|
if key in self.BOUNDARY_COMPONENTS:
|
||||||
name = self.normalized_place_name(name, key, osm_components, country=country,
|
name = self.normalized_place_name(name, key, osm_components, country=country,
|
||||||
state=state, languages=languages, whitespace=whitespace)
|
languages=languages, whitespace=whitespace)
|
||||||
|
|
||||||
components[key] = name
|
components[key] = name
|
||||||
return components
|
return components
|
||||||
@@ -710,8 +710,10 @@ class AddressExpander(object):
|
|||||||
|
|
||||||
street = address_components.get(AddressFormatter.ROAD)
|
street = address_components.get(AddressFormatter.ROAD)
|
||||||
|
|
||||||
|
all_languages = set([l['lang'] for l in candidate_languages])
|
||||||
|
|
||||||
all_osm_components = osm_components + neighborhoods
|
all_osm_components = osm_components + neighborhoods
|
||||||
self.normalize_place_names(address_components, all_osm_components, country=country)
|
self.normalize_place_names(address_components, all_osm_components, country=country, languages=all_languages)
|
||||||
|
|
||||||
self.replace_name_affixes(address_components)
|
self.replace_name_affixes(address_components)
|
||||||
|
|
||||||
@@ -783,8 +785,10 @@ class AddressExpander(object):
|
|||||||
self.add_neighborhoods(address_components, neighborhoods,
|
self.add_neighborhoods(address_components, neighborhoods,
|
||||||
osm_suffix=osm_suffix)
|
osm_suffix=osm_suffix)
|
||||||
|
|
||||||
|
all_languages = set([l['lang'] for l in candidate_languages])
|
||||||
|
|
||||||
all_osm_components = osm_components + neighborhoods
|
all_osm_components = osm_components + neighborhoods
|
||||||
self.normalize_place_names(address_components, all_osm_components, country=country)
|
self.normalize_place_names(address_components, all_osm_components, country=country, languages=all_languages)
|
||||||
|
|
||||||
self.replace_name_affixes(address_components)
|
self.replace_name_affixes(address_components)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user