[addresses] move suffix checks to the end of expanded(), check language_suffix rather than language for Romaji, so language can remain Japanese. For CJK languages, change language before adding generated components
This commit is contained in:
@@ -150,6 +150,10 @@ class AddressComponents(object):
|
|||||||
INDUSTRIAL = 'industrial'
|
INDUSTRIAL = 'industrial'
|
||||||
UNIVERSITY = 'university'
|
UNIVERSITY = 'university'
|
||||||
|
|
||||||
|
language_code_aliases = {
|
||||||
|
'zh_py': 'zh_pinyin'
|
||||||
|
}
|
||||||
|
|
||||||
sub_building_component_class_map = {
|
sub_building_component_class_map = {
|
||||||
AddressFormatter.ENTRANCE: Entrance,
|
AddressFormatter.ENTRANCE: Entrance,
|
||||||
AddressFormatter.STAIRCASE: Staircase,
|
AddressFormatter.STAIRCASE: Staircase,
|
||||||
@@ -708,12 +712,8 @@ class AddressComponents(object):
|
|||||||
if ':' not in k:
|
if ':' not in k:
|
||||||
continue
|
continue
|
||||||
splits = k.split(':')
|
splits = k.split(':')
|
||||||
if len(splits) > 0 and splits[0] == 'name' and '_' in splits[-1]:
|
if len(splits) > 0 and splits[0] == 'name' and '_' in splits[-1] and splits[-1].split('_', 1)[0] == use_language:
|
||||||
lang, script = splits[-1].split('_', 1)
|
|
||||||
if lang in CJK_LANGUAGES or script.lower() in self.valid_scripts:
|
|
||||||
language_scripts[splits[-1]] += 1
|
language_scripts[splits[-1]] += 1
|
||||||
else:
|
|
||||||
language_scripts[None] += 1
|
|
||||||
elif k == 'name' or (splits[0] == 'name' and splits[-1]) == use_language:
|
elif k == 'name' or (splits[0] == 'name' and splits[-1]) == use_language:
|
||||||
language_scripts[None] += 1
|
language_scripts[None] += 1
|
||||||
|
|
||||||
@@ -1080,7 +1080,7 @@ class AddressComponents(object):
|
|||||||
self.abbreviate_admin_components(neighborhood_components, country, language)
|
self.abbreviate_admin_components(neighborhood_components, country, language)
|
||||||
|
|
||||||
address_components.update(neighborhood_components)
|
address_components.update(neighborhood_components)
|
||||||
if country == JAPAN and (language == JAPANESE_ROMAJI or non_local_language == ENGLISH):
|
if country == JAPAN and (language_suffix.endswith(JAPANESE_ROMAJI) or non_local_language == ENGLISH):
|
||||||
self.format_japanese_neighborhood_romaji(address_components)
|
self.format_japanese_neighborhood_romaji(address_components)
|
||||||
|
|
||||||
def generate_sub_building_component(self, component, address_components, language, country=None, **kw):
|
def generate_sub_building_component(self, component, address_components, language, country=None, **kw):
|
||||||
@@ -1545,10 +1545,6 @@ class AddressComponents(object):
|
|||||||
language = self.address_language(address_components, candidate_languages)
|
language = self.address_language(address_components, candidate_languages)
|
||||||
non_local_language = self.non_local_language()
|
non_local_language = self.non_local_language()
|
||||||
language_suffix = self.pick_language_suffix(all_osm_components, language, non_local_language, more_than_one_official_language)
|
language_suffix = self.pick_language_suffix(all_osm_components, language, non_local_language, more_than_one_official_language)
|
||||||
if language_suffix and not non_local_language:
|
|
||||||
suffix = language_suffix.lstrip(':')
|
|
||||||
if suffix.startswith(language) and suffix != language:
|
|
||||||
language = suffix
|
|
||||||
else:
|
else:
|
||||||
language_suffix = ':{}'.format(language)
|
language_suffix = ':{}'.format(language)
|
||||||
|
|
||||||
@@ -1580,6 +1576,13 @@ class AddressComponents(object):
|
|||||||
self.cleanup_boundary_names(address_components)
|
self.cleanup_boundary_names(address_components)
|
||||||
self.country_specific_cleanup(address_components, country)
|
self.country_specific_cleanup(address_components, country)
|
||||||
|
|
||||||
|
language_altered = False
|
||||||
|
if language_suffix and not non_local_language and language in CJK_LANGUAGES:
|
||||||
|
suffix = language_suffix.lstrip(':').lower()
|
||||||
|
if suffix.split('_', 1)[0] == language:
|
||||||
|
language = self.language_code_aliases.get(suffix, suffix)
|
||||||
|
language_altered = True
|
||||||
|
|
||||||
self.replace_name_affixes(address_components, non_local_language or language, country=country)
|
self.replace_name_affixes(address_components, non_local_language or language, country=country)
|
||||||
|
|
||||||
self.replace_names(address_components)
|
self.replace_names(address_components)
|
||||||
@@ -1589,6 +1592,7 @@ class AddressComponents(object):
|
|||||||
self.cleanup_house_number(address_components)
|
self.cleanup_house_number(address_components)
|
||||||
|
|
||||||
self.remove_numeric_boundary_names(address_components)
|
self.remove_numeric_boundary_names(address_components)
|
||||||
|
|
||||||
self.add_house_number_phrase(address_components, language, country=country)
|
self.add_house_number_phrase(address_components, language, country=country)
|
||||||
self.add_postcode_phrase(address_components, language, country=country)
|
self.add_postcode_phrase(address_components, language, country=country)
|
||||||
self.add_metro_station_phrase(address_components, language, country=country)
|
self.add_metro_station_phrase(address_components, language, country=country)
|
||||||
@@ -1625,8 +1629,13 @@ class AddressComponents(object):
|
|||||||
|
|
||||||
self.drop_invalid_components(address_components, country)
|
self.drop_invalid_components(address_components, country)
|
||||||
|
|
||||||
if language_suffix and not non_local_language:
|
if language_suffix and not non_local_language and not language_altered:
|
||||||
language = language_suffix.lstrip(':')
|
language = language_suffix.lstrip(':').lower()
|
||||||
|
if '_' in language:
|
||||||
|
lang, script = language.split('_', 1)
|
||||||
|
if lang not in CJK_LANGUAGES and script.lower() not in self.valid_scripts:
|
||||||
|
language = lang
|
||||||
|
|
||||||
return address_components, country, language
|
return address_components, country, language
|
||||||
|
|
||||||
def limited(self, address_components, latitude, longitude):
|
def limited(self, address_components, latitude, longitude):
|
||||||
@@ -1690,5 +1699,9 @@ class AddressComponents(object):
|
|||||||
|
|
||||||
if language_suffix and not non_local_language:
|
if language_suffix and not non_local_language:
|
||||||
language = language_suffix.lstrip(':').lower()
|
language = language_suffix.lstrip(':').lower()
|
||||||
|
if '_' in language:
|
||||||
|
lang, script = language.split('_', 1)
|
||||||
|
if lang not in CJK_LANGUAGES and script.lower() not in self.valid_scripts:
|
||||||
|
language = lang
|
||||||
|
|
||||||
return address_components, country, language
|
return address_components, country, language
|
||||||
|
|||||||
Reference in New Issue
Block a user