From 890268aa87308601025bffe770ae8e4eb49436e2 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 24 May 2016 12:13:58 -0400 Subject: [PATCH] [languages] Use English formats for Romanized CJK --- resources/parser/default.yaml | 5 +++++ scripts/geodata/addresses/components.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/resources/parser/default.yaml b/resources/parser/default.yaml index 12e6a942..c837c65c 100644 --- a/resources/parser/default.yaml +++ b/resources/parser/default.yaml @@ -7,6 +7,11 @@ languages: # Replace user-tagged admin components with the non-local language version replace_non_local_probability: 0.4 + language_code_replacements: + ja_rm: en + ko_rm: en + zh_pinyin: en + # Dependencies for including each component in an "address" # Two-way dependencies are not an issue component_dependencies: diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 55a2653a..95f9addf 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -136,6 +136,8 @@ class AddressComponents(object): # Non-admin component dropout self.address_level_dropout_probabilities = {k: v['probability'] for k, v in six.iteritems(self.config['dropout'])} + self.language_code_replacements = nested_get(self.config, ('languages', 'language_code_replacements')) + self.osm_admin_rtree = osm_admin_rtree self.language_rtree = language_rtree self.neighborhoods_rtree = neighborhoods_rtree @@ -1105,6 +1107,10 @@ class AddressComponents(object): self.drop_invalid_components(address_components) + if language_suffix: + suffix_lang = language_suffix.lstrip(':') + language = self.language_code_replacements.get(suffix_lang, language) + return address_components, country, language def limited(self, address_components, latitude, longitude):