From 308080f6ee10902cfb2a5d885e1a24b5a378752a Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 24 May 2016 16:52:08 -0400 Subject: [PATCH] [formatting] Moving language country overrides to formatter config so actual language is retained --- resources/formatting/global.yaml | 5 +++++ resources/parser/default.yaml | 5 ----- scripts/geodata/address_formatting/formatter.py | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/resources/formatting/global.yaml b/resources/formatting/global.yaml index 9deeaedf..04a79966 100644 --- a/resources/formatting/global.yaml +++ b/resources/formatting/global.yaml @@ -3,6 +3,11 @@ global: place_direction: right direction_probability: 0.9 + language_code_replacements: + ja_rm: en + ko_rm: en + zh_pinyin: en + insertions: # For each component, insertions are mutually exclusive # They don't have to sum to 1 (especially for components diff --git a/resources/parser/default.yaml b/resources/parser/default.yaml index c837c65c..12e6a942 100644 --- a/resources/parser/default.yaml +++ b/resources/parser/default.yaml @@ -7,11 +7,6 @@ languages: # Replace user-tagged admin components with the non-local language version replace_non_local_probability: 0.4 - language_code_replacements: - ja_rm: en - ko_rm: en - zh_pinyin: en - # Dependencies for including each component in an "address" # Two-way dependencies are not an issue component_dependencies: diff --git a/scripts/geodata/address_formatting/formatter.py b/scripts/geodata/address_formatting/formatter.py index 0a0152d8..847146eb 100644 --- a/scripts/geodata/address_formatting/formatter.py +++ b/scripts/geodata/address_formatting/formatter.py @@ -180,6 +180,8 @@ class AddressFormatter(object): self.load_config() self.load_country_formats() + self.language_code_replacements = nested_get(self.config, ('global', 'languages', 'language_code_replacements')) + self.setup_insertion_probabilities() self.setup_no_name_templates() self.setup_place_only_templates() @@ -681,6 +683,7 @@ class AddressFormatter(object): def get_template_from_config(self, config, country, language=None): template = None if language: + language = self.language_code_replacements.get(language, language) # For countries like China and Japan where the country format varies # based on which language is being used template = config.get('{}_{}'.format(country.upper(), language.lower()), None)