From 3dc2a922fbb04126121decd1db7457af671a2f28 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 22 Nov 2016 18:27:23 -0500 Subject: [PATCH] [addresses/languages] if there's only one default language and we don't have a road name or a Unicode script to disambiguate, assume the default (e.g. English in the US unless there's a Spanish/French road name). Can affect things like state abbreviations. --- scripts/geodata/addresses/components.py | 6 +++++- scripts/geodata/language_id/disambiguation.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 2533185c..bcdfd521 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -284,7 +284,11 @@ class AddressComponents(object): else: language = UNKNOWN_LANGUAGE else: - language = UNKNOWN_LANGUAGE + default_languages = [lang for lang, default in candidate_languages if default] + if len(default_languages) == 1: + language = default_languages[0] + else: + language = UNKNOWN_LANGUAGE return language diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py index 7f162a99..ae2918be 100644 --- a/scripts/geodata/language_id/disambiguation.py +++ b/scripts/geodata/language_id/disambiguation.py @@ -97,7 +97,7 @@ LATIN_TRANSLITERATED_SCRIPTS = {'Arabic', 'Cyrllic'} def has_non_latin_script(languages): for lang, is_default in languages: - scripts = script_languages.get(lang, set()) + scripts = lang_scripts.get(lang, set()) if LATIN_SCRIPT not in scripts or scripts & LATIN_TRANSLITERATED_SCRIPTS: return True return False