diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py index 029484a6..c51e15a6 100644 --- a/scripts/geodata/addresses/components.py +++ b/scripts/geodata/addresses/components.py @@ -247,7 +247,7 @@ class AddressComponents(object): else: if has_non_latin_script(lang_tuples): for component, value in six.iteritems(components): - language = disambiguate_language_script(value, lang_tuples) + language, script_langs = disambiguate_language_script(value, lang_tuples) if language is not UNKNOWN_LANGUAGE: break else: diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py index c553553f..7f162a99 100644 --- a/scripts/geodata/language_id/disambiguation.py +++ b/scripts/geodata/language_id/disambiguation.py @@ -86,11 +86,11 @@ def disambiguate_language_script(text, languages): script_langs[script] = set(script_valid) if script_len == len(text) and len(script_valid) == 1: - return script_valid[0] + return script_valid[0], script_langs read_len += script_len - return UNKNOWN_LANGUAGE + return UNKNOWN_LANGUAGE, script_langs LATIN_TRANSLITERATED_SCRIPTS = {'Arabic', 'Cyrllic'} @@ -107,7 +107,7 @@ def disambiguate_language(text, languages, scripts_only=False): text = safe_decode(text) valid_languages = OrderedDict(languages) - language_script = disambiguate_language_script(text, languages) + language_script, script_langs = disambiguate_language_script(text, languages) if language_script is not UNKNOWN_LANGUAGE: return language_script