[fix] language disambiguation

This commit is contained in:
Al
2016-05-23 11:54:36 -04:00
parent 2a4f8c5634
commit 66e35d517d
2 changed files with 4 additions and 4 deletions

View File

@@ -247,7 +247,7 @@ class AddressComponents(object):
  else:
  if has_non_latin_script(lang_tuples):
  for component, value in six.iteritems(components):
- language = disambiguate_language_script(value, lang_tuples)
+ language, script_langs = disambiguate_language_script(value, lang_tuples)
  if language is not UNKNOWN_LANGUAGE:
  break
  else:

View File

@@ -86,11 +86,11 @@ def disambiguate_language_script(text, languages):
  script_langs[script] = set(script_valid)
  if script_len == len(text) and len(script_valid) == 1:
- return script_valid[0]
+ return script_valid[0], script_langs
  read_len += script_len
- return UNKNOWN_LANGUAGE
+ return UNKNOWN_LANGUAGE, script_langs
  LATIN_TRANSLITERATED_SCRIPTS = {'Arabic', 'Cyrllic'}
@@ -107,7 +107,7 @@ def disambiguate_language(text, languages, scripts_only=False):
  text = safe_decode(text)
  valid_languages = OrderedDict(languages)
- language_script = disambiguate_language_script(text, languages)
+ language_script, script_langs = disambiguate_language_script(text, languages)
  if language_script is not UNKNOWN_LANGUAGE:
  return language_script