[languages] Better handling of non-default language canonicals in default language text

This commit is contained in:
Al
2015-08-24 01:26:01 -04:00
parent 96d7b990b5
commit c1ce91abbf
5 changed files with 20 additions and 5 deletions

View File

@@ -184,9 +184,11 @@ def disambiguate_language(text, languages):
continue
is_default = valid_languages[lang]
if (canonical and not stopword) or (is_default and len(potentials) == 1):
lang_valid = is_default or not seen_languages or lang in seen_languages
if lang_valid and ((canonical and not stopword) or (is_default and len(potentials) == 1)):
valid.append(lang)
elif is_default and num_defaults > 1 and current_lang != lang:
elif is_default and num_defaults > 1 and current_lang is not None and current_lang != lang:
return AMBIGUOUS_LANGUAGE
elif stopword and canonical and not is_default and lang in seen_languages:
valid.append(lang)