[languages] Allow stopwords to help disambiguate if they can, otherwise ignore them

This commit is contained in:
Al
2015-08-23 23:04:17 -04:00
parent d14be57e73
commit 84e0982cbc
2 changed files with 4 additions and 1 deletion

View File

@@ -184,10 +184,12 @@ def disambiguate_language(text, languages):
continue
is_default = valid_languages[lang]
if canonical or (is_default and len(potentials) == 1):
if (canonical and not stopword) or (is_default and len(potentials) == 1):
valid.append(lang)
elif is_default and num_defaults > 1 and current_lang != lang:
return AMBIGUOUS_LANGUAGE
elif stopword and canonical and not is_default and lang in seen_languages:
valid.append(lang)
elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1:
possible_lang = lang if possible_lang is None or possible_lang == lang else None

View File

@@ -27,6 +27,7 @@ country_test_cases = [
('El Camino', 'us', 'es'),
('Rue Louis Phillippe', 'us', 'fr'),
('Calle Street', 'us', AMBIGUOUS_LANGUAGE),
('Del Rio Avenue', 'us', 'en'),
# Avenue + stopword
('Avenue du Bourget-du-Lac', 'je', 'fr'),