[languages] Allow stopwords to help disambiguate if they can; otherwise ignore them
This commit is contained in:
@@ -184,10 +184,12 @@ def disambiguate_language(text, languages):
|
||||
continue
|
||||
is_default = valid_languages[lang]
|
||||
|
||||
if canonical or (is_default and len(potentials) == 1):
|
||||
if (canonical and not stopword) or (is_default and len(potentials) == 1):
|
||||
valid.append(lang)
|
||||
elif is_default and num_defaults > 1 and current_lang != lang:
|
||||
return AMBIGUOUS_LANGUAGE
|
||||
elif stopword and canonical and not is_default and lang in seen_languages:
|
||||
valid.append(lang)
|
||||
elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1:
|
||||
possible_lang = lang if possible_lang is None or possible_lang == lang else None
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ country_test_cases = [
|
||||
('El Camino', 'us', 'es'),
|
||||
('Rue Louis Phillippe', 'us', 'fr'),
|
||||
('Calle Street', 'us', AMBIGUOUS_LANGUAGE),
|
||||
('Del Rio Avenue', 'us', 'en'),
|
||||
|
||||
# Avenue + stopword
|
||||
('Avenue du Bourget-du-Lac', 'je', 'fr'),
|
||||
|
||||
Reference in New Issue
Block a user