[languages] Allow stopwords to help disambiguate if they can, otherwise ignore them
This commit is contained in:
@@ -184,10 +184,12 @@ def disambiguate_language(text, languages):
|
|||||||
continue
|
continue
|
||||||
is_default = valid_languages[lang]
|
is_default = valid_languages[lang]
|
||||||
|
|
||||||
if canonical or (is_default and len(potentials) == 1):
|
if (canonical and not stopword) or (is_default and len(potentials) == 1):
|
||||||
valid.append(lang)
|
valid.append(lang)
|
||||||
elif is_default and num_defaults > 1 and current_lang != lang:
|
elif is_default and num_defaults > 1 and current_lang != lang:
|
||||||
return AMBIGUOUS_LANGUAGE
|
return AMBIGUOUS_LANGUAGE
|
||||||
|
elif stopword and canonical and not is_default and lang in seen_languages:
|
||||||
|
valid.append(lang)
|
||||||
elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1:
|
elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1:
|
||||||
possible_lang = lang if possible_lang is None or possible_lang == lang else None
|
possible_lang = lang if possible_lang is None or possible_lang == lang else None
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ country_test_cases = [
|
|||||||
('El Camino', 'us', 'es'),
|
('El Camino', 'us', 'es'),
|
||||||
('Rue Louis Phillippe', 'us', 'fr'),
|
('Rue Louis Phillippe', 'us', 'fr'),
|
||||||
('Calle Street', 'us', AMBIGUOUS_LANGUAGE),
|
('Calle Street', 'us', AMBIGUOUS_LANGUAGE),
|
||||||
|
('Del Rio Avenue', 'us', 'en'),
|
||||||
|
|
||||||
# Avenue + stopword
|
# Avenue + stopword
|
||||||
('Avenue du Bourget-du-Lac', 'je', 'fr'),
|
('Avenue du Bourget-du-Lac', 'je', 'fr'),
|
||||||
|
|||||||
Reference in New Issue
Block a user