[languages] Non-default language canonicals, more test cases
This commit is contained in:
@@ -195,7 +195,8 @@ def disambiguate_language(text, languages):
|
||||
elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1:
|
||||
possible_lang = lang if possible_lang is None or possible_lang == lang else None
|
||||
|
||||
if seen_languages and valid and not any((l in seen_languages for l in valid)):
|
||||
if seen_languages and valid and not any((l in seen_languages for l in valid)) and \
|
||||
(not any((valid_languages.get(l) for l in valid)) or any((valid_languages.get(l) for l in seen_languages))):
|
||||
return AMBIGUOUS_LANGUAGE
|
||||
|
||||
if len(valid) == 1:
|
||||
|
||||
@@ -29,13 +29,20 @@ country_test_cases = [
|
||||
('No 2 School House', 'us', UNKNOWN_LANGUAGE),
|
||||
('E Thetford Rd', 'us', 'en'),
|
||||
('El Camino', 'us', 'es'),
|
||||
('The El Camino', 'us', 'en'),
|
||||
('Via Antiqua Street', 'us', 'en'),
|
||||
('Salt Evaporator Plan Road', 'us', 'en'),
|
||||
('Calle Las Brisas North', 'us', 'en'),
|
||||
('Chateau Estates', 'us', 'en'),
|
||||
('Grand Boulevard', 'us', 'en'),
|
||||
('Rue Louis Phillippe', 'us', 'fr'),
|
||||
('Calle Street', 'us', AMBIGUOUS_LANGUAGE),
|
||||
('Calle Street', 'us', 'en'),
|
||||
('Del Rio Avenue', 'us', 'en'),
|
||||
('South Signal Butte Road', 'us', 'en'),
|
||||
('Chief All Over', 'us', UNKNOWN_LANGUAGE),
|
||||
('South Alameda Street', 'us', 'en'),
|
||||
('The Alameda', 'us', 'en'),
|
||||
('Rincon Road', 'us', 'en'),
|
||||
|
||||
# Avenue + stopword
|
||||
('Avenue du Bourget-du-Lac', 'je', 'fr'),
|
||||
|
||||
Reference in New Issue
Block a user