[languages] Non-default language canonicals, more test cases

This commit is contained in:
Al
2015-08-24 02:21:53 -04:00
parent c1ce91abbf
commit e1d336716c
2 changed files with 10 additions and 2 deletions

View File

@@ -195,7 +195,8 @@ def disambiguate_language(text, languages):
elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1: elif not seen_languages and len(potentials) == 1 and len(t[0][1]) > 1:
possible_lang = lang if possible_lang is None or possible_lang == lang else None possible_lang = lang if possible_lang is None or possible_lang == lang else None
if seen_languages and valid and not any((l in seen_languages for l in valid)): if seen_languages and valid and not any((l in seen_languages for l in valid)) and \
(not any((valid_languages.get(l) for l in valid)) or any((valid_languages.get(l) for l in seen_languages))):
return AMBIGUOUS_LANGUAGE return AMBIGUOUS_LANGUAGE
if len(valid) == 1: if len(valid) == 1:

View File

@@ -29,13 +29,20 @@ country_test_cases = [
('No 2 School House', 'us', UNKNOWN_LANGUAGE), ('No 2 School House', 'us', UNKNOWN_LANGUAGE),
('E Thetford Rd', 'us', 'en'), ('E Thetford Rd', 'us', 'en'),
('El Camino', 'us', 'es'), ('El Camino', 'us', 'es'),
('The El Camino', 'us', 'en'),
('Via Antiqua Street', 'us', 'en'),
('Salt Evaporator Plan Road', 'us', 'en'),
('Calle Las Brisas North', 'us', 'en'),
('Chateau Estates', 'us', 'en'),
('Grand Boulevard', 'us', 'en'),
('Rue Louis Phillippe', 'us', 'fr'), ('Rue Louis Phillippe', 'us', 'fr'),
('Calle Street', 'us', AMBIGUOUS_LANGUAGE), ('Calle Street', 'us', 'en'),
('Del Rio Avenue', 'us', 'en'), ('Del Rio Avenue', 'us', 'en'),
('South Signal Butte Road', 'us', 'en'), ('South Signal Butte Road', 'us', 'en'),
('Chief All Over', 'us', UNKNOWN_LANGUAGE), ('Chief All Over', 'us', UNKNOWN_LANGUAGE),
('South Alameda Street', 'us', 'en'), ('South Alameda Street', 'us', 'en'),
('The Alameda', 'us', 'en'), ('The Alameda', 'us', 'en'),
('Rincon Road', 'us', 'en'),
# Avenue + stopword # Avenue + stopword
('Avenue du Bourget-du-Lac', 'je', 'fr'), ('Avenue du Bourget-du-Lac', 'je', 'fr'),