diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py index 8fbc1ae1..0fb9ced3 100644 --- a/scripts/geodata/language_id/disambiguation.py +++ b/scripts/geodata/language_id/disambiguation.py @@ -208,13 +208,13 @@ def disambiguate_language(text, languages): elif len(valid_default) == 1: current_lang = valid_default[0] + if any((current_lang not in langs for script, langs in script_langs.iteritems())): + return AMBIGUOUS_LANGUAGE + seen_languages.update(valid) if current_lang is not None: - if not any((current_lang not in langs for script, langs in script_langs.iteritems())): - return current_lang - else: - return AMBIGUOUS_LANGUAGE + return current_lang elif possible_lang is not None: if not any((possible_lang not in langs for script, langs in script_langs.iteritems())): return possible_lang diff --git a/scripts/geodata/tests/test_disambiguation.py b/scripts/geodata/tests/test_disambiguation.py index d9e90657..67d4430c 100644 --- a/scripts/geodata/tests/test_disambiguation.py +++ b/scripts/geodata/tests/test_disambiguation.py @@ -52,7 +52,7 @@ country_test_cases = [ ('128 A St.', 'ae', 'en'), # English / Arabic street address - ('Omar Street ﺵﺍﺮﻋ ﻊﻣﺭ', 'iq', AMBIGUOUS_LANGUAGE), + ('Omar Street شارع عمر', 'iq', AMBIGUOUS_LANGUAGE), # Random script ('Bayard Street - 擺也街', 'us', AMBIGUOUS_LANGUAGE),