From fa7b855ecbdcc466428c2a95be135cbc9f1a58b9 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 24 Aug 2015 03:07:45 -0400 Subject: [PATCH] [languages] Earlier exit on finding ambiguous script spans --- scripts/geodata/language_id/disambiguation.py | 8 ++++---- scripts/geodata/tests/test_disambiguation.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py index 8fbc1ae1..0fb9ced3 100644 --- a/scripts/geodata/language_id/disambiguation.py +++ b/scripts/geodata/language_id/disambiguation.py @@ -208,13 +208,13 @@ def disambiguate_language(text, languages): elif len(valid_default) == 1: current_lang = valid_default[0] + if any((current_lang not in langs for script, langs in script_langs.iteritems())): + return AMBIGUOUS_LANGUAGE + seen_languages.update(valid) if current_lang is not None: - if not any((current_lang not in langs for script, langs in script_langs.iteritems())): - return current_lang - else: - return AMBIGUOUS_LANGUAGE + return current_lang elif possible_lang is not None: if not any((possible_lang not in langs for script, langs in script_langs.iteritems())): return possible_lang diff --git a/scripts/geodata/tests/test_disambiguation.py b/scripts/geodata/tests/test_disambiguation.py index d9e90657..67d4430c 100644 --- a/scripts/geodata/tests/test_disambiguation.py +++ b/scripts/geodata/tests/test_disambiguation.py @@ -52,7 +52,7 @@ country_test_cases = [ ('128 A St.', 'ae', 'en'), # English / Arabic street address - ('Omar Street ﺵﺍﺮﻋ ﻊﻣﺭ', 'iq', AMBIGUOUS_LANGUAGE), + ('Omar Street شارع عمر', 'iq', AMBIGUOUS_LANGUAGE), # Random script ('Bayard Street - 擺也街', 'us', AMBIGUOUS_LANGUAGE),