[languages] Earlier exit on finding ambiguous script spans
This commit is contained in:
@@ -208,13 +208,13 @@ def disambiguate_language(text, languages):
|
|||||||
elif len(valid_default) == 1:
|
elif len(valid_default) == 1:
|
||||||
current_lang = valid_default[0]
|
current_lang = valid_default[0]
|
||||||
|
|
||||||
|
if any((current_lang not in langs for script, langs in script_langs.iteritems())):
|
||||||
|
return AMBIGUOUS_LANGUAGE
|
||||||
|
|
||||||
seen_languages.update(valid)
|
seen_languages.update(valid)
|
||||||
|
|
||||||
if current_lang is not None:
|
if current_lang is not None:
|
||||||
if not any((current_lang not in langs for script, langs in script_langs.iteritems())):
|
return current_lang
|
||||||
return current_lang
|
|
||||||
else:
|
|
||||||
return AMBIGUOUS_LANGUAGE
|
|
||||||
elif possible_lang is not None:
|
elif possible_lang is not None:
|
||||||
if not any((possible_lang not in langs for script, langs in script_langs.iteritems())):
|
if not any((possible_lang not in langs for script, langs in script_langs.iteritems())):
|
||||||
return possible_lang
|
return possible_lang
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ country_test_cases = [
|
|||||||
('128 A St.', 'ae', 'en'),
|
('128 A St.', 'ae', 'en'),
|
||||||
|
|
||||||
# English / Arabic street address
|
# English / Arabic street address
|
||||||
('Omar Street ﺵﺍﺮﻋ ﻊﻣﺭ', 'iq', AMBIGUOUS_LANGUAGE),
|
('Omar Street شارع عمر', 'iq', AMBIGUOUS_LANGUAGE),
|
||||||
|
|
||||||
# Random script
|
# Random script
|
||||||
('Bayard Street - 擺也街', 'us', AMBIGUOUS_LANGUAGE),
|
('Bayard Street - 擺也街', 'us', AMBIGUOUS_LANGUAGE),
|
||||||
|
|||||||
Reference in New Issue
Block a user