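[fix] Account for unknown scripts in disambiguation: look up script_languages with .get(script, []) so a script with no entry yields an empty language set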
This commit is contained in:
@@ -186,7 +186,7 @@ def disambiguate_language(text, languages):
|
|||||||
while read_len < len(text):
|
while read_len < len(text):
|
||||||
script, script_len, is_ascii = get_string_script(text[read_len:])
|
script, script_len, is_ascii = get_string_script(text[read_len:])
|
||||||
if script != LATIN_SCRIPT:
|
if script != LATIN_SCRIPT:
|
||||||
script_langs[script] = set([l for l, d in languages if l in script_languages[script]])
|
script_langs[script] = set([l for l, d in languages if l in script_languages.get(script, [])])
|
||||||
read_len += script_len
|
read_len += script_len
|
||||||
|
|
||||||
num_defaults = sum((1 for lang, default in valid_languages.iteritems() if default))
|
num_defaults = sum((1 for lang, default in valid_languages.iteritems() if default))
|
||||||
|
|||||||
Reference in New Issue
Block a user