From b4dcb83e1067d9a0cb04be13f0197e0fb3498bfb Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 24 Jan 2016 17:57:12 -0500 Subject: [PATCH] [fix] sets of potential languages in case phrase matches multiple dictionaries --- scripts/geodata/language_id/disambiguation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py index be5e5f67..1b015117 100644 --- a/scripts/geodata/language_id/disambiguation.py +++ b/scripts/geodata/language_id/disambiguation.py @@ -103,8 +103,8 @@ def disambiguate_language(text, languages): if c is PHRASE: valid = OrderedDict() data = [safe_decode(d).split(u'|') for d in data] - potentials = [l for l, d, i, c in data if l in valid_languages] - potential_defaults = [l for l in potentials if valid_languages[l]] + potentials = set([l for l, d, i, c in data if l in valid_languages]) + potential_defaults = set([l for l in potentials if valid_languages[l]]) phrase_len = sum((len(t_i[0]) for t_i in t)) for lang, dictionary, is_canonical, canonical in data: