[fix] use sets for potential languages so a language is not counted twice when a phrase matches multiple dictionaries

2016-01-24 17:57:12 -05:00
parent b713d102d1
commit b4dcb83e10
1 changed file with 2 additions and 2 deletions
--- a/scripts/geodata/language_id/disambiguation.py
+++ b/scripts/geodata/language_id/disambiguation.py
@@ -103,8 +103,8 @@ def disambiguate_language(text, languages):
        if c is PHRASE:
            valid = OrderedDict()
            data = [safe_decode(d).split(u'|') for d in data]
-            potentials = [l for l, d, i, c in data if l in valid_languages]
+            potentials = set([l for l, d, i, c in data if l in valid_languages])
-            potential_defaults = [l for l in potentials if valid_languages[l]]
+            potential_defaults = set([l for l in potentials if valid_languages[l]])
            phrase_len = sum((len(t_i[0]) for t_i in t))
            for lang, dictionary, is_canonical, canonical in data: