From baa60aab65a638dc485c0f1dfdc1ebeab513bb98 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 21 Aug 2015 08:03:20 -0400 Subject: [PATCH] [fix] language disambiguation module --- scripts/geodata/language_id/disambiguation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py index 19614a93..d1d31bfa 100644 --- a/scripts/geodata/language_id/disambiguation.py +++ b/scripts/geodata/language_id/disambiguation.py @@ -1,7 +1,7 @@ import os import sys -from collections import defaultdict +from collections import defaultdict, OrderedDict from marisa_trie import BytesTrie @@ -15,6 +15,7 @@ from geodata.i18n.unicode_paths import DATA_DIR from address_normalizer.text.normalize import PhraseFilter from address_normalizer.text.tokenize import * +WELL_REPRESENTED_LANGUAGES = set(['en', 'fr', 'it', 'de', 'nl', 'es']) DICTIONARIES_DIR = os.path.join(DATA_DIR, 'dictionaries')