[languages] Language disambiguation treats the national languages as non-default

2016-01-24 15:09:51 -05:00
parent 87aff60a7e
commit f8a0463aa0
2 changed files with 7 additions and 1 deletion
--- a/scripts/geodata/language_id/disambiguation.py
+++ b/scripts/geodata/language_id/disambiguation.py
@@ -18,7 +18,7 @@ from geodata.text.tokenize import tokenize
 WELL_REPRESENTED_LANGUAGES = set(['en', 'fr', 'it', 'de', 'nl', 'es', 'pt'])

 # For toponyms, we want to limit the countries we consider to those where
-# we the place names can themselves be considered training examples of the language
+# the place names can themselves be considered training examples of the language
 WELL_REPRESENTED_LANGUAGE_COUNTRIES = {
    'en': set(['gb', 'us', 'ca', 'au', 'nz', 'ie']),
    'fr': set(['fr']),