From f8a0463aa093eda56e60c5484b739bec16dcb4a2 Mon Sep 17 00:00:00 2001
From: Al <albarrentine@gmail.com>
Date: Sun, 24 Jan 2016 15:09:51 -0500
Subject: [PATCH] [languages] Language disambiguation treats the national
 languages as non-default when regional languages are present

---
 scripts/geodata/language_id/disambiguation.py | 2 +-
 scripts/geodata/language_id/polygon_lookup.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/geodata/language_id/disambiguation.py b/scripts/geodata/language_id/disambiguation.py
index 6023c82c..425b7dbc 100644
--- a/scripts/geodata/language_id/disambiguation.py
+++ b/scripts/geodata/language_id/disambiguation.py
@@ -18,7 +18,7 @@ from geodata.text.tokenize import tokenize
 WELL_REPRESENTED_LANGUAGES = set(['en', 'fr', 'it', 'de', 'nl', 'es', 'pt'])
 
 # For toponyms, we want to limit the countries we consider to those where
-# we the place names can themselves be considered training examples of the language
+# the place names can themselves be considered training examples of the language
 WELL_REPRESENTED_LANGUAGE_COUNTRIES = {
     'en': set(['gb', 'us', 'ca', 'au', 'nz', 'ie']),
     'fr': set(['fr']),
diff --git a/scripts/geodata/language_id/polygon_lookup.py b/scripts/geodata/language_id/polygon_lookup.py
index f2642d6a..eb3879cf 100644
--- a/scripts/geodata/language_id/polygon_lookup.py
+++ b/scripts/geodata/language_id/polygon_lookup.py
@@ -10,11 +10,17 @@ def country_and_languages(language_rtree, latitude, longitude):
     languages = []
     language_set = set()
 
+    have_regional = False
+
     for p in props:
         for l in p['languages']:
             lang = l['lang']
             if lang not in language_set:
                 language_set.add(lang)
+                if p['admin_level'] > 0:
+                    have_regional = True
+                elif have_regional:
+                    l = {'lang': l['lang'], 'default': 0}
                 languages.append(l)
 
     # Python's builtin sort is stable, so if there are two defaults, the first remains first