From b15d2d70aa71cb1e37c206f06bbeec2f803c3885 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 3 Sep 2015 20:09:46 -0400 Subject: [PATCH] [fix] top language --- scripts/geodata/osm/osm_address_training_data.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 7c3e0fb2..add6566f 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -624,12 +624,17 @@ def build_toponym_training_data(language_rtree, infile, out_dir): name_language = defaultdict(list) + official = official_languages[country] + num_langs = len(candidate_languages) - default_langs = set([l for l, default in official_languages[country].iteritems() if default]) + default_langs = set([l for l, default in official.iteritems() if default]) num_defaults = len(default_langs) - top_lang = official_languages[country].iterkeys().next() - if top_lang not in WELL_REPRESENTED_LANGUAGES and len(default_langs) > 1: + top_lang = None + if len(official) > 0: + top_lang = official.iterkeys().next() + + if top_lang is not None and top_lang not in WELL_REPRESENTED_LANGUAGES and len(default_langs) > 1: default_langs -= WELL_REPRESENTED_LANGUAGES elif len(default_langs & WELL_REPRESENTED_LANGUAGES) > 1: continue