From e7cf5000fe8f62c4d7c72923822ced3864e560fd Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 3 Sep 2015 20:48:04 -0400 Subject: [PATCH] [fix] Exclude polygons with > 1 regional language --- scripts/geodata/osm/osm_address_training_data.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 47b177fb..9aad5a53 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -626,10 +626,12 @@ def build_toponym_training_data(language_rtree, infile, out_dir): official = official_languages[country] - num_langs = len(official) + num_langs = len(candidate_languages) default_langs = set([l for l, default in official.iteritems() if default]) num_defaults = len(default_langs) + regional_langs = [p['lang'] for p in language_props if p.get('admin_level', 0) > 0] + top_lang = None if len(official) > 0: top_lang = official.iterkeys().next() @@ -664,8 +666,8 @@ def build_toponym_training_data(language_rtree, infile, out_dir): have_qualified_names = True name_language[lang].append(v) - if not have_qualified_names and num_langs == 1 and 'name' in value: - name_language[top_lang].append(value['name']) + if not have_qualified_names and len(regional_langs) <= 1 and num_langs == 1 and 'name' in value: + name_language[candidate_languages[0]['lang']].append(value['name']) for k, v in name_language.iteritems(): for s in v: