From 134cf616d6c510a38e285fceab2e9fc11dfa7b2c Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 21 Sep 2015 04:09:15 -0400 Subject: [PATCH] [osm] Using street for language disambiguation in training data --- scripts/geodata/osm/osm_address_training_data.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 6cceb2c2..fb5b618f 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -396,10 +396,15 @@ def build_address_format_training_data(language_rtree, infile, out_dir, tag_comp for key in remove_keys: _ = value.pop(key, None) - if len(candidate_languages) == 1: - language = candidate_languages[0]['lang'] - else: - language = disambiguate_language(v, [(l['lang'], l['default']) for l in candidate_languages]) + language = None + if tag_components: + if len(candidate_languages) == 1: + language = candidate_languages[0]['lang'] + else: + street = value.get('addr:street', None) + if street is None: + continue + language = disambiguate_language(street, [(l['lang'], l['default']) for l in candidate_languages]) formatted_address = formatter.format_address(country, value, tag_components=tag_components) if formatted_address is not None: