From 8c422a6e611b13cfe4c4a7d8e71ad5c5ead1b64b Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 24 Nov 2015 21:49:10 -0500 Subject: [PATCH] [osm] Adding new localized country names in language training data for formatted addresses --- scripts/geodata/osm/osm_address_training_data.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index b1733407..96f337e7 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -692,13 +692,19 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir): remove_keys = NAME_KEYS + HOUSE_NUMBER_KEYS + COUNTRY_KEYS + POSTAL_KEYS + OSM_IGNORE_KEYS + country_keys_set = set(COUNTRY_KEYS) + for key, value, deps in parse_osm(infile): try: latitude, longitude = latlon_to_decimal(value['lat'], value['lon']) except Exception: continue + have_country = False + for k in remove_keys: + if k in country_keys_set: + have_country = True _ = value.pop(k, None) if not value: @@ -709,10 +715,17 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir): continue single_language = len(name_language) == 1 + for lang, val in name_language.iteritems(): if lang not in languages: continue + if have_country: + localized = language_country_names.get(lang, {}).get(country.upper()) + + if localized: + value['addr:country:{}'.format(lang)] = localized + address_dict = value.copy() for k in address_dict.keys(): namespaced_val = u'{}:{}'.format(k, lang)