From a2ec8001b0d1c7caa39b1fb1e4e1b466973e1424 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 24 Aug 2015 14:08:36 -0400 Subject: [PATCH] [osm] Removing postal code keys in formatted language training data --- scripts/geodata/osm/osm_address_training_data.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index 8ca7b150..af7b3c44 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -491,6 +491,12 @@ COUNTRY_KEYS = ( 'country_name', 'addr:country', ) +POSTAL_KEYS = ( + 'postcode', + 'postal_code', + 'addr:postcode', + 'addr:postal_code', +) def build_address_format_training_data_limited(language_rtree, infile, out_dir): @@ -501,13 +507,15 @@ def build_address_format_training_data_limited(language_rtree, infile, out_dir): f = open(os.path.join(out_dir, ADDRESS_FORMAT_DATA_LANGUAGE_FILENAME), 'w') writer = csv.writer(f, 'tsv_no_quote') + remove_keys = NAME_KEYS + COUNTRY_KEYS + POSTAL_KEYS + for key, value in parse_osm(infile): try: latitude, longitude = latlon_to_floats(value['lat'], value['lon']) except Exception: continue - for k in NAME_KEYS + COUNTRY_KEYS: + for k in remove_keys: _ = value.pop(k, None) if not value: