From 72c3723b4357302601a3a600899c448500461e6a Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 26 Jul 2016 17:45:23 -0400 Subject: [PATCH] [osm] Validate postcode with a regex for the given country code before sending on to parse_osm_number_range (some postcodes can also look like ranges e.g. 83-101 so validate for the given country) --- scripts/geodata/osm/formatter.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index ee43557a..f473e84a 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -25,6 +25,7 @@ from geodata.configs.utils import nested_get from geodata.countries.country_names import * from geodata.language_id.disambiguation import * from geodata.language_id.sample import INTERNET_LANGUAGE_DISTRIBUTION +from geodata.i18n.google import postcode_regexes from geodata.i18n.languages import * from geodata.intersections.query import Intersection, IntersectionQuery from geodata.address_formatting.formatter import AddressFormatter @@ -411,7 +412,16 @@ class OSMAddressFormatter(object): postal_code = revised_tags.get(AddressFormatter.POSTCODE, None) postal_codes = [] if postal_code: - postal_codes = parse_osm_number_range(postal_code, parse_letter_range=False) + valid_postcode = False + postcode_regex = postcode_regexes.get(country) + if postcode_regex: + match = postcode_regex.match(postal_code) + if match and match.end() == len(postal_code): + valid_postcode = True + postal_codes.append(postal_code) + + if not valid_postcode: + postal_codes = parse_osm_number_range(postal_code, parse_letter_range=False) try: population = int(tags.get('population', 0))