From cec4914233b84d779efe69a8dda315cf9a106991 Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 31 Jul 2016 01:12:04 -0400 Subject: [PATCH] [openaddresses] In some OpenAddresses data sets, the house number is just a copy of the street name, so eliminate non-numeric house numbers to be safe --- scripts/geodata/openaddresses/formatter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index 07c1d839..cc32fdab 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -10,6 +10,7 @@ from geodata.address_formatting.formatter import AddressFormatter from geodata.addresses.components import AddressComponents from geodata.countries.names import country_names from geodata.math.sampling import cdf, weighted_choice +from geodata.text.utils import is_numeric from geodata.csv_utils import tsv_string, unicode_csv_reader @@ -125,6 +126,10 @@ class OpenAddressesFormatter(object): separate_prob=separate_street_prob) components[AddressFormatter.ROAD] = street + house_number = components.get(AddressFormatter.HOUSE_NUMBER, None) + if house_number and not is_numeric(house_number): + components.pop(AddressFormatter.HOUSE_NUMBER) + unit = components.get(AddressFormatter.UNIT, None) if unit is not None: unit = abbreviate(unit_types_gazetteer, unit, language,