From e92963de50fb3f5d1f85e55ddf61c225f5c092f8 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 9 Dec 2016 01:57:21 -0500 Subject: [PATCH] [openaddresses] adding new counties from OpenAddresses, strip commas option for thousands separators --- resources/parser/data_sets/openaddresses.yaml | 10 ++++++---- scripts/geodata/openaddresses/formatter.py | 7 +++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/resources/parser/data_sets/openaddresses.yaml b/resources/parser/data_sets/openaddresses.yaml index c773affc..c43db931 100644 --- a/resources/parser/data_sets/openaddresses.yaml +++ b/resources/parser/data_sets/openaddresses.yaml @@ -85,6 +85,8 @@ global: add_osm_boundaries: false add_osm_neighborhoods: true + house_number_strip_commas: false + # Units have strong restrictions, have to be a number or hyphenated number non_numeric_units: false @@ -1140,6 +1142,8 @@ countries: add_osm_boundaries: true city_replacements: state_district: Coconino County + - filename: cochise.csv + house_number_strip_commas: true - filename: gila.csv - filename: maricopa.csv - filename: pima.csv @@ -2049,10 +2053,6 @@ countries: add: state: KY files: - - filename: boone.csv - add_osm_boundaries: true - city_replacements: - state_district: Boone County - filename: city_of_paducah.csv add_osm_boundaries: true city_replacements: @@ -3294,6 +3294,7 @@ countries: - filename: city_of_spokane.csv - filename: city_of_yakima.csv - filename: clallam.csv + - filename: clark.csv - filename: cowlitz.csv - filename: douglas.csv - filename: franklin.csv @@ -3329,6 +3330,7 @@ countries: state_district: Snohomish County - filename: snohomish.csv - filename: spokane.csv + - filename: thurston.csv - filename: walla_walla.csv wi: diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index a099ee4e..8dd36681 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -171,8 +171,10 @@ class OpenAddressesFormatter(object): return country_name @classmethod - def cleanup_number(cls, num): + def cleanup_number(cls, num, strip_commas=False): num = num.strip() + if strip_commas: + num = num.replace(six.u(','), six.u('')) try: num_int = int(num) except (ValueError, TypeError): @@ -224,6 +226,7 @@ class OpenAddressesFormatter(object): add_osm_boundaries = bool(self.get_property('add_osm_boundaries', *configs) or False) add_osm_neighborhoods = bool(self.get_property('add_osm_neighborhoods', *configs) or False) non_numeric_units = bool(self.get_property('non_numeric_units', *configs) or False) + house_number_strip_commas = bool(self.get_property('house_number_strip_commas', *configs) or False) numeric_postcodes_only = bool(self.get_property('numeric_postcodes_only', *configs) or False) postcode_strip_non_digit_chars = bool(self.get_property('postcode_strip_non_digit_chars', *configs) or False) @@ -349,7 +352,7 @@ class OpenAddressesFormatter(object): house_number = components.get(AddressFormatter.HOUSE_NUMBER, None) if house_number: - house_number = self.cleanup_number(house_number) + house_number = self.cleanup_number(house_number, strip_commas=house_number_strip_commas) if house_number is not None: components[AddressFormatter.HOUSE_NUMBER] = house_number