From 4e9f9e89571e0f840920e1d474e6faf631fc47a1 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 26 Aug 2016 12:45:49 -0400 Subject: [PATCH] [openaddresses] Replace multiple spaces with single space --- scripts/geodata/openaddresses/formatter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index 71f6d728..7017c966 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -25,7 +25,8 @@ OPENADDRESSES_PARSER_DATA_CONFIG = os.path.join(this_dir, os.pardir, os.pardir, OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv' OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv' -numeric_range_regex = re.compile(six.u('[\s]*\-[\s]*')) +multiple_spaces_regex = re.compile('[\s]{2,}') +numeric_range_regex = re.compile('[\s]*\-[\s]*') not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I) @@ -170,6 +171,8 @@ class OpenAddressesFormatter(object): if not_applicable_regex.match(value): continue + value = multiple_spaces_regex.sub(six.u(' '), value) + value = value.strip(', ') if value: components[key] = value