From 2b9d58dcbecdfb8f47a258bdd915e481c4cf118d Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 26 Aug 2016 15:48:32 -0400 Subject: [PATCH] [openaddresses] Ignoring fields with null-like values as well (there appear to be no valid places named Null or None...yet) --- scripts/geodata/openaddresses/formatter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index 9f2de7b1..e1a90b92 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -27,6 +27,7 @@ OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv' multiple_spaces_regex = re.compile('[\s]{2,}') numeric_range_regex = re.compile('[\s]*\-[\s]*') +null_regex = re.compile('^\s*(?:null|none)\s*$', re.I) not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I) @@ -171,7 +172,7 @@ class OpenAddressesFormatter(object): if value and len(value) < 2 or is_numeric(value): continue - if not_applicable_regex.match(value): + if not_applicable_regex.match(value) or null_regex.match(value): continue value = multiple_spaces_regex.sub(six.u(' '), value)