[openaddresses] Ignoring fields with null-like values as well (there appear to be no valid places named Null or None...yet)

This commit is contained in:
Al
2016-08-26 15:48:32 -04:00
parent 2654683af4
commit 2b9d58dcbe

View File

@@ -27,6 +27,7 @@ OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
# Patterns are raw strings so that regex escapes such as \s, \- and \. reach
# the regex engine untouched instead of being parsed as (invalid) string
# escapes, which newer Python versions warn about.

# Two or more consecutive whitespace characters.
multiple_spaces_regex = re.compile(r'[\s]{2,}')
# A hyphen together with any whitespace immediately around it.
numeric_range_regex = re.compile(r'[\s]*\-[\s]*')
# A value that is only the word "null" or "none" (any case), optionally
# padded with whitespace.
null_regex = re.compile(r'^\s*(?:null|none)\s*$', re.I)
# A value that is only a "not applicable" marker such as "n/a", "N.A." or
# "na" (any case), optionally padded with whitespace.
not_applicable_regex = re.compile(r'^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
@@ -171,7 +172,7 @@ class OpenAddressesFormatter(object):
if value and len(value) < 2 or is_numeric(value):
continue
if not_applicable_regex.match(value):
if not_applicable_regex.match(value) or null_regex.match(value):
continue
value = multiple_spaces_regex.sub(six.u(' '), value)