[openaddresses] Ignore any fields in OpenAddresses which have N/A as a value

This commit is contained in:
Al
2016-08-25 23:58:38 -04:00
parent c23a7a4030
commit b2f8180d19

View File

@@ -1,6 +1,7 @@
import csv
import os
import random
import re
import six
import yaml
@@ -24,6 +25,8 @@ OPENADDRESSES_PARSER_DATA_CONFIG = os.path.join(this_dir, os.pardir, os.pardir,
# Output filenames for the formatted address data (tagged and untagged).
OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
# Matches field values that are only a "not applicable" placeholder, e.g.
# "N/A", "n.a.", "NA" or "n / a", ignoring case and surrounding whitespace.
# Written as a raw string so that "\s" and "\." reach the regex engine intact
# instead of being parsed as (invalid) string escape sequences.
not_applicable_regex = re.compile(r'^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
class OpenAddressesFormatter(object):
def __init__(self, components):
@@ -163,6 +166,9 @@ class OpenAddressesFormatter(object):
if value and len(value) < 2 or is_numeric(value):
continue
if not_applicable_regex.match(value):
continue
components[key] = value.strip(', ')
if components: