From b2f8180d19968c29a89fbb2231245337c83ec3cc Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 25 Aug 2016 23:58:38 -0400 Subject: [PATCH] [openaddresses] Ignore any fields in OpenAddresses which have N/A as a value --- scripts/geodata/openaddresses/formatter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py index fa3853d4..897106a1 100644 --- a/scripts/geodata/openaddresses/formatter.py +++ b/scripts/geodata/openaddresses/formatter.py @@ -1,6 +1,7 @@ import csv import os import random +import re import six import yaml @@ -24,6 +25,8 @@ OPENADDRESSES_PARSER_DATA_CONFIG = os.path.join(this_dir, os.pardir, os.pardir, OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv' OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv' +not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I) + class OpenAddressesFormatter(object): def __init__(self, components): @@ -163,6 +166,9 @@ class OpenAddressesFormatter(object): if value and len(value) < 2 or is_numeric(value): continue + if not_applicable_regex.match(value): + continue + components[key] = value.strip(', ') if components: