[openaddresses] Ignore any fields in OpenAddresses which have N/A as a value
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
import csv
|
import csv
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import six
|
import six
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
@@ -24,6 +25,8 @@ OPENADDRESSES_PARSER_DATA_CONFIG = os.path.join(this_dir, os.pardir, os.pardir,
|
|||||||
OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
|
OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
|
||||||
OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
|
OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
|
||||||
|
|
||||||
|
# Matches a value that consists solely of a "not applicable" marker, e.g.
# "N/A", "n.a.", "NA" or "n / a" (case-insensitive, surrounding whitespace
# allowed). Written as a raw string so that \s and \. reach the regex engine
# without relying on Python passing through invalid string escapes.
not_applicable_regex = re.compile(r'^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
|
||||||
|
|
||||||
|
|
||||||
class OpenAddressesFormatter(object):
|
class OpenAddressesFormatter(object):
|
||||||
def __init__(self, components):
|
def __init__(self, components):
|
||||||
@@ -163,6 +166,9 @@ class OpenAddressesFormatter(object):
|
|||||||
if value and len(value) < 2 or is_numeric(value):
|
if value and len(value) < 2 or is_numeric(value):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if not_applicable_regex.match(value):
|
||||||
|
continue
|
||||||
|
|
||||||
components[key] = value.strip(', ')
|
components[key] = value.strip(', ')
|
||||||
|
|
||||||
if components:
|
if components:
|
||||||
|
|||||||
Reference in New Issue
Block a user