[fix] do component dropout anyway

This commit is contained in:
Al
2016-08-28 14:07:49 -04:00
parent 44e59e8daf
commit 51590825ee

View File

@@ -28,8 +28,8 @@ this_dir = os.path.realpath(os.path.dirname(__file__))
OPENADDRESSES_PARSER_DATA_CONFIG = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
'resources', 'parser', 'data_sets', 'openaddresses.yaml')
OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
OPENADDRESS_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
OPENADDRESSES_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_tagged.tsv'
OPENADDRESSES_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
# Patterns that match a string consisting solely of a placeholder word
# ("null"/"none", or "unknown"), case-insensitively, with optional surrounding
# whitespace. Raw string literals are used so that `\s` reaches the regex
# engine verbatim instead of being treated as an (invalid) string escape.
null_regex = re.compile(r'^\s*(?:null|none)\s*$', re.I)
unknown_regex = re.compile(r'^\s*(?:unknown)\s*$', re.I)
@@ -367,7 +367,12 @@ class OpenAddressesFormatter(object):
# This is expensive, so only turn on for files that don't supply their own city names
# or for which those names are flawed
osm_components = []
population = None
# Default to population=0 rather than None: when the population is unknown, or
# when we don't need to add OSM components, assume the town is very small, so
# its place name shouldn't be used unqualified (i.e. it needs information like
# the state name to disambiguate it)
population = 0
if add_osm_boundaries or AddressFormatter.CITY not in components:
osm_components = self.components.osm_reverse_geocoded_components(latitude, longitude)
self.components.add_admin_boundaries(components, osm_components, country, language)
@@ -383,9 +388,9 @@ class OpenAddressesFormatter(object):
neighborhood_components = self.components.neighborhood_components(latitude, longitude)
self.components.add_neighborhoods(components, neighborhood_components)
if add_osm_boundaries or add_osm_neighborhoods:
all_osm_components = osm_components + neighborhood_components
components = place_config.dropout_components(components, all_osm_components, country=country, population=population)
# Component dropout
all_osm_components = osm_components + neighborhood_components
components = place_config.dropout_components(components, all_osm_components, country=country, population=population)
formatted = self.formatter.format_address(components, country,
language=language, tag_components=tag_components)
@@ -393,10 +398,10 @@ class OpenAddressesFormatter(object):
def build_training_data(self, base_dir, out_dir, tag_components=True):
if tag_components:
formatted_tagged_file = open(os.path.join(out_dir, OPENADDRESS_FORMAT_DATA_TAGGED_FILENAME), 'w')
formatted_tagged_file = open(os.path.join(out_dir, OPENADDRESSES_FORMAT_DATA_TAGGED_FILENAME), 'w')
writer = csv.writer(formatted_tagged_file, 'tsv_no_quote')
else:
formatted_tagged_file = open(os.path.join(out_dir, OPENADDRESS_FORMAT_DATA_FILENAME), 'w')
formatted_tagged_file = open(os.path.join(out_dir, OPENADDRESSES_FORMAT_DATA_FILENAME), 'w')
writer = csv.writer(formatted_tagged_file, 'tsv_no_quote')
i = 0