[openaddresses] adding a --debug argument to OpenAddresses training data generation, to test on a sample of each file when debugging the config

This commit is contained in:
Al
2016-08-28 17:58:41 -04:00
parent 6740e5a1c6
commit 27c5c8536a
2 changed files with 13 additions and 2 deletions

View File

@@ -63,7 +63,7 @@ class OpenAddressesFormatter(object):
re.I | re.UNICODE)
unit_type_regexes[lang] = pattern
def __init__(self, components):
def __init__(self, components, debug=False):
self.components = components
self.language_rtree = components.language_rtree
@@ -71,6 +71,8 @@ class OpenAddressesFormatter(object):
self.config = config['global']
self.country_configs = config['countries']
self.debug = debug
self.formatter = AddressFormatter()
class validators:
@@ -432,6 +434,8 @@ class OpenAddressesFormatter(object):
i += 1
if i % 1000 == 0 and i > 0:
print('did {} formatted addresses'.format(i))
if self.debug:
break
for subdir, subdir_config in six.iteritems(config.get('subdirs', {})):
for file_config in subdir_config.get('files', []):
@@ -460,3 +464,5 @@ class OpenAddressesFormatter(object):
i += 1
if i % 1000 == 0 and i > 0:
print('did {} formatted addresses'.format(i))
if self.debug:
break

View File

@@ -55,6 +55,11 @@ if __name__ == '__main__':
default=None,
help='Neighborhoods reverse geocoder RTree directory')
parser.add_argument('--debug',
action='store_true',
default=False,
help='Test on a sample of each file to debug config')
parser.add_argument('-o', '--out-dir',
default=os.getcwd(),
help='Output directory')
@@ -83,5 +88,5 @@ if __name__ == '__main__':
if args.openaddresses_dir and args.format:
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
oa_formatter = OpenAddressesFormatter(components)
oa_formatter = OpenAddressesFormatter(components, debug=args.debug)
oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged)