[openaddresses] Add a --debug argument to OpenAddresses training data generation, for testing on a sample of each file to debug the config
This commit is contained in:
@@ -63,7 +63,7 @@ class OpenAddressesFormatter(object):
|
||||
re.I | re.UNICODE)
|
||||
unit_type_regexes[lang] = pattern
|
||||
|
||||
def __init__(self, components):
|
||||
def __init__(self, components, debug=False):
|
||||
self.components = components
|
||||
self.language_rtree = components.language_rtree
|
||||
|
||||
@@ -71,6 +71,8 @@ class OpenAddressesFormatter(object):
|
||||
self.config = config['global']
|
||||
self.country_configs = config['countries']
|
||||
|
||||
self.debug = debug
|
||||
|
||||
self.formatter = AddressFormatter()
|
||||
|
||||
class validators:
|
||||
@@ -432,6 +434,8 @@ class OpenAddressesFormatter(object):
|
||||
i += 1
|
||||
if i % 1000 == 0 and i > 0:
|
||||
print('did {} formatted addresses'.format(i))
|
||||
if self.debug:
|
||||
break
|
||||
|
||||
for subdir, subdir_config in six.iteritems(config.get('subdirs', {})):
|
||||
for file_config in subdir_config.get('files', []):
|
||||
@@ -460,3 +464,5 @@ class OpenAddressesFormatter(object):
|
||||
i += 1
|
||||
if i % 1000 == 0 and i > 0:
|
||||
print('did {} formatted addresses'.format(i))
|
||||
if self.debug:
|
||||
break
|
||||
|
||||
@@ -55,6 +55,11 @@ if __name__ == '__main__':
|
||||
default=None,
|
||||
help='Neighborhoods reverse geocoder RTree directory')
|
||||
|
||||
parser.add_argument('--debug',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Test on a sample of each file to debug config')
|
||||
|
||||
parser.add_argument('-o', '--out-dir',
|
||||
default=os.getcwd(),
|
||||
help='Output directory')
|
||||
@@ -83,5 +88,5 @@ if __name__ == '__main__':
|
||||
if args.openaddresses_dir and args.format:
|
||||
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||
|
||||
oa_formatter = OpenAddressesFormatter(components)
|
||||
oa_formatter = OpenAddressesFormatter(components, debug=args.debug)
|
||||
oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged)
|
||||
|
||||
Reference in New Issue
Block a user