[openaddresses] adding a --debug argument to the OpenAddresses training data builder (tests on a sample of each file to debug the config)
This commit is contained in:
@@ -63,7 +63,7 @@ class OpenAddressesFormatter(object):
|
|||||||
re.I | re.UNICODE)
|
re.I | re.UNICODE)
|
||||||
unit_type_regexes[lang] = pattern
|
unit_type_regexes[lang] = pattern
|
||||||
|
|
||||||
def __init__(self, components):
|
def __init__(self, components, debug=False):
|
||||||
self.components = components
|
self.components = components
|
||||||
self.language_rtree = components.language_rtree
|
self.language_rtree = components.language_rtree
|
||||||
|
|
||||||
@@ -71,6 +71,8 @@ class OpenAddressesFormatter(object):
|
|||||||
self.config = config['global']
|
self.config = config['global']
|
||||||
self.country_configs = config['countries']
|
self.country_configs = config['countries']
|
||||||
|
|
||||||
|
self.debug = debug
|
||||||
|
|
||||||
self.formatter = AddressFormatter()
|
self.formatter = AddressFormatter()
|
||||||
|
|
||||||
class validators:
|
class validators:
|
||||||
@@ -432,6 +434,8 @@ class OpenAddressesFormatter(object):
|
|||||||
i += 1
|
i += 1
|
||||||
if i % 1000 == 0 and i > 0:
|
if i % 1000 == 0 and i > 0:
|
||||||
print('did {} formatted addresses'.format(i))
|
print('did {} formatted addresses'.format(i))
|
||||||
|
if self.debug:
|
||||||
|
break
|
||||||
|
|
||||||
for subdir, subdir_config in six.iteritems(config.get('subdirs', {})):
|
for subdir, subdir_config in six.iteritems(config.get('subdirs', {})):
|
||||||
for file_config in subdir_config.get('files', []):
|
for file_config in subdir_config.get('files', []):
|
||||||
@@ -460,3 +464,5 @@ class OpenAddressesFormatter(object):
|
|||||||
i += 1
|
i += 1
|
||||||
if i % 1000 == 0 and i > 0:
|
if i % 1000 == 0 and i > 0:
|
||||||
print('did {} formatted addresses'.format(i))
|
print('did {} formatted addresses'.format(i))
|
||||||
|
if self.debug:
|
||||||
|
break
|
||||||
|
|||||||
@@ -55,6 +55,11 @@ if __name__ == '__main__':
|
|||||||
default=None,
|
default=None,
|
||||||
help='Neighborhoods reverse geocoder RTree directory')
|
help='Neighborhoods reverse geocoder RTree directory')
|
||||||
|
|
||||||
|
parser.add_argument('--debug',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='Test on a sample of each file to debug config')
|
||||||
|
|
||||||
parser.add_argument('-o', '--out-dir',
|
parser.add_argument('-o', '--out-dir',
|
||||||
default=os.getcwd(),
|
default=os.getcwd(),
|
||||||
help='Output directory')
|
help='Output directory')
|
||||||
@@ -83,5 +88,5 @@ if __name__ == '__main__':
|
|||||||
if args.openaddresses_dir and args.format:
|
if args.openaddresses_dir and args.format:
|
||||||
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames)
|
||||||
|
|
||||||
oa_formatter = OpenAddressesFormatter(components)
|
oa_formatter = OpenAddressesFormatter(components, debug=args.debug)
|
||||||
oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged)
|
oa_formatter.build_training_data(args.openaddresses_dir, args.out_dir, tag_components=not args.untagged)
|
||||||
|
|||||||
Reference in New Issue
Block a user