From 145af9331e8946a662257a6e496f7472904598a5 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 17 Aug 2016 18:11:55 -0400 Subject: [PATCH] [osm] build OSM training data for intersections using the JSON output from intersections.py rather than having to compute each time --- scripts/geodata/osm/formatter.py | 5 ++--- scripts/geodata/osm/osm_address_training_data.py | 12 ++++-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/scripts/geodata/osm/formatter.py b/scripts/geodata/osm/formatter.py index 1c50c042..0ad328ac 100644 --- a/scripts/geodata/osm/formatter.py +++ b/scripts/geodata/osm/formatter.py @@ -1027,7 +1027,7 @@ class OSMAddressFormatter(object): if i % 1000 == 0 and i > 0: print('did {} formatted places'.format(i)) - def build_intersections_training_data(self, infile, out_dir, way_db_dir, tag_components=True): + def build_intersections_training_data(self, infile, out_dir, tag_components=True): ''' Intersection addresses like "4th & Main Street" are represented in OSM by ways that share at least one node. 
@@ -1053,8 +1053,7 @@ class OSMAddressFormatter(object): replace_with_base_name_prob = float(nested_get(self.config, ('intersections', 'replace_with_base_name_probability'), default=0.0)) - reader = OSMIntersectionReader(infile, way_db_dir) - for node_id, latitude, longitude, ways in reader.intersections(): + for node_id, latitude, longitude, ways in OSMIntersectionReader.read_intersections(infile): if not ways or len(ways) < 2: continue diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index baf0a4a2..370ca345 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -26,6 +26,9 @@ python osm_address_training_data.py -a $(OSM_DIR)/planet-addresses.osm -f --lang Formatted addresses (untagged): python osm_address_training_data.py -a $(OSM_DIR)/planet-addresses.osm -f -u --language-rtree-dir=$(LANG_RTREE_DIR) --neighborhoods-rtree-dir=$(NEIGHBORHOODS_RTREE_DIR) --rtree-dir=$(RTREE_DIR) --quattroshapes-rtree-dir=$(QS_TREE_DIR) --geonames-db=$(GEONAMES_DB_PATH) -o $(OUT_DIR) +Intersections (after running intersections.py to create the JSON file): +python osm_address_training_data.py -x $(OSM_DIR)/intersections.json -f --language-rtree-dir=$(LANG_RTREE_DIR) --neighborhoods-rtree-dir=$(NEIGHBORHOODS_RTREE_DIR) --rtree-dir=$(RTREE_DIR) --quattroshapes-rtree-dir=$(QS_TREE_DIR) --geonames-db=$(GEONAMES_DB_PATH) -o $(OUT_DIR) + Toponyms: python osm_address_training_data.py -b $(OSM_DIR)/planet-borders.osm --language-rtree-dir=$(LANG_RTREE_DIR) -o $(OUT_DIR) ''' @@ -452,9 +455,6 @@ if __name__ == '__main__': parser.add_argument('-x', '--intersections-file', help='Path to planet-ways-latlons.osm') - parser.add_argument('--ways-db-dir', - help='Path to temporary ways db') - parser.add_argument('--language-rtree-dir', required=True, help='Language RTree directory') @@ -556,10 +556,6 @@ if __name__ == '__main__': 
osm_formatter.build_place_training_data(args.place_nodes_file, args.out_dir, tag_components=not args.untagged) if args.intersections_file and args.format: - if args.ways_db_dir is None: - parser.error('--ways-db-dir required for intersections') - - logging.basicConfig(level=logging.INFO) components = AddressComponents(osm_rtree, language_rtree, neighborhoods_rtree, quattroshapes_rtree, geonames, metro_stations_index) osm_formatter = OSMAddressFormatter(components, subdivisions_rtree, buildings_rtree, metro_stations_index) - osm_formatter.build_intersections_training_data(args.intersections_file, args.out_dir, args.ways_db_dir, tag_components=not args.untagged) + osm_formatter.build_intersections_training_data(args.intersections_file, args.out_dir, tag_components=not args.untagged)