From b25682e76158f92a73fe575e5b3566fbf7e4b5dd Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 26 Mar 2016 23:16:57 -0400 Subject: [PATCH] [polygons/zones] Adding a polygon reader for OSM zones (named residential/commercial/industrial/military areas) which are closed ways and can be used in addresses e.g. in office parks, larger housing complexes, etc. --- scripts/geodata/osm/osm_admin_boundaries.py | 25 ++++++++++- scripts/geodata/polygons/reverse_geocode.py | 46 +++++++++++++++------ 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/scripts/geodata/osm/osm_admin_boundaries.py b/scripts/geodata/osm/osm_admin_boundaries.py index df74251d..e9b6f883 100644 --- a/scripts/geodata/osm/osm_admin_boundaries.py +++ b/scripts/geodata/osm/osm_admin_boundaries.py @@ -7,6 +7,7 @@ Generates polygons from OpenStreetMap relations import array import logging +import six from bisect import bisect_left from collections import defaultdict, OrderedDict @@ -17,7 +18,7 @@ from geodata.graph.scc import strongly_connected_components from geodata.osm.extract import * -class OSMAdminPolygonReader(object): +class OSMPolygonReader(object): ''' OSM relations are stored with pointers to their bounding ways, which in turn store pointers to their constituent nodes and the @@ -147,6 +148,9 @@ class OSMAdminPolygonReader(object): return polys + def include_closed_way(self, props): + raise NotImplementedError('Children must implement') + def polygons(self): ''' Generator which yields tuples like: @@ -165,6 +169,7 @@ class OSMAdminPolygonReader(object): i = 0 for element_id, props, deps in parse_osm(self.filename, dependencies=True): + props = {safe_decode(k): safe_decode(v) for k, v in six.iteritems(props)} if element_id.startswith('node'): node_id = long(element_id.split(':')[-1]) lat = props.get('lat') @@ -200,6 +205,12 @@ class OSMAdminPolygonReader(object): self.way_coords.append(self.coords[node_index * 2 + 1]) self.way_indptr.append(len(self.way_deps)) + + if deps[0] == deps[-1] and self.include_closed_way(props): + outer_polys = self.create_polygons([way_id]) + inner_polys = [] + yield WAY_OFFSET + way_id, props, outer_polys, inner_polys + elif element_id.startswith('relation'): if self.node_ids is not None: self.node_ids = None @@ -222,7 +233,17 @@ class OSMAdminPolygonReader(object): outer_polys = self.create_polygons(outer_ways) inner_polys = self.create_polygons(inner_ways) - yield relation_id, props, outer_polys, inner_polys + yield RELATION_OFFSET + relation_id, props, outer_polys, inner_polys if i % 1000 == 0 and i > 0: self.logger.info('doing {}s, at {}'.format(element_id.split(':')[0], i)) i += 1 + + +class OSMAdminPolygonReader(OSMPolygonReader): + def include_closed_way(self, props): + return False + + +class OSMZonePolygonReader(OSMPolygonReader): + def include_closed_way(self, props): + return 'landuse' in props diff --git a/scripts/geodata/polygons/reverse_geocode.py b/scripts/geodata/polygons/reverse_geocode.py index cf864b04..f58419e7 100644 --- a/scripts/geodata/polygons/reverse_geocode.py +++ b/scripts/geodata/polygons/reverse_geocode.py @@ -18,6 +18,7 @@ import os import re import requests import shutil +import six import subprocess import sys import tempfile @@ -34,7 +35,7 @@ from geodata.i18n.unicode_properties import get_chars_by_script from geodata.i18n.word_breaks import ideographic_scripts from geodata.names.deduping import NameDeduper from geodata.osm.extract import parse_osm, OSM_NAME_TAGS -from geodata.osm.osm_admin_boundaries import OSMAdminPolygonReader +from geodata.osm.osm_admin_boundaries import OSMAdminPolygonReader, OSMZonePolygonReader from geodata.polygons.index import * from geodata.statistics.tf_idf import IDFIndex @@ -249,7 +250,6 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): ensure_dir(scratch_dir) logger = logging.getLogger('neighborhoods') - logger.setLevel(logging.INFO) qs_scratch_dir = os.path.join(scratch_dir, 'qs_neighborhoods') ensure_dir(qs_scratch_dir) @@ -272,7 +272,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): idf.update(doc) for key, attrs, deps in parse_osm(filename): - for k, v in attrs.iteritems(): + for k, v in six.iteritems(attrs): if any((k.startswith(name_key) for name_key in OSM_NAME_TAGS)): doc = cls.count_words(v) idf.update(doc) @@ -304,7 +304,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): osm_names.append(name) for name_key in OSM_NAME_TAGS: - osm_names.extend([v for k, v in attrs.iteritems() if k.startswith('{}:'.format(name_key))]) + osm_names.extend([v for k, v in six.iteritems(attrs) if k.startswith('{}:'.format(name_key))]) for idx in (zs, qs): candidates = idx.get_candidate_polygons(lat, lon, return_all=True) @@ -618,6 +618,8 @@ class OSMReverseGeocoder(RTreePolygonIndex): ADMIN_LEVEL = 'admin_level' + polygon_reader = OSMAdminPolygonReader + include_property_patterns = set([ 'name', 'name:*', @@ -649,20 +651,26 @@ class OSMReverseGeocoder(RTreePolygonIndex): ''' index = cls(save_dir=output_dir, index_filename=index_filename) - reader = OSMAdminPolygonReader(filename) + reader = cls.polygon_reader(filename) polygons = reader.polygons() - handler = logging.StreamHandler(sys.stderr) - reader.logger.addHandler(handler) - reader.logger.setLevel(logging.INFO) - logger = logging.getLogger('osm.reverse_geocode') - for relation_id, props, outer_polys, inner_polys in polygons: - props = {k: v for k, v in props.iteritems() if k in cls.include_property_patterns - or (':' in k and '{}:*'.format(k.split(':', 1)[0]) in cls.include_property_patterns)} + for element_id, props, outer_polys, inner_polys in polygons: + props = {k: v for k, v in six.iteritems(props) + if k in cls.include_property_patterns or (six.u(':') in k and + six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)} - props['id'] = relation_id + if element_id >= RELATION_OFFSET: + props['type'] = 'relation' + element_id -= RELATION_OFFSET + elif element_id >= WAY_OFFSET: + props['type'] = 'way' + element_id -= WAY_OFFSET + else: + props['type'] = 'node' + + props['id'] = element_id if inner_polys and not outer_polys: logger.warn('inner polygons with no outer') @@ -749,6 +757,11 @@ class OSMReverseGeocoder(RTreePolygonIndex): candidates = super(OSMReverseGeocoder, self).get_candidate_polygons(lat, lon) return sorted(candidates, key=self.sort_level, reverse=True) + +class OSMZoneReverseGeocoder(OSMReverseGeocoder): + polygon_reader = OSMZonePolygonReader + include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse']) + if __name__ == '__main__': # Handle argument parsing here parser = argparse.ArgumentParser() @@ -759,6 +772,9 @@ if __name__ == '__main__': parser.add_argument('-a', '--osm-admin-file', help='Path to OSM borders file (with dependencies, .osm format)') + parser.add_argument('-l', '--osm-landuse-file', + help='Path to OSM landuse file (with dependencies, .osm format)') + parser.add_argument('-n', '--osm-neighborhoods-file', help='Path to OSM neighborhoods file (no dependencies, .osm format)') @@ -766,9 +782,13 @@ if __name__ == '__main__': default=os.getcwd(), help='Output directory') + logging.basicConfig(level=logging.INFO) + args = parser.parse_args() if args.osm_admin_file: index = OSMReverseGeocoder.create_from_osm_file(args.osm_admin_file, args.out_dir) + elif args.osm_landuse_file: + index = OSMZoneReverseGeocoder.create_from_osm_file(args.osm_landuse_file, args.out_dir) elif args.osm_neighborhoods_file and args.quattroshapes_dir: index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes( args.osm_neighborhoods_file,