[polygons/zones] Adding a polygon reader for OSM zones (named residential/commercial/industrial/military areas) which are closed ways and can be used in addresses e.g. in office parks, larger housing complexes, etc.

This commit is contained in:
Al
2016-03-26 23:16:57 -04:00
parent 9ee1f56a0f
commit b25682e761
2 changed files with 56 additions and 15 deletions

View File

@@ -7,6 +7,7 @@ Generates polygons from OpenStreetMap relations
import array import array
import logging import logging
import six
from bisect import bisect_left from bisect import bisect_left
from collections import defaultdict, OrderedDict from collections import defaultdict, OrderedDict
@@ -17,7 +18,7 @@ from geodata.graph.scc import strongly_connected_components
from geodata.osm.extract import * from geodata.osm.extract import *
class OSMAdminPolygonReader(object): class OSMPolygonReader(object):
''' '''
OSM relations are stored with pointers to their bounding ways, OSM relations are stored with pointers to their bounding ways,
which in turn store pointers to their constituent nodes and the which in turn store pointers to their constituent nodes and the
@@ -147,6 +148,9 @@ class OSMAdminPolygonReader(object):
return polys return polys
def include_closed_way(self, props):
raise NotImplementedError('Children must implement')
def polygons(self): def polygons(self):
''' '''
Generator which yields tuples like: Generator which yields tuples like:
@@ -165,6 +169,7 @@ class OSMAdminPolygonReader(object):
i = 0 i = 0
for element_id, props, deps in parse_osm(self.filename, dependencies=True): for element_id, props, deps in parse_osm(self.filename, dependencies=True):
props = {safe_decode(k): safe_decode(v) for k, v in six.iteritems(props)}
if element_id.startswith('node'): if element_id.startswith('node'):
node_id = long(element_id.split(':')[-1]) node_id = long(element_id.split(':')[-1])
lat = props.get('lat') lat = props.get('lat')
@@ -200,6 +205,12 @@ class OSMAdminPolygonReader(object):
self.way_coords.append(self.coords[node_index * 2 + 1]) self.way_coords.append(self.coords[node_index * 2 + 1])
self.way_indptr.append(len(self.way_deps)) self.way_indptr.append(len(self.way_deps))
if deps[0] == deps[-1] and self.include_closed_way(props):
outer_polys = self.create_polygons([way_id])
inner_polys = []
yield WAY_OFFSET + way_id, props, outer_polys, inner_polys
elif element_id.startswith('relation'): elif element_id.startswith('relation'):
if self.node_ids is not None: if self.node_ids is not None:
self.node_ids = None self.node_ids = None
@@ -222,7 +233,17 @@ class OSMAdminPolygonReader(object):
outer_polys = self.create_polygons(outer_ways) outer_polys = self.create_polygons(outer_ways)
inner_polys = self.create_polygons(inner_ways) inner_polys = self.create_polygons(inner_ways)
yield relation_id, props, outer_polys, inner_polys yield RELATION_OFFSET + relation_id, props, outer_polys, inner_polys
if i % 1000 == 0 and i > 0: if i % 1000 == 0 and i > 0:
self.logger.info('doing {}s, at {}'.format(element_id.split(':')[0], i)) self.logger.info('doing {}s, at {}'.format(element_id.split(':')[0], i))
i += 1 i += 1
class OSMAdminPolygonReader(OSMPolygonReader):
def include_closed_way(self, props):
return False
class OSMZonePolygonReader(OSMPolygonReader):
def include_closed_way(self, props):
return 'landuse' in props

View File

@@ -18,6 +18,7 @@ import os
import re import re
import requests import requests
import shutil import shutil
import six
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
@@ -34,7 +35,7 @@ from geodata.i18n.unicode_properties import get_chars_by_script
from geodata.i18n.word_breaks import ideographic_scripts from geodata.i18n.word_breaks import ideographic_scripts
from geodata.names.deduping import NameDeduper from geodata.names.deduping import NameDeduper
from geodata.osm.extract import parse_osm, OSM_NAME_TAGS from geodata.osm.extract import parse_osm, OSM_NAME_TAGS
from geodata.osm.osm_admin_boundaries import OSMAdminPolygonReader from geodata.osm.osm_admin_boundaries import OSMAdminPolygonReader, OSMZonePolygonReader
from geodata.polygons.index import * from geodata.polygons.index import *
from geodata.statistics.tf_idf import IDFIndex from geodata.statistics.tf_idf import IDFIndex
@@ -249,7 +250,6 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
ensure_dir(scratch_dir) ensure_dir(scratch_dir)
logger = logging.getLogger('neighborhoods') logger = logging.getLogger('neighborhoods')
logger.setLevel(logging.INFO)
qs_scratch_dir = os.path.join(scratch_dir, 'qs_neighborhoods') qs_scratch_dir = os.path.join(scratch_dir, 'qs_neighborhoods')
ensure_dir(qs_scratch_dir) ensure_dir(qs_scratch_dir)
@@ -272,7 +272,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
idf.update(doc) idf.update(doc)
for key, attrs, deps in parse_osm(filename): for key, attrs, deps in parse_osm(filename):
for k, v in attrs.iteritems(): for k, v in six.iteritems(attrs):
if any((k.startswith(name_key) for name_key in OSM_NAME_TAGS)): if any((k.startswith(name_key) for name_key in OSM_NAME_TAGS)):
doc = cls.count_words(v) doc = cls.count_words(v)
idf.update(doc) idf.update(doc)
@@ -304,7 +304,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
osm_names.append(name) osm_names.append(name)
for name_key in OSM_NAME_TAGS: for name_key in OSM_NAME_TAGS:
osm_names.extend([v for k, v in attrs.iteritems() if k.startswith('{}:'.format(name_key))]) osm_names.extend([v for k, v in six.iteritems(attrs) if k.startswith('{}:'.format(name_key))])
for idx in (zs, qs): for idx in (zs, qs):
candidates = idx.get_candidate_polygons(lat, lon, return_all=True) candidates = idx.get_candidate_polygons(lat, lon, return_all=True)
@@ -618,6 +618,8 @@ class OSMReverseGeocoder(RTreePolygonIndex):
ADMIN_LEVEL = 'admin_level' ADMIN_LEVEL = 'admin_level'
polygon_reader = OSMAdminPolygonReader
include_property_patterns = set([ include_property_patterns = set([
'name', 'name',
'name:*', 'name:*',
@@ -649,20 +651,26 @@ class OSMReverseGeocoder(RTreePolygonIndex):
''' '''
index = cls(save_dir=output_dir, index_filename=index_filename) index = cls(save_dir=output_dir, index_filename=index_filename)
reader = OSMAdminPolygonReader(filename) reader = cls.polygon_reader(filename)
polygons = reader.polygons() polygons = reader.polygons()
handler = logging.StreamHandler(sys.stderr)
reader.logger.addHandler(handler)
reader.logger.setLevel(logging.INFO)
logger = logging.getLogger('osm.reverse_geocode') logger = logging.getLogger('osm.reverse_geocode')
for relation_id, props, outer_polys, inner_polys in polygons: for element_id, props, outer_polys, inner_polys in polygons:
props = {k: v for k, v in props.iteritems() if k in cls.include_property_patterns props = {k: v for k, v in six.iteritems(props)
or (':' in k and '{}:*'.format(k.split(':', 1)[0]) in cls.include_property_patterns)} if k in cls.include_property_patterns or (six.u(':') in k and
six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)}
props['id'] = relation_id if element_id >= RELATION_OFFSET:
props['type'] = 'relation'
element_id -= RELATION_OFFSET
elif element_id >= WAY_OFFSET:
props['type'] = 'way'
element_id -= WAY_OFFSET
else:
props['type'] = 'node'
props['id'] = element_id
if inner_polys and not outer_polys: if inner_polys and not outer_polys:
logger.warn('inner polygons with no outer') logger.warn('inner polygons with no outer')
@@ -749,6 +757,11 @@ class OSMReverseGeocoder(RTreePolygonIndex):
candidates = super(OSMReverseGeocoder, self).get_candidate_polygons(lat, lon) candidates = super(OSMReverseGeocoder, self).get_candidate_polygons(lat, lon)
return sorted(candidates, key=self.sort_level, reverse=True) return sorted(candidates, key=self.sort_level, reverse=True)
class OSMZoneReverseGeocoder(OSMReverseGeocoder):
polygon_reader = OSMZonePolygonReader
include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse'])
if __name__ == '__main__': if __name__ == '__main__':
# Handle argument parsing here # Handle argument parsing here
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@@ -759,6 +772,9 @@ if __name__ == '__main__':
parser.add_argument('-a', '--osm-admin-file', parser.add_argument('-a', '--osm-admin-file',
help='Path to OSM borders file (with dependencies, .osm format)') help='Path to OSM borders file (with dependencies, .osm format)')
parser.add_argument('-l', '--osm-landuse-file',
help='Path to OSM landuse file (with dependencies, .osm format)')
parser.add_argument('-n', '--osm-neighborhoods-file', parser.add_argument('-n', '--osm-neighborhoods-file',
help='Path to OSM neighborhoods file (no dependencies, .osm format)') help='Path to OSM neighborhoods file (no dependencies, .osm format)')
@@ -766,9 +782,13 @@ if __name__ == '__main__':
default=os.getcwd(), default=os.getcwd(),
help='Output directory') help='Output directory')
logging.basicConfig(level=logging.INFO)
args = parser.parse_args() args = parser.parse_args()
if args.osm_admin_file: if args.osm_admin_file:
index = OSMReverseGeocoder.create_from_osm_file(args.osm_admin_file, args.out_dir) index = OSMReverseGeocoder.create_from_osm_file(args.osm_admin_file, args.out_dir)
elif args.osm_landuse_file:
index = OSMZoneReverseGeocoder.create_from_osm_file(args.osm_landuse_file, args.out_dir)
elif args.osm_neighborhoods_file and args.quattroshapes_dir: elif args.osm_neighborhoods_file and args.quattroshapes_dir:
index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes( index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes(
args.osm_neighborhoods_file, args.osm_neighborhoods_file,