[polygons/osm] Construct polygons from OSM relations using a number of space-saving optimizations in order to process planet in a reasonable amount of memory. Builds a graph of connected ways such that forming polygons is equivalent to finding strongly connected components.
This commit is contained in:
217
scripts/geodata/osm/osm_admin_boundaries.py
Normal file
217
scripts/geodata/osm/osm_admin_boundaries.py
Normal file
@@ -0,0 +1,217 @@
|
||||
'''
|
||||
osm_admin_boundaries.py
|
||||
---------------
|
||||
|
||||
Generates polygons from OpenStreetMap relations
|
||||
'''
|
||||
|
||||
import array
|
||||
import logging
|
||||
|
||||
from bisect import bisect_left
|
||||
from itertools import izip, combinations
|
||||
|
||||
from geodata.coordinates.conversion import latlon_to_decimal
|
||||
from geodata.osm.extract import *
|
||||
|
||||
|
||||
class OSMAdminPolygonReader(object):
|
||||
'''
|
||||
OSM relations are stored with pointers to their bounding ways,
|
||||
which in turn store pointers to their constituent nodes and the
|
||||
XML file for planet is far too large to be parsed in-memory.
|
||||
|
||||
For the purposes of constructing (multi)polygons, we need lists
|
||||
of lat/lon coordinates for the edges of each outer and inner polygon
|
||||
that form the overall boundary (this allows for holes e.g.
|
||||
Lesotho/South Africa and multiple disjoint polygons such as islands)
|
||||
|
||||
This class creates a compact representation of the intermediate
|
||||
lookup tables and coordinates using Python's typed array module
|
||||
which stores C-sized ints, doubles, etc. in a dynamic array. It's like
|
||||
a list but smaller and faster for arrays of numbers and doesn't require
|
||||
pulling in numpy as a dependency when all we want is the space savings.
|
||||
|
||||
One nice property of the .osm files generated by osmfilter is that
|
||||
nodes/ways/relations are stored in sorted order, so we don't have to
|
||||
pre-sort the lookup arrays before performing binary search.
|
||||
'''
|
||||
|
||||
def __init__(self, filename):
|
||||
self.filename = filename
|
||||
|
||||
self.node_ids = array.array('l')
|
||||
self.way_ids = array.array('l')
|
||||
|
||||
self.coords = array.array('d')
|
||||
|
||||
self.way_deps = array.array('l')
|
||||
self.way_coords = array.array('d')
|
||||
self.way_indptr = array.array('i', [0])
|
||||
|
||||
self.logger = logging.getLogger('osm_admin_polys')
|
||||
|
||||
def binary_search(self, a, x):
|
||||
'''Locate the leftmost value exactly equal to x'''
|
||||
i = bisect_left(a, x)
|
||||
if i != len(a) and a[i] == x:
|
||||
return i
|
||||
raise ValueError
|
||||
|
||||
def node_coordinates(self, coords, indptr, idx):
|
||||
start_index = indptr[idx] * 2
|
||||
end_index = indptr[idx + 1] * 2
|
||||
return zip(coords[start_index:end_index:2],
|
||||
coords[start_index + 1:end_index:2])
|
||||
|
||||
def sparse_deps(self, data, indptr, idx):
|
||||
return [data[i] for i in xrange(indptr[idx], indptr[idx + 1])]
|
||||
|
||||
def create_polygons(self, ways):
|
||||
'''
|
||||
Polygons (relations) are effectively stored as lists of
|
||||
line segments (ways) and there may be more than one polygon
|
||||
(island chains, overseas territories).
|
||||
|
||||
If we view the line segments as a graph (any two ways which
|
||||
share a terminal node are connected), then the process of
|
||||
constructing polygons reduces to finding strongly connected
|
||||
components in a graph.
|
||||
|
||||
https://en.wikipedia.org/wiki/Strongly_connected_component
|
||||
|
||||
Note that even though there may be hundreds of thousands of
|
||||
points in a complex polygon like a country boundary, the
|
||||
graph is only
|
||||
'''
|
||||
end_nodes = defaultdict(list)
|
||||
polys = []
|
||||
|
||||
way_indices = {}
|
||||
start_end_nodes = {}
|
||||
|
||||
for way_id in ways:
|
||||
# Find the way position via binary search
|
||||
try:
|
||||
way_index = self.binary_search(self.way_ids, way_id)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# Cache the way index
|
||||
way_indices[way_id] = way_index
|
||||
|
||||
# way_indptr is a compressed index into way_deps/way_coords
|
||||
# way_index i is stored at indices way_indptr[i]:way_indptr[i+1]
|
||||
# in way_deps
|
||||
start_node_id = self.way_deps[self.way_indptr[way_index]]
|
||||
end_node_id = self.way_deps[self.way_indptr[way_index + 1] - 1]
|
||||
|
||||
start_end_nodes[way_id] = (start_node_id, end_node_id)
|
||||
|
||||
if start_node_id == end_node_id:
|
||||
way_node_points = self.node_coordinates(self.way_coords, self.way_indptr, way_index)
|
||||
polys.append(way_node_points)
|
||||
continue
|
||||
|
||||
end_nodes[start_node_id].append(way_id)
|
||||
end_nodes[end_node_id].append(way_id)
|
||||
|
||||
# Way graph for a single polygon, don't need to be as concerned about storage
|
||||
way_graph = defaultdict(OrderedDict)
|
||||
|
||||
for node_id, ways in end_nodes.iteritems():
|
||||
for w1, w2 in combinations(ways, 2):
|
||||
way_graph[w1][w2] = None
|
||||
way_graph[w2][w1] = None
|
||||
|
||||
way_graph = {v: w.keys() for v, w in way_graph.iteritems()}
|
||||
|
||||
for component in strongly_connected_components(way_graph):
|
||||
poly_nodes = []
|
||||
|
||||
for way_id in component:
|
||||
way_index = way_indices[way_id]
|
||||
poly_nodes.extend(self.node_coordinates(self.way_coords, self.way_indptr, way_index))
|
||||
|
||||
polys.append(poly_nodes)
|
||||
|
||||
return polys
|
||||
|
||||
def polygons(self):
|
||||
'''
|
||||
Generator which yields tuples like:
|
||||
|
||||
(relation_id, properties, outer_polygons, inner_polygons)
|
||||
|
||||
At this point a polygon is a list of coordinate tuples,
|
||||
suitable for passing to shapely's Polygon constructor
|
||||
but may be used for other purposes.
|
||||
|
||||
outer_polygons is a list of the exterior polygons for this
|
||||
boundary. inner_polygons is a list of "holes" in the exterior
|
||||
polygons although donuts and donut-holes need to be matched
|
||||
by the caller using something like shapely's contains.
|
||||
'''
|
||||
i = 0
|
||||
|
||||
for element_id, props, deps in parse_osm(self.filename, dependencies=True):
|
||||
if element_id.startswith('node'):
|
||||
node_id = long(element_id.split(':')[-1])
|
||||
lat = props.get('lat')
|
||||
lon = props.get('lon')
|
||||
if lat is None or lon is None:
|
||||
continue
|
||||
lat, lon = latlon_to_decimal(lat, lon)
|
||||
if lat is None or lon is None:
|
||||
continue
|
||||
# Nodes are stored in a sorted array, coordinate indices are simply
|
||||
# [lat, lon, lat, lon, ...] so the index can be calculated as 2 * i
|
||||
self.coords.append(lat)
|
||||
self.coords.append(lon)
|
||||
self.node_ids.append(node_id)
|
||||
elif element_id.startswith('way'):
|
||||
way_id = long(element_id.split(':')[-1])
|
||||
|
||||
# Get node indices by binary search
|
||||
try:
|
||||
node_indices = [self.binary_search(self.node_ids, node_id) for node_id in deps]
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
# Way ids stored in a sorted array
|
||||
self.way_ids.append(way_id)
|
||||
|
||||
# way_deps is the list of dependent node ids
|
||||
# way_coords is a copy of coords indexed by way ids
|
||||
for node_id, node_index in izip(deps, node_indices):
|
||||
self.way_deps.append(node_id)
|
||||
self.way_coords.append(self.coords[node_index * 2])
|
||||
self.way_coords.append(self.coords[node_index * 2 + 1])
|
||||
|
||||
self.way_indptr.append(len(self.way_deps))
|
||||
elif element_id.startswith('relation'):
|
||||
if self.node_ids is not None:
|
||||
self.node_ids = None
|
||||
if self.coords is not None:
|
||||
self.coords = None
|
||||
|
||||
relation_id = long(element_id.split(':')[-1])
|
||||
if len(deps) == 0 or not props.get('boundary'):
|
||||
continue
|
||||
|
||||
outer_ways = []
|
||||
inner_ways = []
|
||||
|
||||
for way_id, role in deps:
|
||||
if role == 'outer':
|
||||
outer_ways.append(way_id)
|
||||
elif role == 'inner':
|
||||
inner_ways.append(way_id)
|
||||
|
||||
outer_polys = self.create_polygons(outer_ways)
|
||||
inner_polys = self.create_polygons(inner_ways)
|
||||
|
||||
yield relation_id, props, outer_polys, inner_polys
|
||||
if i % 1000 == 0 and i > 0:
|
||||
self.logger.info('on {}s, did {}'.format(element_id.split(':')[0], i))
|
||||
i += 1
|
||||
Reference in New Issue
Block a user