[osm] Storing polygon properties in a LevelDB, polygons themselves stay in memory
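Summary of the change: each index keeps only prepared Shapely geometries in the in-memory self.polygons list, while the per-polygon properties dict is serialized with ujson and written to a LevelDB under the polygon's insertion index (self.i), so properties are only read back from disk once a containing polygon is actually found. The sketch below restates that pattern outside the diff; it reuses the same LevelDB Put/Get and prep/contains calls the commit itself uses, but the class name and database path are illustrative only, not part of the codebase.

import ujson as json
from leveldb import LevelDB
from shapely.geometry import Point, Polygon
from shapely.prepared import prep


class PointInPolygonsSketch(object):
    """Illustrative only: geometries stay in memory, properties live in LevelDB."""

    def __init__(self, db_path='polygon_properties'):
        self.polygons = []                # prepared geometries, kept in memory
        self.polygons_db = LevelDB(db_path)
        self.i = 0                        # insertion counter doubles as the DB key

    def add_polygon(self, poly, properties):
        self.polygons.append(prep(poly))
        # properties go to disk, keyed by the polygon's position in self.polygons
        self.polygons_db.Put(str(self.i), json.dumps(properties))
        self.i += 1

    def point_in_polygons(self, lat, lon):
        pt = Point(lon, lat)
        for i, poly in enumerate(self.polygons):
            if poly.contains(pt):
                # only the matching polygon's properties are deserialized
                return json.loads(self.polygons_db.Get(str(i)))
        return None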
@@ -4,6 +4,8 @@ import os
 import rtree
 import ujson as json

+from leveldb import LevelDB
+
 from collections import OrderedDict, defaultdict
 from shapely.geometry import Point, Polygon, MultiPolygon
 from shapely.prepared import prep
@@ -11,28 +13,30 @@ from shapely.geometry.geo import mapping

 from geodata.polygons.area import polygon_bounding_box_area

 DEFAULT_POLYS_FILENAME = 'polygons.geojson'


 class PolygonIndex(object):
     include_only_properties = None
-    simplify_tolerance = 0.0001
+    simplify_tolerance = 0.00001
     preserve_topology = True
     large_polygon_threshold = 10000

     INDEX_FILENAME = None
+    POLYGONS_DB_DIR = 'polygons'
     DEFAULT_POLYS_FILENAME = 'polygons.geojson'

-    def __init__(self, index=None, polygons=None, save_dir=None,
+    def __init__(self, index=None, polygons=None, polygons_db=None, save_dir=None,
                  index_filename=None,
+                 polygons_db_path=None,
                  include_only_properties=None):
         if save_dir:
             self.save_dir = save_dir
         else:
             self.save_dir = None

-        if index_filename:
-            self.index_path = os.path.join(save_dir or '.', index_filename)
-        else:
-            self.index_path = os.path.join(save_dir or '.', self.INDEX_FILENAME)
+        if not index_filename:
+            index_filename = self.INDEX_FILENAME
+
+        self.index_path = os.path.join(save_dir or '.', index_filename)

         if not index:
             self.create_index(overwrite=True)
@@ -47,6 +51,16 @@ class PolygonIndex(object):
         else:
             self.polygons = polygons

+        if not polygons_db_path:
+            polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)
+
+        if not polygons_db:
+            self.polygons_db = LevelDB(polygons_db_path)
+        else:
+            self.polygons_db = polygons_db
+
+        self.setup()
+
         self.i = 0

     def create_index(self, overwrite=False):
@@ -55,6 +69,9 @@ class PolygonIndex(object):
     def index_polygon(self, polygon):
         raise NotImplementedError('Children must implement')

+    def setup(self):
+        pass
+
     def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None):
         if simplify_tolerance is None:
             simplify_tolerance = self.simplify_tolerance
@@ -62,16 +79,21 @@ class PolygonIndex(object):
             preserve_topology = self.preserve_topology
         return poly.simplify(simplify_tolerance, preserve_topology=preserve_topology)

+    def index_polygon_properties(self, properties):
+        pass
+
     def add_polygon(self, poly, properties, include_only_properties=None):
         if include_only_properties is not None:
             properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}
-        self.polygons.append((properties, prep(poly)))

+        self.polygons.append(prep(poly))
+        self.polygons_db.Put(str(self.i), json.dumps(properties))
+        self.index_polygon_properties(properties)
+        self.i += 1

     @classmethod
     def create_from_shapefiles(cls, inputs, output_dir,
                                index_filename=None,
                                polys_filename=DEFAULT_POLYS_FILENAME,
                                include_only_properties=None):
         index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
         for input_file in inputs:
@@ -161,20 +183,27 @@ class PolygonIndex(object):
     def save(self, polys_filename=DEFAULT_POLYS_FILENAME):
         self.save_polygons(os.path.join(self.save_dir, polys_filename))
         self.save_index()
+        self.save_polygon_properties(self.save_dir)

     def save_polygons(self, out_filename):
         out = open(out_filename, 'w')
-        for props, poly in self.polygons:
+        for poly in self.polygons:
             feature = {
                 'type': 'Feature',
                 'geometry': mapping(poly.context),
-                'properties': props
             }
             out.write(json.dumps(feature) + u'\n')
+        self.polygons_db.CompactRange('\x00', '\xff')

     def save_index(self):
         raise NotImplementedError('Children must implement')

+    def load_polygon_properties(self, d):
+        pass
+
+    def save_polygon_properties(self, d):
+        pass
+
     @classmethod
     def load_polygons(cls, filename):
         f = open(filename)
@@ -185,14 +214,14 @@

             if poly_type == 'Polygon':
                 poly = Polygon(feature['geometry']['coordinates'][0])
-                polygons.append((feature['properties'], prep(poly)))
+                polygons.append(prep(poly))
             elif poly_type == 'MultiPolygon':
                 polys = []
                 for coords in feature['geometry']['coordinates']:
                     poly = Polygon(coords[0])
                     polys.append(poly)

-                polygons.append((feature['properties'], prep(MultiPolygon(polys))))
+                polygons.append(prep(MultiPolygon(polys)))
         return polygons

     @classmethod
@@ -200,10 +229,13 @@
         raise NotImplementedError('Children must implement')

     @classmethod
-    def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME):
+    def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME, polys_db_dir=POLYGONS_DB_DIR):
         index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
         polys = cls.load_polygons(os.path.join(d, polys_filename))
-        return cls(index=index, polygons=polys, save_dir=d)
+        polygons_db = LevelDB(os.path.join(d, polys_db_dir))
+        polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
+        polygon_index.load_polygon_properties(d)
+        return polygon_index

     def get_candidate_polygons(self, lat, lon):
         raise NotImplementedError('Children must implement')
@@ -215,12 +247,15 @@
         if return_all:
             containing = []
         for i in polys:
-            props, poly = self.polygons[i]
+            poly = self.polygons[i]
             contains = poly.contains(pt)
             if contains and not return_all:
-                return props
+                try:
+                    return json.loads(self.polygons_db.Get(str(i)))
+                except KeyError:
+                    return None
             elif contains:
-                containing.append(props)
+                containing.append(json.loads(self.polygons_db.Get(str(i))))
         return containing

@@ -1,6 +1,7 @@
 import argparse
 import os
 import sys
+import ujson as json

 this_dir = os.path.realpath(os.path.dirname(__file__))
 sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
@@ -13,6 +14,8 @@ regional_language_dir = os.path.join(LANGUAGES_DIR, 'regional')


 class LanguagePolygonIndex(RTreePolygonIndex):
     DEFAULT_POLYS_FILENAME = 'polygons.geojson'
+    ADMIN_LEVELS_FILENAME = 'admin_levels.json'

     include_only_properties = set([
         'qs_a0',
@@ -136,9 +139,20 @@ class LanguagePolygonIndex(RTreePolygonIndex):
             output_dir, index_filename=index_filename,
             polys_filename=polys_filename)

+    def setup(self):
+        self.admin_levels = []
+
+    def index_polygon_properties(self, properties):
+        self.admin_levels.append(properties['admin_level'])
+
+    def load_polygon_properties(self, d):
+        self.admin_levels = json.load(open(os.path.join(d, self.ADMIN_LEVELS_FILENAME)))
+
+    def save_polygon_properties(self, d):
+        json.dump(self.admin_levels, open(os.path.join(d, self.ADMIN_LEVELS_FILENAME), 'w'))
+
     def admin_level(self, i):
-        props, p = self.polygons[i]
-        return props['admin_level']
+        return self.admin_levels[i]

     def get_candidate_polygons(self, lat, lon):
         candidates = OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys()
@@ -35,7 +35,7 @@ from geodata.i18n.unicode_properties import get_chars_by_script
 from geodata.i18n.word_breaks import ideographic_scripts
 from geodata.names.deduping import NameDeduper
 from geodata.osm.extract import parse_osm, OSM_NAME_TAGS
-from geodata.osm.osm_admin_boundaries import OSMAdminPolygonReader, OSMZonePolygonReader
+from geodata.osm.osm_admin_boundaries import OSMAdminPolygonReader, OSMSubdivisionPolygonReader
 from geodata.polygons.index import *
 from geodata.statistics.tf_idf import IDFIndex

@@ -161,6 +161,8 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
     '''
     NEIGHBORHOODS_REPO = 'https://github.com/blackmad/neighborhoods'

+    PRIORITIES_FILENAME = 'priorities.json'
+
     SCRATCH_DIR = '/tmp'

     DUPE_THRESHOLD = 0.9
@@ -386,9 +388,20 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):

         return index

+    def setup(self):
+        self.priorities = []
+
+    def index_polygon_properties(self, properties):
+        self.priorities.append((props['polygon_type'], self.source_priorities[props['source']]))
+
+    def load_polygon_properties(self, d):
+        self.priorities = json.load(open(os.path.join(d, self.PRIORITIES_FILENAME)))
+
+    def save_polygon_properties(self, d):
+        json.dump(self.priorities, open(os.path.join(d, self.PRIORITIES_FILENAME), 'w'))
+
     def priority(self, i):
-        props, p = self.polygons[i]
-        return (self.level_priorities[props['polygon_type']], self.source_priorities[props['source']])
+        return self.priorities[i]

     def get_candidate_polygons(self, lat, lon):
         candidates = super(NeighborhoodReverseGeocoder, self).get_candidate_polygons(lat, lon)
@@ -419,6 +432,8 @@ class QuattroshapesReverseGeocoder(RTreePolygonIndex):
     LOCALITY = 'locality'
     NEIGHBORHOOD = 'neighborhood'

+    PRIORITIES_FILENAME = 'priorities.json'
+
     sorted_levels = (COUNTRY,
                      ADMIN1_REGION,
                      ADMIN1,
@@ -578,9 +593,20 @@ class QuattroshapesReverseGeocoder(RTreePolygonIndex):
             output_dir, index_filename=index_filename,
             polys_filename=polys_filename)

+    def setup(self):
+        self.priorities = []
+
+    def index_polygon_properties(self, properties):
+        self.priorities.append(self.sort_levels.get(properties[self.LEVEL], 0))
+
+    def load_polygon_properties(self, d):
+        self.priorities = json.load(open(os.path.join(d, self.PRIORITIES_FILENAME)))
+
+    def save_polygon_properties(self, d):
+        json.dump(self.priorities, open(os.path.join(d, self.PRIORITIES_FILENAME), 'w'))
+
     def sort_level(self, i):
-        props, p = self.polygons[i]
-        return self.sort_levels.get(props[self.LEVEL], 0)
+        return self.priorities[i]

     def get_candidate_polygons(self, lat, lon):
         candidates = super(QuattroshapesReverseGeocoder, self).get_candidate_polygons(lat, lon)
@@ -618,6 +644,8 @@ class OSMReverseGeocoder(RTreePolygonIndex):

     ADMIN_LEVEL = 'admin_level'

+    ADMIN_LEVELS_FILENAME = 'admin_levels.json'
+
     polygon_reader = OSMAdminPolygonReader

     include_property_patterns = set([
@@ -713,16 +741,11 @@ class OSMReverseGeocoder(RTreePolygonIndex):
                 # Figure out which outer polygon contains each inner polygon
                 interior = [p2 for p2 in inner if poly.contains(p2)]
             except TopologicalError:
-                poly = cls.fix_polygon(poly)
-                if poly is None or not poly.bounds or len(poly.bounds) != 4:
-                    continue
-                if poly.is_valid:
-                    interior = [p2 for p2 in inner if poly.contains(p2)]
                 continue

             if interior:
                 # Polygon with holes constructor
                 poly = Polygon(p, [zip(*p2.exterior.coords.xy) for p2 in interior])
             poly = cls.fix_polygon(poly)
             if poly is None or not poly.bounds or len(poly.bounds) != 4:
                 continue
             # R-tree only stores the bounding box, so add the whole polygon
@@ -740,28 +763,47 @@ class OSMReverseGeocoder(RTreePolygonIndex):
                 poly = multi[0]
             else:
                 continue
-            poly = index.simplify_polygon(poly)
+            if len(poly.exterior.xy[0]) >= cls.large_polygon_threshold:
+                poly = index.simplify_polygon(poly)
             index.add_polygon(poly, props)

         return index

-    def sort_level(self, i):
-        props, p = self.polygons[i]
-        admin_level = props.get(self.ADMIN_LEVEL, 0)
+    def setup(self):
+        self.admin_levels = []
+
+    def index_polygon_properties(self, properties):
+        admin_level = properties.get(self.ADMIN_LEVEL, 0)
         try:
-            return int(admin_level)
+            admin_level = int(admin_level)
         except ValueError:
-            return 0
+            admin_level = 0
+        self.admin_levels.append(admin_level)
+
+    def load_polygon_properties(self, d):
+        self.admin_levels = json.load(open(os.path.join(d, self.ADMIN_LEVELS_FILENAME)))
+
+    def save_polygon_properties(self, d):
+        json.dump(self.admin_levels, open(os.path.join(d, self.ADMIN_LEVELS_FILENAME), 'w'))
+
+    def sort_level(self, i):
+        return self.admin_level[i]

     def get_candidate_polygons(self, lat, lon):
         candidates = super(OSMReverseGeocoder, self).get_candidate_polygons(lat, lon)
         return sorted(candidates, key=self.sort_level, reverse=True)


-class OSMZoneReverseGeocoder(OSMReverseGeocoder):
-    polygon_reader = OSMZonePolygonReader
+class OSMSubdivisionReverseGeocoder(OSMReverseGeocoder):
+    polygon_reader = OSMSubdivisionPolygonReader
+    include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse'])
+
+
+class OSMBuildingReverseGeocoder(OSMReverseGeocoder):
+    polygon_reader = OSMBuildingPolygonReader
+    include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['building', 'building:levels'])


 if __name__ == '__main__':
     # Handle argument parsing here
     parser = argparse.ArgumentParser()
@@ -772,8 +814,11 @@ if __name__ == '__main__':
     parser.add_argument('-a', '--osm-admin-file',
                         help='Path to OSM borders file (with dependencies, .osm format)')

-    parser.add_argument('-l', '--osm-landuse-file',
-                        help='Path to OSM landuse file (with dependencies, .osm format)')
+    parser.add_argument('-s', '--osm-subdivisions-file',
+                        help='Path to OSM subdivisions file (with dependencies, .osm format)')
+
+    parser.add_argument('-b', '--osm-building-polygons-file',
+                        help='Path to OSM building polygons file (with dependencies, .osm format)')

     parser.add_argument('-n', '--osm-neighborhoods-file',
                         help='Path to OSM neighborhoods file (no dependencies, .osm format)')
@@ -788,7 +833,9 @@ if __name__ == '__main__':
     if args.osm_admin_file:
         index = OSMReverseGeocoder.create_from_osm_file(args.osm_admin_file, args.out_dir)
     elif args.osm_landuse_file:
-        index = OSMZoneReverseGeocoder.create_from_osm_file(args.osm_landuse_file, args.out_dir)
+        index = OSMSubdivisionReverseGeocoder.create_from_osm_file(args.osm_landuse_file, args.out_dir)
+    elif args.osm_building_polygons_file:
+        index = OSMBuildingReverseGeocoder.create_from_osm_file(args.osm_building_polygons_file, args.out_dir)
     elif args.osm_neighborhoods_file and args.quattroshapes_dir:
         index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes(
             args.osm_neighborhoods_file,