[osm/polygons] Using an LRU cache for prepped polygons in the various PolygonIndex subclasses. That way can store less simplified polygons but keep frequently accessed ones (like countries) in memory

This commit is contained in:
Al
2016-04-05 14:53:07 -04:00
parent 01567d2672
commit 004165d184
2 changed files with 110 additions and 37 deletions

View File

@@ -2,11 +2,12 @@ import fiona
import geohash
import os
import rtree
import six
import ujson as json
from leveldb import LevelDB
from collections import OrderedDict, defaultdict
from leveldb import LevelDB
from lru import LRU
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.prepared import prep
from shapely.geometry.geo import mapping
@@ -20,7 +21,8 @@ class PolygonIndex(object):
include_only_properties = None
simplify_tolerance = 0.00001
preserve_topology = True
large_polygon_threshold = 10000
persistent_polygons = False
cache_size = 0
INDEX_FILENAME = None
POLYGONS_DB_DIR = 'polygons'
@@ -47,10 +49,17 @@ class PolygonIndex(object):
if include_only_properties and hasattr(include_only_properties, '__contains__'):
self.include_only_properties = include_only_properties
if not polygons:
self.polygons = []
else:
if not polygons and not self.persistent_polygons:
self.polygons = {}
elif polygons and not self.persistent_polygons:
self.polygons = polygons
elif self.persistent_polygons and self.cache_size > 0:
self.polygons = LRU(self.cache_size)
if polygons:
for key, value in six.iteritems(polygons):
self.polygons[key] = value
self.polygons_contain = self.polygons_contain_cached
if not polygons_db_path:
polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)
@@ -83,12 +92,26 @@ class PolygonIndex(object):
def index_polygon_properties(self, properties):
pass
def add_polygon(self, poly, properties, include_only_properties=None):
def polygon_geojson(self, poly, properties):
return {
'type': 'Feature',
'geometry': mapping(poly),
'properties': properties
}
def add_polygon(self, poly, properties, cache=False, include_only_properties=None):
if include_only_properties is not None:
properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}
self.polygons.append(prep(poly))
self.polygons_db.Put(str(self.i), json.dumps(properties))
if not self.persistent_polygons or cache:
self.polygons[self.i] = prep(poly)
if not self.persistent_polygons:
value = json.dumps(properties)
else:
value = json.dumps(self.polygon_geojson(poly, properties))
self.polygons_db.Put(str(self.i), value)
self.index_polygon_properties(properties)
self.i += 1
@@ -181,20 +204,25 @@ class PolygonIndex(object):
return index
def compact_polygons_db(self):
self.polygons_db.CompactRange('\x00', '\xff')
def save(self, polys_filename=DEFAULT_POLYS_FILENAME):
self.save_polygons(os.path.join(self.save_dir, polys_filename))
self.save_index()
if not self.persistent_polygons:
self.save_polygons(os.path.join(self.save_dir, polys_filename))
self.compact_polygons_db()
self.save_polygon_properties(self.save_dir)
def save_polygons(self, out_filename):
out = open(out_filename, 'w')
for poly in self.polygons:
for i in xrange(self.i):
poly = self.polygons[i]
feature = {
'type': 'Feature',
'geometry': mapping(poly.context),
}
out.write(json.dumps(feature) + u'\n')
self.polygons_db.CompactRange('\x00', '\xff')
def save_index(self):
raise NotImplementedError('Children must implement')
@@ -205,24 +233,29 @@ class PolygonIndex(object):
def save_polygon_properties(self, d):
pass
@classmethod
def polygon_from_geojson(cls, feature):
poly_type = feature['geometry']['type']
if poly_type == 'Polygon':
poly = Polygon(feature['geometry']['coordinates'][0])
return poly
elif poly_type == 'MultiPolygon':
polys = []
for coords in feature['geometry']['coordinates']:
poly = Polygon(coords[0])
polys.append(poly)
return prep(MultiPolygon(polys))
@classmethod
def load_polygons(cls, filename):
f = open(filename)
polygons = []
polygons = {}
self.i = 0
for line in f:
feature = json.loads(line.rstrip())
poly_type = feature['geometry']['type']
if poly_type == 'Polygon':
poly = Polygon(feature['geometry']['coordinates'][0])
polygons.append(prep(poly))
elif poly_type == 'MultiPolygon':
polys = []
for coords in feature['geometry']['coordinates']:
poly = Polygon(coords[0])
polys.append(poly)
polygons.append(prep(MultiPolygon(polys)))
polygons[i] = prep(cls.polygon_from_geojson(feature))
self.i += 1
return polygons
@classmethod
@@ -232,7 +265,10 @@ class PolygonIndex(object):
@classmethod
def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME, polys_db_dir=POLYGONS_DB_DIR):
index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
polys = cls.load_polygons(os.path.join(d, polys_filename))
if not cls.persistent_polygons:
polys = cls.load_polygons(os.path.join(d, polys_filename))
else:
polys = None
polygons_db = LevelDB(os.path.join(d, polys_db_dir))
polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
polygon_index.load_polygon_properties(d)
@@ -241,24 +277,51 @@ class PolygonIndex(object):
def get_candidate_polygons(self, lat, lon):
raise NotImplementedError('Children must implement')
def point_in_poly(self, lat, lon, return_all=False):
polys = self.get_candidate_polygons(lat, lon)
pt = Point(lon, lat)
def polygons_contain(self, candidates, point, return_all=False):
containing = None
if return_all:
containing = []
for i in polys:
for i in candidates:
poly = self.polygons[i]
contains = poly.contains(pt)
if contains and not return_all:
try:
return json.loads(self.polygons_db.Get(str(i)))
except KeyError:
return None
elif contains:
containing.append(json.loads(self.polygons_db.Get(str(i))))
if contains:
properties = json.loads(self.polygons_db.Get(str(i)))
if not return_all:
return properties
else:
containing.append(properties)
return containing
def polygons_contain_cached(self, candidates, point, return_all=False):
containing = None
if return_all:
containing = []
for i in candidates:
poly = self.polygons.get(i)
data = {}
if poly is None:
data = json.loads(self.polygons_db.Get(str(i)))
poly = prep(self.polygon_from_geojson(data))
self.polygons[i] = poly
contains = poly.contains(point)
if contains:
if not data:
data = json.loads(self.polygons_db.Get(str(i)))
properties = data['properties']
if not return_all:
return properties
else:
containing.append(properties)
return containing
def point_in_poly(self, lat, lon, return_all=False):
candidates = self.get_candidate_polygons(lat, lon)
point = Point(lon, lat)
return self.polygons_contain(candidates, point)
class RTreePolygonIndex(PolygonIndex):
INDEX_FILENAME = 'rtree'

View File

@@ -434,6 +434,9 @@ class QuattroshapesReverseGeocoder(RTreePolygonIndex):
PRIORITIES_FILENAME = 'priorities.json'
persistent_polygons = True
cache_size = 100000
sorted_levels = (COUNTRY,
ADMIN1_REGION,
ADMIN1,
@@ -648,6 +651,9 @@ class OSMReverseGeocoder(RTreePolygonIndex):
polygon_reader = OSMAdminPolygonReader
persistent_polygons = True
cache_size = 100000
include_property_patterns = set([
'name',
'name:*',
@@ -794,11 +800,15 @@ class OSMReverseGeocoder(RTreePolygonIndex):
class OSMSubdivisionReverseGeocoder(OSMReverseGeocoder):
persistent_polygons = True
cache_size = 10000
polygon_reader = OSMSubdivisionPolygonReader
include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse'])
class OSMBuildingReverseGeocoder(OSMReverseGeocoder):
persistent_polygons = True
cache_size = 10000
polygon_reader = OSMBuildingPolygonReader
include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['building', 'building:levels'])