[osm/polygons] Using an LRU cache for prepped polygons in the various PolygonIndex subclasses. That way we can store less-simplified polygons but keep frequently accessed ones (like countries) in memory.

This commit is contained in:
Al
2016-04-05 14:53:07 -04:00
parent 4b4dca5948
commit 499a20cb36
2 changed files with 110 additions and 37 deletions

View File

@@ -2,11 +2,12 @@ import fiona
import geohash import geohash
import os import os
import rtree import rtree
import six
import ujson as json import ujson as json
from leveldb import LevelDB
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
from leveldb import LevelDB
from lru import LRU
from shapely.geometry import Point, Polygon, MultiPolygon from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.prepared import prep from shapely.prepared import prep
from shapely.geometry.geo import mapping from shapely.geometry.geo import mapping
@@ -20,7 +21,8 @@ class PolygonIndex(object):
include_only_properties = None include_only_properties = None
simplify_tolerance = 0.00001 simplify_tolerance = 0.00001
preserve_topology = True preserve_topology = True
large_polygon_threshold = 10000 persistent_polygons = False
cache_size = 0
INDEX_FILENAME = None INDEX_FILENAME = None
POLYGONS_DB_DIR = 'polygons' POLYGONS_DB_DIR = 'polygons'
@@ -47,10 +49,17 @@ class PolygonIndex(object):
if include_only_properties and hasattr(include_only_properties, '__contains__'): if include_only_properties and hasattr(include_only_properties, '__contains__'):
self.include_only_properties = include_only_properties self.include_only_properties = include_only_properties
if not polygons: if not polygons and not self.persistent_polygons:
self.polygons = [] self.polygons = {}
else: elif polygons and not self.persistent_polygons:
self.polygons = polygons self.polygons = polygons
elif self.persistent_polygons and self.cache_size > 0:
self.polygons = LRU(self.cache_size)
if polygons:
for key, value in six.iteritems(polygons):
self.polygons[key] = value
self.polygons_contain = self.polygons_contain_cached
if not polygons_db_path: if not polygons_db_path:
polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR) polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)
@@ -83,12 +92,26 @@ class PolygonIndex(object):
def index_polygon_properties(self, properties):
    """Hook invoked with each polygon's properties; the base version does nothing.

    :param properties: properties of the polygon being added.
    """
    pass
def polygon_geojson(self, poly, properties):
    """Wrap a geometry and its properties in a GeoJSON-style Feature dict.

    :param poly: geometry handed to ``mapping`` to build the ``geometry`` member.
    :param properties: value stored unchanged under the ``properties`` key.
    :return: dict with ``type``, ``geometry`` and ``properties`` keys.
    """
    return {
        'type': 'Feature',
        'geometry': mapping(poly),
        'properties': properties,
    }
def add_polygon(self, poly, properties, cache=False, include_only_properties=None):
if include_only_properties is not None: if include_only_properties is not None:
properties = {k: v for k, v in properties.iteritems() if k in include_only_properties} properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}
self.polygons.append(prep(poly)) if not self.persistent_polygons or cache:
self.polygons_db.Put(str(self.i), json.dumps(properties)) self.polygons[self.i] = prep(poly)
if not self.persistent_polygons:
value = json.dumps(properties)
else:
value = json.dumps(self.polygon_geojson(poly, properties))
self.polygons_db.Put(str(self.i), value)
self.index_polygon_properties(properties) self.index_polygon_properties(properties)
self.i += 1 self.i += 1
@@ -181,20 +204,25 @@ class PolygonIndex(object):
return index return index
def compact_polygons_db(self):
    """Request compaction of the polygons DB over the key range 0x00 through 0xff."""
    self.polygons_db.CompactRange('\x00', '\xff')
def save(self, polys_filename=DEFAULT_POLYS_FILENAME): def save(self, polys_filename=DEFAULT_POLYS_FILENAME):
self.save_polygons(os.path.join(self.save_dir, polys_filename))
self.save_index() self.save_index()
if not self.persistent_polygons:
self.save_polygons(os.path.join(self.save_dir, polys_filename))
self.compact_polygons_db()
self.save_polygon_properties(self.save_dir) self.save_polygon_properties(self.save_dir)
def save_polygons(self, out_filename):
    """Write every in-memory polygon to a file as line-delimited GeoJSON features.

    Polygons are read from ``self.polygons`` for ids ``0 .. self.i - 1`` and
    written in id order, one JSON feature per line.

    :param out_filename: path of the file to (over)write.
    """
    # The context manager guarantees the file is flushed and closed even if
    # serialising one of the polygons raises.
    with open(out_filename, 'w') as out:
        for i in xrange(self.i):
            poly = self.polygons[i]
            feature = {
                'type': 'Feature',
                # Stored polygons are prepared geometries; ``context`` is the
                # underlying geometry that ``mapping`` can serialise.
                'geometry': mapping(poly.context),
            }
            out.write(json.dumps(feature) + u'\n')
def save_index(self):
    """Persist the spatial index; the base version always raises.

    :raises NotImplementedError: always, subclasses must override.
    """
    raise NotImplementedError('Children must implement')
@@ -205,24 +233,29 @@ class PolygonIndex(object):
def save_polygon_properties(self, d):
    """Hook called by ``save`` with the save directory; the base version does nothing.

    :param d: directory passed in by the caller.
    """
    pass
@classmethod
def polygon_from_geojson(cls, feature):
    """Build an (unprepared) shapely geometry from a GeoJSON feature dict.

    Only the first ring of each polygon's coordinates is used; any further
    rings in the input are ignored.

    :param feature: dict with ``feature['geometry']['type']`` and
        ``feature['geometry']['coordinates']``.
    :return: ``Polygon`` for a ``'Polygon'`` feature, ``MultiPolygon`` for a
        ``'MultiPolygon'`` feature, ``None`` for any other geometry type.
    """
    geometry = feature['geometry']
    poly_type = geometry['type']
    if poly_type == 'Polygon':
        return Polygon(geometry['coordinates'][0])
    if poly_type == 'MultiPolygon':
        # Return the raw geometry, like the Polygon branch: callers wrap the
        # result in prep() themselves, so preparing here would prepare twice.
        return MultiPolygon([Polygon(coords[0])
                             for coords in geometry['coordinates']])
    return None
@classmethod
def load_polygons(cls, filename):
    """Load prepared polygons from a file of line-delimited GeoJSON features.

    :param filename: path of the file to read, one JSON feature per line.
    :return: dict mapping the 0-based line number of each feature to the
        prepared geometry built from it.
    """
    polygons = {}
    # A classmethod has no ``self``; number the features with a local counter
    # and let the context manager close the file when done.
    with open(filename) as f:
        for i, line in enumerate(f):
            feature = json.loads(line.rstrip())
            polygons[i] = prep(cls.polygon_from_geojson(feature))
    return polygons
@classmethod @classmethod
@@ -232,7 +265,10 @@ class PolygonIndex(object):
@classmethod @classmethod
def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME, polys_db_dir=POLYGONS_DB_DIR): def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME, polys_db_dir=POLYGONS_DB_DIR):
index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME) index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
polys = cls.load_polygons(os.path.join(d, polys_filename)) if not cls.persistent_polygons:
polys = cls.load_polygons(os.path.join(d, polys_filename))
else:
polys = None
polygons_db = LevelDB(os.path.join(d, polys_db_dir)) polygons_db = LevelDB(os.path.join(d, polys_db_dir))
polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d) polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
polygon_index.load_polygon_properties(d) polygon_index.load_polygon_properties(d)
@@ -241,24 +277,51 @@ class PolygonIndex(object):
def get_candidate_polygons(self, lat, lon):
    """Return ids of polygons that may contain the point; the base version always raises.

    :param lat: latitude of the query point.
    :param lon: longitude of the query point.
    :raises NotImplementedError: always, subclasses must override.
    """
    raise NotImplementedError('Children must implement')
def polygons_contain(self, candidates, point, return_all=False):
    """Test candidate polygons held in ``self.polygons`` against a point.

    :param candidates: iterable of polygon ids, checked in iteration order.
    :param point: geometry passed to each polygon's ``contains``.
    :param return_all: when true, collect every match instead of stopping at
        the first one.
    :return: with ``return_all`` false, the properties (parsed from the
        polygons DB) of the first containing polygon, or ``None`` if there is
        none; with ``return_all`` true, a list of the properties of every
        containing polygon (possibly empty).
    """
    containing = [] if return_all else None

    for i in candidates:
        poly = self.polygons[i]
        # Use the ``point`` parameter; no other point variable exists here.
        if not poly.contains(point):
            continue
        properties = json.loads(self.polygons_db.Get(str(i)))
        if not return_all:
            return properties
        containing.append(properties)

    return containing
def polygons_contain_cached(self, candidates, point, return_all=False):
    """Test candidate polygons against a point, loading cache misses from the DB.

    ``self.polygons`` is used as a cache: a candidate missing from it is read
    from the polygons DB as a GeoJSON feature, rebuilt, prepared and stored
    back under its id before being tested.

    :param candidates: iterable of polygon ids, checked in iteration order.
    :param point: geometry passed to each polygon's ``contains``.
    :param return_all: when true, collect every match instead of stopping at
        the first one.
    :return: with ``return_all`` false, the ``properties`` member of the first
        containing polygon's DB record, or ``None`` if there is none; with
        ``return_all`` true, a list of those for every containing polygon.
    """
    containing = [] if return_all else None

    for i in candidates:
        poly = self.polygons.get(i)
        data = {}

        if poly is None:
            data = json.loads(self.polygons_db.Get(str(i)))
            poly = prep(self.polygon_from_geojson(data))
            self.polygons[i] = poly

        if not poly.contains(point):
            continue

        # On a cache hit the DB record has not been read yet; fetch it only
        # now that the properties are actually needed.
        if not data:
            data = json.loads(self.polygons_db.Get(str(i)))
        properties = data['properties']

        if not return_all:
            return properties
        containing.append(properties)

    return containing
def point_in_poly(self, lat, lon, return_all=False):
    """Look up the polygon(s) containing a latitude/longitude.

    :param lat: latitude of the query point.
    :param lon: longitude of the query point.
    :param return_all: forwarded to ``polygons_contain``; when true, all
        containing polygons are returned rather than only the first.
    :return: whatever ``self.polygons_contain`` returns for the candidates.
    """
    candidates = self.get_candidate_polygons(lat, lon)
    # The point is constructed in (x, y) order, i.e. longitude first.
    point = Point(lon, lat)
    # Forward ``return_all`` so the caller's choice is not silently ignored.
    return self.polygons_contain(candidates, point, return_all=return_all)
class RTreePolygonIndex(PolygonIndex): class RTreePolygonIndex(PolygonIndex):
INDEX_FILENAME = 'rtree' INDEX_FILENAME = 'rtree'

View File

@@ -434,6 +434,9 @@ class QuattroshapesReverseGeocoder(RTreePolygonIndex):
PRIORITIES_FILENAME = 'priorities.json' PRIORITIES_FILENAME = 'priorities.json'
persistent_polygons = True
cache_size = 100000
sorted_levels = (COUNTRY, sorted_levels = (COUNTRY,
ADMIN1_REGION, ADMIN1_REGION,
ADMIN1, ADMIN1,
@@ -648,6 +651,9 @@ class OSMReverseGeocoder(RTreePolygonIndex):
polygon_reader = OSMAdminPolygonReader polygon_reader = OSMAdminPolygonReader
persistent_polygons = True
cache_size = 100000
include_property_patterns = set([ include_property_patterns = set([
'name', 'name',
'name:*', 'name:*',
@@ -794,11 +800,15 @@ class OSMReverseGeocoder(RTreePolygonIndex):
class OSMSubdivisionReverseGeocoder(OSMReverseGeocoder): class OSMSubdivisionReverseGeocoder(OSMReverseGeocoder):
persistent_polygons = True
cache_size = 10000
polygon_reader = OSMSubdivisionPolygonReader polygon_reader = OSMSubdivisionPolygonReader
include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse']) include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse'])
class OSMBuildingReverseGeocoder(OSMReverseGeocoder): class OSMBuildingReverseGeocoder(OSMReverseGeocoder):
persistent_polygons = True
cache_size = 10000
polygon_reader = OSMBuildingPolygonReader polygon_reader = OSMBuildingPolygonReader
include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['building', 'building:levels']) include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['building', 'building:levels'])