From 499a20cb3632e4f8044449c15558c9009cfd0705 Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 5 Apr 2016 14:53:07 -0400 Subject: [PATCH] [osm/polygons] Using an LRU cache for prepped polygons in the various PolygonIndex subclasses. That way can store less simplified polygons but keep frequently accessed ones (like countries) in memory --- scripts/geodata/polygons/index.py | 137 ++++++++++++++------ scripts/geodata/polygons/reverse_geocode.py | 10 ++ 2 files changed, 110 insertions(+), 37 deletions(-) diff --git a/scripts/geodata/polygons/index.py b/scripts/geodata/polygons/index.py index 7602e86b..adc4847d 100644 --- a/scripts/geodata/polygons/index.py +++ b/scripts/geodata/polygons/index.py @@ -2,11 +2,12 @@ import fiona import geohash import os import rtree +import six import ujson as json -from leveldb import LevelDB - from collections import OrderedDict, defaultdict +from leveldb import LevelDB +from lru import LRU from shapely.geometry import Point, Polygon, MultiPolygon from shapely.prepared import prep from shapely.geometry.geo import mapping @@ -20,7 +21,8 @@ class PolygonIndex(object): include_only_properties = None simplify_tolerance = 0.00001 preserve_topology = True - large_polygon_threshold = 10000 + persistent_polygons = False + cache_size = 0 INDEX_FILENAME = None POLYGONS_DB_DIR = 'polygons' @@ -47,10 +49,17 @@ class PolygonIndex(object): if include_only_properties and hasattr(include_only_properties, '__contains__'): self.include_only_properties = include_only_properties - if not polygons: - self.polygons = [] - else: + if not polygons and not self.persistent_polygons: + self.polygons = {} + elif polygons and not self.persistent_polygons: self.polygons = polygons + elif self.persistent_polygons and self.cache_size > 0: + self.polygons = LRU(self.cache_size) + if polygons: + for key, value in six.iteritems(polygons): + self.polygons[key] = value + + self.polygons_contain = self.polygons_contain_cached if not polygons_db_path: polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR) @@ -83,12 +92,26 @@ class PolygonIndex(object): def index_polygon_properties(self, properties): pass - def add_polygon(self, poly, properties, include_only_properties=None): + def polygon_geojson(self, poly, properties): + return { + 'type': 'Feature', + 'geometry': mapping(poly), + 'properties': properties + } + + def add_polygon(self, poly, properties, cache=False, include_only_properties=None): if include_only_properties is not None: properties = {k: v for k, v in properties.iteritems() if k in include_only_properties} - self.polygons.append(prep(poly)) - self.polygons_db.Put(str(self.i), json.dumps(properties)) + if not self.persistent_polygons or cache: + self.polygons[self.i] = prep(poly) + + if not self.persistent_polygons: + value = json.dumps(properties) + else: + value = json.dumps(self.polygon_geojson(poly, properties)) + + self.polygons_db.Put(str(self.i), value) self.index_polygon_properties(properties) self.i += 1 @@ -181,20 +204,25 @@ class PolygonIndex(object): return index + def compact_polygons_db(self): + self.polygons_db.CompactRange('\x00', '\xff') + def save(self, polys_filename=DEFAULT_POLYS_FILENAME): - self.save_polygons(os.path.join(self.save_dir, polys_filename)) self.save_index() + if not self.persistent_polygons: + self.save_polygons(os.path.join(self.save_dir, polys_filename)) + self.compact_polygons_db() self.save_polygon_properties(self.save_dir) def save_polygons(self, out_filename): out = open(out_filename, 'w') - for poly in self.polygons: + for i in xrange(self.i): + poly = self.polygons[i] feature = { 'type': 'Feature', 'geometry': mapping(poly.context), } out.write(json.dumps(feature) + u'\n') - self.polygons_db.CompactRange('\x00', '\xff') def save_index(self): raise NotImplementedError('Children must implement') @@ -205,24 +233,29 @@ class PolygonIndex(object): def save_polygon_properties(self, d): pass + @classmethod + def polygon_from_geojson(cls, feature): + poly_type = feature['geometry']['type'] + if poly_type == 'Polygon': + poly = Polygon(feature['geometry']['coordinates'][0]) + return poly + elif poly_type == 'MultiPolygon': + polys = [] + for coords in feature['geometry']['coordinates']: + poly = Polygon(coords[0]) + polys.append(poly) + + return prep(MultiPolygon(polys)) + @classmethod def load_polygons(cls, filename): f = open(filename) - polygons = [] + polygons = {} + self.i = 0 for line in f: feature = json.loads(line.rstrip()) - poly_type = feature['geometry']['type'] - - if poly_type == 'Polygon': - poly = Polygon(feature['geometry']['coordinates'][0]) - polygons.append(prep(poly)) - elif poly_type == 'MultiPolygon': - polys = [] - for coords in feature['geometry']['coordinates']: - poly = Polygon(coords[0]) - polys.append(poly) - - polygons.append(prep(MultiPolygon(polys))) + polygons[i] = prep(cls.polygon_from_geojson(feature)) + self.i += 1 return polygons @classmethod @@ -232,7 +265,10 @@ class PolygonIndex(object): @classmethod def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME, polys_db_dir=POLYGONS_DB_DIR): index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME) - polys = cls.load_polygons(os.path.join(d, polys_filename)) + if not cls.persistent_polygons: + polys = cls.load_polygons(os.path.join(d, polys_filename)) + else: + polys = None polygons_db = LevelDB(os.path.join(d, polys_db_dir)) polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d) polygon_index.load_polygon_properties(d) @@ -241,24 +277,51 @@ class PolygonIndex(object): def get_candidate_polygons(self, lat, lon): raise NotImplementedError('Children must implement') - def point_in_poly(self, lat, lon, return_all=False): - polys = self.get_candidate_polygons(lat, lon) - pt = Point(lon, lat) + def polygons_contain(self, candidates, point, return_all=False): containing = None if return_all: containing = [] - for i in polys: + for i in candidates: poly = self.polygons[i] contains = poly.contains(pt) - if contains and not return_all: - try: - return json.loads(self.polygons_db.Get(str(i))) - except KeyError: - return None - elif contains: - containing.append(json.loads(self.polygons_db.Get(str(i)))) + if contains: + properties = json.loads(self.polygons_db.Get(str(i))) + if not return_all: + return properties + else: + containing.append(properties) return containing + def polygons_contain_cached(self, candidates, point, return_all=False): + containing = None + if return_all: + containing = [] + + for i in candidates: + poly = self.polygons.get(i) + data = {} + if poly is None: + data = json.loads(self.polygons_db.Get(str(i))) + poly = prep(self.polygon_from_geojson(data)) + self.polygons[i] = poly + + contains = poly.contains(point) + if contains: + if not data: + data = json.loads(self.polygons_db.Get(str(i))) + + properties = data['properties'] + if not return_all: + return properties + else: + containing.append(properties) + return containing + + def point_in_poly(self, lat, lon, return_all=False): + candidates = self.get_candidate_polygons(lat, lon) + point = Point(lon, lat) + return self.polygons_contain(candidates, point) + class RTreePolygonIndex(PolygonIndex): INDEX_FILENAME = 'rtree' diff --git a/scripts/geodata/polygons/reverse_geocode.py b/scripts/geodata/polygons/reverse_geocode.py index a377dc0e..07f9a732 100644 --- a/scripts/geodata/polygons/reverse_geocode.py +++ b/scripts/geodata/polygons/reverse_geocode.py @@ -434,6 +434,9 @@ class QuattroshapesReverseGeocoder(RTreePolygonIndex): PRIORITIES_FILENAME = 'priorities.json' + persistent_polygons = True + cache_size = 100000 + sorted_levels = (COUNTRY, ADMIN1_REGION, ADMIN1, @@ -648,6 +651,9 @@ class OSMReverseGeocoder(RTreePolygonIndex): polygon_reader = OSMAdminPolygonReader + persistent_polygons = True + cache_size = 100000 + include_property_patterns = set([ 'name', 'name:*', @@ -794,11 +800,15 @@ class OSMReverseGeocoder(RTreePolygonIndex): class OSMSubdivisionReverseGeocoder(OSMReverseGeocoder): + persistent_polygons = True + cache_size = 10000 polygon_reader = OSMSubdivisionPolygonReader include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse']) class OSMBuildingReverseGeocoder(OSMReverseGeocoder): + persistent_polygons = True + cache_size = 10000 polygon_reader = OSMBuildingPolygonReader include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['building', 'building:levels'])