From 63edd53fb3db67713dc981dc0d724c0afb52ba6f Mon Sep 17 00:00:00 2001
From: Al
Date: Sat, 22 Oct 2016 20:28:59 -0400
Subject: [PATCH] [openaddresses] adding clear_cache method to clear the LRU cache for point-in-polygon indices and using it in OpenAddresses import since it heavily reuses polygons and only for the current file

---
 scripts/geodata/openaddresses/formatter.py | 3 +++
 scripts/geodata/polygons/index.py          | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py
index 2228860d..e34a441f 100644
--- a/scripts/geodata/openaddresses/formatter.py
+++ b/scripts/geodata/openaddresses/formatter.py
@@ -255,6 +255,9 @@ class OpenAddressesFormatter(object):
         latitude_index = headers.index('LAT')
         longitude_index = headers.index('LON')
 
+        # Clear cached polygons
+        self.components.osm_admin_rtree.clear_cache()
+
         for row in reader:
             try:
                 latitude = float(row[latitude_index])
diff --git a/scripts/geodata/polygons/index.py b/scripts/geodata/polygons/index.py
index 4e3ebd33..702643d1 100644
--- a/scripts/geodata/polygons/index.py
+++ b/scripts/geodata/polygons/index.py
@@ -1,4 +1,5 @@
 import fiona
+import gc
 import geohash
 import os
 import rtree
@@ -86,6 +87,12 @@ class PolygonIndex(object):
     def setup(self):
         pass
 
+    def clear_cache(self, garbage_collect=True):
+        if self.persistent_polygons and self.cache_size > 0:
+            self.polygons.clear()
+            if garbage_collect:
+                gc.collect()
+
     def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None):
         if simplify_tolerance is None:
             simplify_tolerance = self.simplify_tolerance