438 lines
15 KiB
Python
438 lines
15 KiB
Python
import fiona
|
|
import gc
|
|
import geohash
|
|
import os
|
|
import rtree
|
|
import six
|
|
import ujson as json
|
|
|
|
from collections import OrderedDict, defaultdict
|
|
from leveldb import LevelDB
|
|
from lru import LRU
|
|
from shapely.geometry import Point, Polygon, MultiPolygon
|
|
from shapely.prepared import prep
|
|
from shapely.geometry.geo import mapping
|
|
|
|
from geodata.polygons.area import polygon_bounding_box_area
|
|
|
|
DEFAULT_POLYS_FILENAME = 'polygons.geojson'
|
|
DEFAULT_PROPS_FILENAME = 'properties.json'
|
|
|
|
|
|
class PolygonIndex(object):
|
|
include_only_properties = None
|
|
simplify_tolerance = 0.0001
|
|
preserve_topology = True
|
|
persistent_polygons = False
|
|
cache_size = 0
|
|
|
|
INDEX_FILENAME = None
|
|
POLYGONS_DB_DIR = 'polygons'
|
|
|
|
def __init__(self, index=None, polygons=None, polygons_db=None, save_dir=None,
|
|
index_filename=None,
|
|
polygons_db_path=None,
|
|
include_only_properties=None):
|
|
if save_dir:
|
|
self.save_dir = save_dir
|
|
else:
|
|
self.save_dir = None
|
|
|
|
if not index_filename:
|
|
index_filename = self.INDEX_FILENAME
|
|
|
|
self.index_path = os.path.join(save_dir or '.', index_filename)
|
|
|
|
if not index:
|
|
self.create_index(overwrite=True)
|
|
else:
|
|
self.index = index
|
|
|
|
if include_only_properties and hasattr(include_only_properties, '__contains__'):
|
|
self.include_only_properties = include_only_properties
|
|
|
|
if not polygons and not self.persistent_polygons:
|
|
self.polygons = {}
|
|
elif polygons and not self.persistent_polygons:
|
|
self.polygons = polygons
|
|
elif self.persistent_polygons and self.cache_size > 0:
|
|
self.polygons = LRU(self.cache_size)
|
|
if polygons:
|
|
for key, value in six.iteritems(polygons):
|
|
self.polygons[key] = value
|
|
|
|
self.cache_hits = 0
|
|
self.cache_misses = 0
|
|
|
|
self.get_polygon = self.get_polygon_cached
|
|
|
|
if not polygons_db_path:
|
|
polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)
|
|
|
|
if not polygons_db:
|
|
self.polygons_db = LevelDB(polygons_db_path)
|
|
else:
|
|
self.polygons_db = polygons_db
|
|
|
|
self.setup()
|
|
|
|
self.i = 0
|
|
|
|
def create_index(self, overwrite=False):
|
|
raise NotImplementedError('Children must implement')
|
|
|
|
def index_polygon(self, polygon):
|
|
raise NotImplementedError('Children must implement')
|
|
|
|
def setup(self):
|
|
pass
|
|
|
|
def clear_cache(self, garbage_collect=True):
|
|
if self.persistent_polygons and self.cache_size > 0:
|
|
self.polygons.clear()
|
|
if garbage_collect:
|
|
gc.collect()
|
|
|
|
def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None):
|
|
if simplify_tolerance is None:
|
|
simplify_tolerance = self.simplify_tolerance
|
|
if preserve_topology is None:
|
|
preserve_topology = self.preserve_topology
|
|
return poly.simplify(simplify_tolerance, preserve_topology=preserve_topology)
|
|
|
|
def index_polygon_properties(self, properties):
|
|
pass
|
|
|
|
def polygon_geojson(self, poly, properties):
|
|
return {
|
|
'type': 'Feature',
|
|
'geometry': mapping(poly),
|
|
}
|
|
|
|
def add_polygon(self, poly, properties, cache=False, include_only_properties=None):
|
|
if include_only_properties is not None:
|
|
properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}
|
|
|
|
if not self.persistent_polygons or cache:
|
|
self.polygons[self.i] = prep(poly)
|
|
|
|
if self.persistent_polygons:
|
|
self.polygons_db.Put(self.polygon_key(self.i), json.dumps(self.polygon_geojson(poly, properties)))
|
|
|
|
self.polygons_db.Put(self.properties_key(self.i), json.dumps(properties))
|
|
self.index_polygon_properties(properties)
|
|
self.i += 1
|
|
|
|
@classmethod
|
|
def create_from_shapefiles(cls, inputs, output_dir,
|
|
index_filename=None,
|
|
include_only_properties=None):
|
|
index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
|
|
for input_file in inputs:
|
|
if include_only_properties is not None:
|
|
include_props = include_only_properties.get(input_file, cls.include_only_properties)
|
|
else:
|
|
include_props = cls.include_only_properties
|
|
|
|
f = fiona.open(input_file)
|
|
|
|
index.add_geojson_like_file(f)
|
|
|
|
return index
|
|
|
|
@classmethod
|
|
def fix_polygon(cls, poly):
|
|
'''
|
|
Coerce to valid polygon
|
|
'''
|
|
if not poly.is_valid:
|
|
poly = poly.buffer(0)
|
|
if not poly.is_valid:
|
|
return None
|
|
return poly
|
|
|
|
@classmethod
|
|
def to_polygon(cls, coords, holes=None):
|
|
'''
|
|
Create shapely polygon from list of coordinate tuples if valid
|
|
'''
|
|
if not coords or len(coords) < 3:
|
|
return None
|
|
|
|
# Fix for polygons crossing the 180th meridian
|
|
lons = [lon for lon, lat in coords]
|
|
if (max(lons) - min(lons) > 180):
|
|
coords = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in coords]
|
|
if holes:
|
|
holes = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in holes]
|
|
|
|
poly = Polygon(coords, holes)
|
|
if not poly.is_valid and not poly.contains(poly.centroid):
|
|
poly_fix = cls.fix_polygon(poly)
|
|
if poly_fix is not None and poly_fix.bounds and len(poly_fix.bounds) == 4 and poly_fix.is_valid and (poly_fix.contains(poly.centroid) or poly_fix.contains(poly_fix.representative_point())):
|
|
poly = poly_fix
|
|
|
|
return poly
|
|
|
|
def add_geojson_like_record(self, rec, include_only_properties=None):
|
|
if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
|
|
return
|
|
poly_type = rec['geometry']['type']
|
|
if poly_type == 'Polygon':
|
|
coords = rec['geometry']['coordinates'][0]
|
|
poly = self.to_polygon(coords)
|
|
if poly is None or not poly.bounds or len(poly.bounds) != 4:
|
|
return
|
|
self.index_polygon(poly)
|
|
self.add_polygon(poly, rec['properties'], include_only_properties=include_only_properties)
|
|
elif poly_type == 'MultiPolygon':
|
|
polys = []
|
|
poly_coords = rec['geometry']['coordinates']
|
|
for coords in poly_coords:
|
|
poly = self.to_polygon(coords[0])
|
|
if poly is None or not poly.bounds or len(poly.bounds) != 4:
|
|
continue
|
|
polys.append(poly)
|
|
self.index_polygon(poly)
|
|
|
|
self.add_polygon(MultiPolygon(polys), rec['properties'], include_only_properties=include_only_properties)
|
|
else:
|
|
return
|
|
|
|
def add_geojson_like_file(self, f, include_only_properties=None):
|
|
'''
|
|
Add either GeoJSON or a shapefile record to the index
|
|
'''
|
|
|
|
for rec in f:
|
|
self.add_geojson_like_record(rec, include_only_properties=include_only_properties)
|
|
|
|
@classmethod
|
|
def create_from_geojson_files(cls, inputs, output_dir,
|
|
index_filename=None,
|
|
polys_filename=DEFAULT_POLYS_FILENAME,
|
|
include_only_properties=None):
|
|
index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
|
|
for input_file in inputs:
|
|
if include_only_properties is not None:
|
|
include_props = include_only_properties.get(input_file, cls.include_only_properties)
|
|
else:
|
|
include_props = cls.include_only_properties
|
|
|
|
f = json.load(open(input_file))
|
|
|
|
index.add_geojson_like_file(f['features'], include_only_properties=include_props)
|
|
|
|
return index
|
|
|
|
def compact_polygons_db(self):
|
|
self.polygons_db.CompactRange('\x00', '\xff')
|
|
|
|
def save(self):
|
|
self.save_index()
|
|
self.save_properties(os.path.join(self.save_dir, DEFAULT_PROPS_FILENAME))
|
|
if not self.persistent_polygons:
|
|
self.save_polygons(os.path.join(self.save_dir, DEFAULT_POLYS_FILENAME))
|
|
self.compact_polygons_db()
|
|
self.save_polygon_properties(self.save_dir)
|
|
|
|
def load_properties(self, filename):
|
|
properties = json.load(open(filename))
|
|
self.i = int(properties.get('num_polygons', self.i))
|
|
|
|
def save_properties(self, out_filename):
|
|
out = open(out_filename, 'w')
|
|
json.dump({'num_polygons': str(self.i)}, out)
|
|
|
|
def save_polygons(self, out_filename):
|
|
out = open(out_filename, 'w')
|
|
for i in xrange(self.i):
|
|
poly = self.polygons[i]
|
|
feature = {
|
|
'type': 'Feature',
|
|
'geometry': mapping(poly.context),
|
|
}
|
|
out.write(json.dumps(feature) + u'\n')
|
|
|
|
def save_index(self):
|
|
raise NotImplementedError('Children must implement')
|
|
|
|
def load_polygon_properties(self, d):
|
|
pass
|
|
|
|
def save_polygon_properties(self, d):
|
|
pass
|
|
|
|
@classmethod
|
|
def polygon_from_geojson(cls, feature):
|
|
poly_type = feature['geometry']['type']
|
|
if poly_type == 'Polygon':
|
|
coords = feature['geometry']['coordinates']
|
|
poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
|
|
return poly
|
|
elif poly_type == 'MultiPolygon':
|
|
polys = []
|
|
for coords in feature['geometry']['coordinates']:
|
|
poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
|
|
polys.append(poly)
|
|
|
|
return MultiPolygon(polys)
|
|
|
|
@classmethod
|
|
def load_polygons(cls, filename):
|
|
f = open(filename)
|
|
polygons = {}
|
|
cls.i = 0
|
|
for line in f:
|
|
feature = json.loads(line.rstrip())
|
|
polygons[cls.i] = prep(cls.polygon_from_geojson(feature))
|
|
cls.i += 1
|
|
return polygons
|
|
|
|
@classmethod
|
|
def load_index(cls, d, index_name=None):
|
|
raise NotImplementedError('Children must implement')
|
|
|
|
@classmethod
|
|
def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME,
|
|
properties_filename=DEFAULT_PROPS_FILENAME,
|
|
polys_db_dir=POLYGONS_DB_DIR):
|
|
index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
|
|
if not cls.persistent_polygons:
|
|
polys = cls.load_polygons(os.path.join(d, polys_filename))
|
|
else:
|
|
polys = None
|
|
polygons_db = LevelDB(os.path.join(d, polys_db_dir))
|
|
polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
|
|
polygon_index.load_properties(os.path.join(d, properties_filename))
|
|
polygon_index.load_polygon_properties(d)
|
|
return polygon_index
|
|
|
|
def get_candidate_polygons(self, lat, lon):
|
|
raise NotImplementedError('Children must implement')
|
|
|
|
def get_properties(self, i):
|
|
return json.loads(self.polygons_db.Get(self.properties_key(i)))
|
|
|
|
def get_polygon(self, i):
|
|
return self.polygons[i]
|
|
|
|
def get_polygon_cached(self, i):
|
|
poly = self.polygons.get(i, None)
|
|
if poly is None:
|
|
data = json.loads(self.polygons_db.Get(self.polygon_key(i)))
|
|
poly = prep(self.polygon_from_geojson(data))
|
|
self.polygons[i] = poly
|
|
self.cache_misses += 1
|
|
else:
|
|
self.cache_hits += 1
|
|
return poly
|
|
|
|
def __iter__(self):
|
|
for i in xrange(self.i):
|
|
yield self.get_properties(i), self.get_polygon(i)
|
|
|
|
def __len__(self):
|
|
return self.i
|
|
|
|
def polygons_contain(self, candidates, point, return_all=False):
|
|
containing = None
|
|
if return_all:
|
|
containing = []
|
|
for i in candidates:
|
|
poly = self.get_polygon(i)
|
|
contains = poly.contains(point)
|
|
if contains:
|
|
properties = self.get_properties(i)
|
|
if not return_all:
|
|
return properties
|
|
else:
|
|
containing.append(properties)
|
|
return containing
|
|
|
|
def polygon_key(self, i):
|
|
return 'poly:{}'.format(i)
|
|
|
|
def properties_key(self, i):
|
|
return 'props:{}'.format(i)
|
|
|
|
def point_in_poly(self, lat, lon, return_all=False):
|
|
candidates = self.get_candidate_polygons(lat, lon)
|
|
point = Point(lon, lat)
|
|
return self.polygons_contain(candidates, point, return_all=return_all)
|
|
|
|
|
|
class RTreePolygonIndex(PolygonIndex):
|
|
INDEX_FILENAME = 'rtree'
|
|
|
|
def create_index(self, overwrite=False):
|
|
self.index = rtree.index.Index(self.index_path, overwrite=overwrite)
|
|
|
|
def index_polygon(self, polygon):
|
|
self.index.insert(self.i, polygon.bounds)
|
|
|
|
def get_candidate_polygons(self, lat, lon):
|
|
return OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys()
|
|
|
|
def save_index(self):
|
|
# need to close index before loading it
|
|
self.index.close()
|
|
|
|
@classmethod
|
|
def load_index(cls, d, index_name=None):
|
|
return rtree.index.Index(os.path.join(d, index_name or cls.INDEX_FILENAME))
|
|
|
|
|
|
class GeohashPolygonIndex(PolygonIndex):
|
|
# Reference: https://www.elastic.co/guide/en/elasticsearch/guide/current/geohashes.html
|
|
GEOHASH_PRECISIONS = [
|
|
(7, 152.8 ** 2),
|
|
(6, 1200.0 * 610.0),
|
|
(5, 4900.0 * 4900.0),
|
|
(4, 39000.0 * 19500.0),
|
|
(3, 156000.0 * 156000.0),
|
|
(2, 1251000.0 * 625000.0),
|
|
]
|
|
|
|
INDEX_FILENAME = 'index.json'
|
|
|
|
def create_index(self, overwrite=False):
|
|
self.index = defaultdict(list)
|
|
|
|
def index_point(self, lat, lon, geohash_level):
|
|
code = geohash.encode(lat, lon)[:geohash_level]
|
|
|
|
for key in [code] + geohash.neighbors(code):
|
|
self.index[key].append(self.i)
|
|
|
|
def index_polygon(self, polygon):
|
|
poly_area = polygon_bounding_box_area(polygon)
|
|
for geohash_level, area in self.GEOHASH_PRECISIONS:
|
|
if area * 9.0 >= poly_area:
|
|
break
|
|
|
|
bbox = polygon.bounds
|
|
lat, lon = (bbox[1] + bbox[3]) / 2.0, (bbox[0] + bbox[2]) / 2.0
|
|
self.index_point(lat, lon, geohash_level)
|
|
|
|
def get_candidate_polygons(self, lat, lon, return_all=False):
|
|
candidates = []
|
|
code = geohash.encode(lat, lon)
|
|
for level, area in self.GEOHASH_PRECISIONS:
|
|
indices = self.index.get(code[:level])
|
|
if not indices:
|
|
continue
|
|
candidates.extend(indices)
|
|
if not return_all:
|
|
break
|
|
return candidates
|
|
|
|
def save_index(self):
|
|
if not self.index_path:
|
|
self.index_path = os.path.join(self.save_dir or '.', self.INDEX_FILENAME)
|
|
json.dump(self.index, open(self.index_path, 'w'))
|
|
|
|
@classmethod
|
|
def load_index(cls, d, index_name=None):
|
|
return json.load(open(os.path.join(d, index_name or cls.INDEX_FILENAME)))
|