[points] Adding point reverse geocoding index
This commit is contained in:
0
scripts/geodata/points/__init__.py
Normal file
0
scripts/geodata/points/__init__.py
Normal file
159
scripts/geodata/points/index.py
Normal file
159
scripts/geodata/points/index.py
Normal file
@@ -0,0 +1,159 @@
|
||||
import geohash
|
||||
import os
|
||||
import math
|
||||
import operator
|
||||
import six
|
||||
import ujson as json
|
||||
|
||||
from collections import defaultdict, OrderedDict
|
||||
|
||||
from leveldb import LevelDB
|
||||
|
||||
# Earth radius in kilometers used as the default for the Haversine formula.
# 6373 km is the value the haversine_distance docstring describes as tuned
# for latitudes of roughly 39 degrees (e.g. Washington, DC).
EARTH_RADIUS_KM = 6373  # in km


class PointIndex(object):
    """Geohash-bucketed index of points for nearest-point lookups.

    Every point added gets a sequential integer id (``self.i``). Its
    ``(id, lat, lon)`` tuple is appended to the in-memory bucket of the
    point's geohash cell and of each neighboring cell, and its properties are
    stored as a JSON string in ``points_db`` under ``properties_key(id)``.
    """
    include_only_properties = None
    persistent_index = False
    cache_size = 0

    POINTS_DB_DIR = 'points'

    DEFAULT_GEOHASH_PRECISION = 7
    DEFAULT_PROPS_FILENAME = 'properties.json'

    INDEX_FILENAME = 'index.json'

    def __init__(self, index=None,
                 points=None,
                 points_db=None, save_dir=None,
                 points_db_path=None,
                 index_path=None,
                 include_only_properties=None,
                 precision=DEFAULT_GEOHASH_PRECISION):
        """Set up the in-memory index and the properties database.

        :param index: existing mapping of geohash -> list of (id, lat, lon);
                      a fresh ``defaultdict(list)`` is used when falsy
        :param points: currently unused
        :param points_db: object exposing ``Put(key, value)``; a ``LevelDB``
                          is opened at ``points_db_path`` when falsy
        :param save_dir: directory used for the default file locations
                         (the current directory when not given)
        :param points_db_path: location of the LevelDB directory
        :param index_path: location of the JSON index file
        :param include_only_properties: container of property names to keep
                                        when points are added
        :param precision: number of geohash characters used as bucket key
        """
        self.save_dir = save_dir or None

        if include_only_properties and hasattr(include_only_properties, '__contains__'):
            self.include_only_properties = include_only_properties

        # Keep the resolved path on the instance: save_index() reads it.
        self.index_path = index_path or os.path.join(save_dir or '.', self.INDEX_FILENAME)

        self.index = index if index else defaultdict(list)

        if not points_db_path:
            points_db_path = os.path.join(save_dir or '.', self.POINTS_DB_DIR)

        self.points_db = points_db if points_db else LevelDB(points_db_path)

        self.precision = precision

        # Id that will be assigned to the next point added.
        self.i = 0

    def create_index(self, overwrite=False):
        """Reset the in-memory index to an empty one (``overwrite`` is unused)."""
        self.index = defaultdict(list)

    def index_point(self, lat, lon):
        """Register the current point id under its geohash cell and neighbors."""
        code = geohash.encode(lat, lon)[:self.precision]

        # Also file the point under the adjacent cells so that a query landing
        # in a neighboring cell still sees it.
        for key in [code] + geohash.neighbors(code):
            self.index[key].append((self.i, lat, lon))

    def add_point(self, lat, lon, properties, cache=False, include_only_properties=None):
        """Index a point and store its properties, then advance the id counter.

        :param lat: latitude of the point
        :param lon: longitude of the point
        :param properties: dict of properties, stored as JSON
        :param cache: currently unused
        :param include_only_properties: container of keys to keep; falls back
                                        to the instance-level setting if None
        """
        if include_only_properties is None:
            include_only_properties = self.include_only_properties

        if include_only_properties is not None:
            properties = {k: v for k, v in six.iteritems(properties)
                          if k in include_only_properties}

        self.index_point(lat, lon)
        self.points_db.Put(self.properties_key(self.i), json.dumps(properties))
        self.i += 1

    def load_properties(self, filename):
        """Restore the id counter and geohash precision from a JSON file."""
        with open(filename) as f:
            properties = json.load(f)
        # The counter is stored under 'num_polygons' even though it counts
        # points; the key is kept so previously saved files still load.
        self.i = int(properties.get('num_polygons', self.i))
        self.precision = int(properties.get('precision', self.precision))

    def save_properties(self, out_filename):
        """Write the id counter and geohash precision to a JSON file."""
        with open(out_filename, 'w') as out:
            json.dump({'num_polygons': str(self.i),
                       'precision': self.precision}, out)

    def save_index(self):
        """Dump the in-memory index as JSON to ``self.index_path``."""
        if not getattr(self, 'index_path', None):
            self.index_path = os.path.join(self.save_dir or '.', self.INDEX_FILENAME)
        with open(self.index_path, 'w') as f:
            json.dump(self.index, f)

    @classmethod
    def load_index(cls, d, index_name=None):
        """Return the parsed JSON index stored in directory ``d``.

        Note that JSON has no tuple type, so the returned entries are lists.
        """
        with open(os.path.join(d, index_name or cls.INDEX_FILENAME)) as f:
            return json.load(f)

    def properties_key(self, i):
        """Return the database key under which point ``i``'s properties live."""
        return 'props:{}'.format(i)

    def save(self):
        """Persist the index and the index properties to disk."""
        self.save_index()
        # Fall back to the current directory, as the other paths do, when no
        # save_dir was configured.
        self.save_properties(os.path.join(self.save_dir or '.', self.DEFAULT_PROPS_FILENAME))

    def haversine_distance(self, lat1, lon1, lat2, lon2, radius=EARTH_RADIUS_KM):
        """Calculate the Haversine distance between two lat/lon pairs, given by:

            a = sin^2(dlat / 2) + cos(lat1) * cos(lat2) * sin^2(dlon / 2)
            c = 2 * atan2(sqrt(a), sqrt(1 - a))
            d = R * c

        where R is the radius of the Earth (in kilometers). By default we use 6373 km,
        a radius optimized for calculating distances at approximately 39 degrees from
        the equator i.e. Washington, DC

        :param lat1: first latitude
        :param lon1: first longitude (use negative range for longitudes West of the Prime Meridian)
        :param lat2: second latitude
        :param lon2: second longitude (use negative range for longitudes West of the Prime Meridian)
        :param radius: radius of the Earth in (miles|kilometers) depending on the desired units
        :return: great-circle distance in the units of ``radius``
        """
        lat1 = math.radians(lat1)
        lat2 = math.radians(lat2)
        lon1 = math.radians(lon1)
        lon2 = math.radians(lon2)

        dlon = lon2 - lon1
        dlat = lat2 - lat1

        a = math.sin(dlat / 2.0) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2.0) ** 2
        # Rounding can push a marginally outside [0, 1] for near-antipodal
        # points, which would make sqrt() raise ValueError.
        a = min(1.0, max(0.0, a))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        return radius * c

    def get_candidate_points(self, latitude, longitude):
        """Return the distinct (id, lat, lon) entries near the given location.

        Looks in the bucket of the location's geohash cell first, then in the
        buckets of its neighbors, keeping first-seen order.
        """
        code = geohash.encode(latitude, longitude)[:self.precision]
        candidates = OrderedDict()

        for key in [code] + geohash.neighbors(code):
            for point in self.index.get(key, ()):
                # Entries of an index loaded from JSON are lists, which are
                # not hashable; normalize to tuples before de-duplicating.
                candidates[tuple(point)] = None

        return list(candidates)

    def point_distances(self, latitude, longitude):
        """Return (id, lat, lon, distance) for every candidate point."""
        candidates = self.get_candidate_points(latitude, longitude)
        return [(i, lat, lon, self.haversine_distance(latitude, longitude, lat, lon))
                for i, lat, lon in candidates]

    def nearest_n_points(self, latitude, longitude, n=2):
        """Return up to ``n`` (id, lat, lon, distance) tuples, closest first.

        Returns None when there are no candidate points.
        """
        distances = self.point_distances(latitude, longitude)
        if not distances:
            return None
        return sorted(distances, key=operator.itemgetter(-1))[:n]

    def nearest_point(self, latitude, longitude):
        """Return the closest (id, lat, lon, distance) tuple, or None."""
        distances = self.nearest_n_points(latitude, longitude, n=1)
        if not distances:
            return None
        return distances[0]
|
||||
Reference in New Issue
Block a user