From 9f37a26a6d3ced3845a3be213fc7ad3e85c06abb Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 27 Apr 2016 01:43:47 -0400 Subject: [PATCH] [points] Adding point reverse geocoding index --- scripts/geodata/points/__init__.py | 0 scripts/geodata/points/index.py | 159 +++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 scripts/geodata/points/__init__.py create mode 100644 scripts/geodata/points/index.py diff --git a/scripts/geodata/points/__init__.py b/scripts/geodata/points/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/geodata/points/index.py b/scripts/geodata/points/index.py new file mode 100644 index 00000000..5f606eb7 --- /dev/null +++ b/scripts/geodata/points/index.py @@ -0,0 +1,159 @@ +import geohash +import os +import math +import operator +import six +import ujson as json + +from collections import defaultdict, OrderedDict + +from leveldb import LevelDB + +EARTH_RADIUS_KM = 6373 # in km + + +class PointIndex(object): + include_only_properties = None + persistent_index = False + cache_size = 0 + + INDEX_FILENAME = None + POINTS_DB_DIR = 'points' + + DEFAULT_GEOHASH_PRECISION = 7 + DEFAULT_PROPS_FILENAME = 'properties.json' + + INDEX_FILENAME = 'index.json' + + def __init__(self, index=None, + points=None, + points_db=None, save_dir=None, + points_db_path=None, + index_path=None, + include_only_properties=None, + precision=DEFAULT_GEOHASH_PRECISION): + if save_dir: + self.save_dir = save_dir + else: + self.save_dir = None + + if include_only_properties and hasattr(include_only_properties, '__contains__'): + self.include_only_properties = include_only_properties + + if not index_path: + index_path = os.path.join(save_dir or '.', self.INDEX_FILENAME) + + if not index: + self.index = defaultdict(list) + else: + self.index = index + + if not points_db_path: + points_db_path = os.path.join(save_dir or '.', self.POINTS_DB_DIR) + + if not points_db: + self.points_db = LevelDB(points_db_path) + else: + self.points_db = points_db + + self.precision = precision + + self.i = 0 + + def create_index(self, overwrite=False): + self.index = defaultdict(list) + + def index_point(self, lat, lon): + code = geohash.encode(lat, lon)[:self.precision] + + for key in [code] + geohash.neighbors(code): + self.index[key].append((self.i, lat, lon)) + + def add_point(self, lat, lon, properties, cache=False, include_only_properties=None): + if include_only_properties is not None: + properties = {k: v for k, v in properties.iteritems() if k in include_only_properties} + + self.index_point(lat, lon) + self.points_db.Put(self.properties_key(self.i), json.dumps(properties)) + self.i += 1 + + def load_properties(self, filename): + properties = json.load(open(filename)) + self.i = int(properties.get('num_polygons', self.i)) + self.precision = int(properties.get('precision', self.precision)) + + def save_properties(self, out_filename): + out = open(out_filename, 'w') + json.dump({'num_polygons': str(self.i), + 'precision': self.precision}, out) + + def save_index(self): + if not self.index_path: + self.index_path = os.path.join(self.save_dir or '.', self.INDEX_FILENAME) + json.dump(self.index, open(self.index_path, 'w')) + + @classmethod + def load_index(cls, d, index_name=None): + return json.load(open(os.path.join(d, index_name or cls.INDEX_FILENAME))) + + def properties_key(self, i): + return 'props:{}'.format(i) + + def save(self): + self.save_index() + self.save_properties(os.path.join(self.save_dir, self.DEFAULT_PROPS_FILENAME)) + + def haversine_distance(self, lat1, lon1, lat2, lon2, radius=EARTH_RADIUS_KM): + """Calculate the Haversine distance between two lat/lon pairs, given by: + a = sin²(Δφ/2) + cos φ1 ⋅ cos φ2 ⋅ sin²(Δλ/2) + c = 2 ⋅ atan2( √a, √(1−a) ) + d = R ⋅ c + + where R is the radius of the Earth (in kilometers). By default we use 6373 km, + a radius optimized for calculating distances at approximately 39 degrees from + the equator i.e. Washington, DC + + :param lat1: first latitude + :param lon1: first longitude (use negative range for longitudes West of the Prime Meridian) + :param lat2: second latitude + :param lon2: second longitude (use negative range for longitudes West of the Prime Meridian) + :param radius: radius of the Earth in (miles|kilometers) depending on the desired units + """ + lat1 = math.radians(lat1) + lat2 = math.radians(lat2) + lon1 = math.radians(lon1) + lon2 = math.radians(lon2) + + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = (math.sin(dlat / 2.0)) ** 2 + math.cos(lat1) * math.cos(lat2) * (math.sin(dlon/2.0)) ** 2 + c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + d = radius * c + return d + + def get_candidate_points(self, latitude, longitude): + code = geohash.encode(latitude, longitude)[:self.precision] + candidates = OrderedDict() + + candidates.update([(k, None) for k in self.index.get(code, [])]) + + for neighbor in geohash.neighbors(code): + candidates.update([(k, None) for k in self.index.get(neighbor, [])]) + + return candidates.keys() + + def point_distances(self, latitude, longitude): + candidates = self.get_candidate_points(latitude, longitude) + return [(i, lat, lon, self.haversine_distance(latitude, longitude, lat, lon)) for i, lat, lon in candidates] + + def nearest_n_points(self, latitude, longitude, n=2): + distances = self.point_distances(latitude, longitude) + if not distances: + return None + return sorted(distances, key=operator.itemgetter(-1))[:n] + + def nearest_point(self, latitude, longitude): + distances = self.nearest_n_points(latitude, longitude, n=1) + if not distances: + return None + return distances[0]