From f87f0df7177f0c36cb594a7349af9c9dc89d15cd Mon Sep 17 00:00:00 2001
From: Al
Date: Mon, 5 Dec 2016 02:01:46 -0500
Subject: [PATCH] [places] adding generic place index for reverse geocoding to points

---
 .../geodata/metro_stations/reverse_geocode.py |  71 +----------
 scripts/geodata/places/reverse_geocode.py     | 135 ++++++++++++++++++
 2 files changed, 141 insertions(+), 65 deletions(-)
 create mode 100644 scripts/geodata/places/reverse_geocode.py

diff --git a/scripts/geodata/metro_stations/reverse_geocode.py b/scripts/geodata/metro_stations/reverse_geocode.py
index 483cbdd2..7d16e036 100644
--- a/scripts/geodata/metro_stations/reverse_geocode.py
+++ b/scripts/geodata/metro_stations/reverse_geocode.py
@@ -11,78 +11,19 @@ from geodata.address_expansions.abbreviations import abbreviate
 from geodata.coordinates.conversion import latlon_to_decimal
 from geodata.math.floats import isclose
 from geodata.osm.extract import parse_osm
-from geodata.points.index import PointIndex
+from geodata.places.reverse_geocode import PlaceReverseGeocoder
 from geodata.encoding import safe_decode
 
 
-class MetroStationReverseGeocoder(PointIndex):
-    include_property_patterns = set([
-        'name',
-        'name:*',
+class MetroStationReverseGeocoder(PlaceReverseGeocoder):
+    GEOHASH_PRECISION = 7
+
+    include_property_patterns = PlaceReverseGeocoder.include_property_patterns | set([
         'operator',
         'network',
         'station',
-        'int_name',
-        'official_name',
-        'official_name:*',
-        'alt_name',
-        'alt_name:*',
-        'short_name',
-        'short_name:*',
-        'place',
-        'description',
-        'wikipedia',
-        'wikipedia:*',
     ])
 
-    def nearest_metro(self, lat, lon, n=1):
-        nearest = self.nearest_point(lat, lon)
-        if not nearest:
-            return None
-
-    @classmethod
-    def create_from_osm_file(cls, filename, output_dir, precision=PointIndex.GEOHASH_PRECISION):
-        '''
-        Given an OSM file (planet or some other bounds) containing relations
-        and their dependencies, create an R-tree index for coarse-grained
-        reverse geocoding.
-
-        Note: the input file is expected to have been created using
-        osmfilter. Use fetch_osm_address_data.sh for planet or copy the
-        admin borders commands if using other bounds.
-        '''
-        index = cls(save_dir=output_dir, precision=precision)
-
-        i = 0
-        for element_id, props, deps in parse_osm(filename):
-            props = {safe_decode(k): safe_decode(v) for k, v in six.iteritems(props)}
-
-            node_id = long(element_id.split(':')[-1])
-            lat = props.get('lat')
-            lon = props.get('lon')
-            if lat is None or lon is None:
-                continue
-            lat, lon = latlon_to_decimal(lat, lon)
-            if lat is None or lon is None:
-                continue
-
-            if isclose(lon, 180.0):
-                lon = 179.999
-
-            props = {k: v for k, v in six.iteritems(props)
-                     if k in ('id', 'type') or k in cls.include_property_patterns or (six.u(':') in k and
-                     six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)}
-
-            props['type'] = 'node'
-            props['id'] = node_id
-
-            index.add_point(lat, lon, props)
-
-            if i % 1000 == 0 and i > 0:
-                print('did {} points'.format(i))
-            i += 1
-
-        return index
 
 if __name__ == '__main__':
     # Handle argument parsing here
@@ -93,7 +34,7 @@ if __name__ == '__main__':
 
     parser.add_argument('-p', '--precision',
                         type=int,
-                        default=PointIndex.GEOHASH_PRECISION,
+                        default=MetroStationReverseGeocoder.GEOHASH_PRECISION,
                         help='Geohash precision')
 
     parser.add_argument('-o', '--out-dir',
diff --git a/scripts/geodata/places/reverse_geocode.py b/scripts/geodata/places/reverse_geocode.py
new file mode 100644
index 00000000..b3645d16
--- /dev/null
+++ b/scripts/geodata/places/reverse_geocode.py
@@ -0,0 +1,135 @@
+import argparse
+import logging
+import os
+import sys
+import six
+
+this_dir = os.path.realpath(os.path.dirname(__file__))
+# Resolve relative to this file (not the current working directory) so the
+# geodata package is importable no matter where the script is launched from.
+sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
+
+from geodata.address_expansions.abbreviations import abbreviate
+from geodata.coordinates.conversion import latlon_to_decimal
+from geodata.math.floats import isclose
+from geodata.osm.extract import parse_osm
+from geodata.points.index import PointIndex
+from geodata.encoding import safe_decode
+
+
+class PlaceReverseGeocoder(PointIndex):
+    '''
+    Point index of OSM place elements for reverse geocoding to points.
+
+    Subclasses may override GEOHASH_PRECISION and extend
+    include_property_patterns to keep additional OSM tags.
+    '''
+    # Default precision used when create_from_osm_file gets precision=None
+    GEOHASH_PRECISION = 5
+
+    # OSM tags copied onto each indexed point. A pattern ending in ":*"
+    # keeps every key that shares the prefix before its first colon.
+    include_property_patterns = set([
+        'id',
+        'type',
+        'name',
+        'name:*',
+        'ISO3166-1:alpha2',
+        'ISO3166-1:alpha3',
+        'int_name',
+        'is_in:*',
+        'official_name',
+        'official_name:*',
+        'alt_name',
+        'alt_name:*',
+        'short_name',
+        'short_name:*',
+        'admin_level',
+        'place',
+        'population',
+        'designation',
+        'description',
+        'wikipedia',
+        'wikipedia:*',
+    ])
+
+    @classmethod
+    def create_from_osm_file(cls, filename, output_dir, precision=None):
+        '''
+        Build a point index from the elements of an OSM file.
+
+        Every element yielded by parse_osm that has usable lat/lon values
+        is added to the index along with the subset of its tags selected
+        by include_property_patterns.
+
+        :param filename: path to the OSM file, passed to parse_osm
+        :param output_dir: passed to the index constructor as save_dir
+        :param precision: geohash precision; defaults to
+            cls.GEOHASH_PRECISION when None
+        :return: the populated index; this method does not call save()
+        '''
+        if precision is None:
+            precision = cls.GEOHASH_PRECISION
+
+        index = cls(save_dir=output_dir, precision=precision)
+
+        i = 0
+        for element_id, props, deps in parse_osm(filename):
+            props = {safe_decode(k): safe_decode(v) for k, v in six.iteritems(props)}
+
+            # int() promotes to long automatically on Python 2 and is the
+            # only integer type on Python 3, where the long builtin is gone
+            node_id = int(element_id.split(':')[-1])
+            lat = props.get('lat')
+            lon = props.get('lon')
+            if lat is None or lon is None:
+                continue
+            lat, lon = latlon_to_decimal(lat, lon)
+            if lat is None or lon is None:
+                continue
+
+            # NOTE(review): presumably keeps lon=180 inside the index's valid range - confirm
+            if isclose(lon, 180.0):
+                lon = 179.999
+
+            props = {k: v for k, v in six.iteritems(props)
+                     if k in ('id', 'type') or k in cls.include_property_patterns or (six.u(':') in k and
+                     six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)}
+
+            # Replace any 'type'/'id' tag values with the element's own identity
+            props['type'] = 'node'
+            props['id'] = node_id
+
+            index.add_point(lat, lon, props)
+
+            if i % 1000 == 0 and i > 0:
+                print('did {} points'.format(i))
+            i += 1
+
+        return index
+
+if __name__ == '__main__':
+    # Handle argument parsing here
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-i', '--osm-places-file',
+                        help='Path to OSM places file')
+
+    parser.add_argument('-p', '--precision',
+                        type=int,
+                        default=PlaceReverseGeocoder.GEOHASH_PRECISION,
+                        help='Geohash precision')
+
+    parser.add_argument('-o', '--out-dir',
+                        default=os.getcwd(),
+                        help='Output directory')
+
+    logging.basicConfig(level=logging.INFO)
+
+    args = parser.parse_args()
+    if args.osm_places_file:
+        index = PlaceReverseGeocoder.create_from_osm_file(args.osm_places_file, args.out_dir, precision=args.precision)
+    else:
+        parser.error('Must specify places file')
+
+    index.save()