Initial fork commit
This commit is contained in:
0
scripts/geodata/polygons/__init__.py
Normal file
0
scripts/geodata/polygons/__init__.py
Normal file
26
scripts/geodata/polygons/area.py
Normal file
26
scripts/geodata/polygons/area.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import pyproj
|
||||
|
||||
from functools import partial
|
||||
from shapely.ops import transform
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
|
||||
def polygon_area(poly):
    '''
    Return the approximate area of *poly* in square meters.

    The polygon is assumed to be in WGS84 lon/lat (EPSG:4326). It is
    reprojected into an Albers equal-area projection whose standard
    parallels are taken from the polygon's own latitude range, which keeps
    the area estimate reasonable anywhere on the globe.

    :param poly: shapely geometry in lon/lat coordinates
    :return: area in square meters (float)
    '''
    return transform(
        partial(pyproj.transform,
                pyproj.Proj(init='EPSG:4326'),
                pyproj.Proj(proj='aea',
                            # Shapely bounds are (min_lon, min_lat, max_lon,
                            # max_lat): the latitudes live at indices 1 and 3.
                            # The original code passed bounds[2] (max
                            # longitude) as the second standard parallel,
                            # which skews the projection and the area.
                            lat1=poly.bounds[1],
                            lat2=poly.bounds[3],
                            )
                ),
        poly
    ).area
|
||||
|
||||
|
||||
def polygon_bounding_box_area(poly):
    '''
    Return the area (square meters) of *poly*'s lon/lat bounding box,
    computed via polygon_area on a rectangle built from poly.bounds.
    '''
    min_lon, min_lat, max_lon, max_lat = poly.bounds
    corners = [
        (min_lon, max_lat),
        (min_lon, min_lat),
        (max_lon, min_lat),
        (max_lon, max_lat),
    ]
    return polygon_area(Polygon(corners))
|
||||
452
scripts/geodata/polygons/index.py
Normal file
452
scripts/geodata/polygons/index.py
Normal file
@@ -0,0 +1,452 @@
|
||||
import fiona
|
||||
import gc
|
||||
import geohash
|
||||
import os
|
||||
import rtree
|
||||
import six
|
||||
import ujson as json
|
||||
|
||||
from collections import OrderedDict, defaultdict
|
||||
from leveldb import LevelDB
|
||||
from lru import LRU
|
||||
from shapely.geometry import Point, Polygon, MultiPolygon
|
||||
from shapely.prepared import prep
|
||||
from shapely.geometry.geo import mapping
|
||||
|
||||
from geodata.polygons.area import polygon_bounding_box_area
|
||||
|
||||
DEFAULT_POLYS_FILENAME = 'polygons.geojson'
|
||||
DEFAULT_PROPS_FILENAME = 'properties.json'
|
||||
|
||||
|
||||
class PolygonIndex(object):
    '''
    Base class for polygon indexes used for point-in-polygon queries.

    Subclasses provide the actual spatial index (create_index,
    index_polygon, get_candidate_polygons, save_index, load_index); this
    class manages polygon storage — an in-memory dict, a bounded LRU
    cache, or a LevelDB store when persistent_polygons is True — plus
    per-polygon properties and (de)serialization to a directory.
    '''
    # Optional whitelist of property keys retained by add_polygon
    include_only_properties = None
    # Tolerance (degrees) passed to shapely's simplify()
    simplify_tolerance = 0.0001
    preserve_topology = True
    # When True, polygon geometries are fetched from LevelDB on demand
    persistent_polygons = False
    # Number of polygons held in the in-memory LRU cache (persistent mode)
    cache_size = 0
    # When True, to_polygon attempts to repair invalid polygons
    fix_invalid_polygons = False

    INDEX_FILENAME = None
    POLYGONS_DB_DIR = 'polygons'

    def __init__(self, index=None, polygons=None, polygons_db=None, save_dir=None,
                 index_filename=None,
                 polygons_db_path=None,
                 include_only_properties=None):
        '''
        Create or wrap a polygon index.

        :param index: existing spatial index object; when None a fresh one
            is created (overwriting any existing index file)
        :param polygons: optional mapping of polygon id -> prepared polygon
        :param polygons_db: existing LevelDB handle; when None one is opened
            at polygons_db_path (or under save_dir)
        :param save_dir: directory used for the index and the polygon store
        :param index_filename: index file name (defaults to INDEX_FILENAME)
        :param include_only_properties: container of property keys to keep
            when adding polygons
        '''
        if save_dir:
            self.save_dir = save_dir
        else:
            self.save_dir = None

        if not index_filename:
            index_filename = self.INDEX_FILENAME

        self.index_path = os.path.join(save_dir or '.', index_filename)

        if not index:
            self.create_index(overwrite=True)
        else:
            self.index = index

        if include_only_properties and hasattr(include_only_properties, '__contains__'):
            self.include_only_properties = include_only_properties

        if not polygons and not self.persistent_polygons:
            self.polygons = {}
        elif polygons and not self.persistent_polygons:
            self.polygons = polygons
        elif self.persistent_polygons and self.cache_size > 0:
            # Persistent mode with caching: keep a bounded LRU of prepared
            # polygons and route lookups through get_polygon_cached
            self.polygons = LRU(self.cache_size)
            if polygons:
                for key, value in six.iteritems(polygons):
                    self.polygons[key] = value

            self.cache_hits = 0
            self.cache_misses = 0

            # Shadow the plain get_polygon with the caching variant
            self.get_polygon = self.get_polygon_cached

        if not polygons_db_path:
            polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)

        if not polygons_db:
            self.polygons_db = LevelDB(polygons_db_path)
        else:
            self.polygons_db = polygons_db

        self.setup()

        # Running count of polygons added; doubles as the next polygon id
        self.i = 0

    def create_index(self, overwrite=False):
        '''Create the underlying spatial index (subclass responsibility).'''
        raise NotImplementedError('Children must implement')

    def index_polygon(self, polygon):
        '''Add a polygon's extent to the spatial index (subclass responsibility).'''
        raise NotImplementedError('Children must implement')

    def setup(self):
        '''Hook called at the end of __init__ for subclass state.'''
        pass

    def clear_cache(self, garbage_collect=True):
        '''Empty the LRU polygon cache (persistent mode only).'''
        if self.persistent_polygons and self.cache_size > 0:
            self.polygons.clear()
            if garbage_collect:
                gc.collect()

    def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None):
        '''Return a simplified copy of poly using the class defaults unless overridden.'''
        if simplify_tolerance is None:
            simplify_tolerance = self.simplify_tolerance
        if preserve_topology is None:
            preserve_topology = self.preserve_topology
        return poly.simplify(simplify_tolerance, preserve_topology=preserve_topology)

    def index_polygon_properties(self, properties):
        '''Hook for subclasses to record per-polygon properties (e.g. sort keys).'''
        pass

    def polygon_geojson(self, poly, properties):
        '''Return a GeoJSON Feature dict for poly (geometry only; properties unused here).'''
        return {
            'type': 'Feature',
            'geometry': mapping(poly),
        }

    def add_polygon(self, poly, properties, cache=False, include_only_properties=None):
        '''
        Store a polygon and its properties under the next id (self.i).

        In persistent mode the geometry goes to LevelDB; otherwise a
        prepared geometry is kept in memory. Properties are always written
        to LevelDB.
        '''
        if include_only_properties is not None:
            properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}

        if not self.persistent_polygons or cache:
            # prep() builds a prepared geometry for fast repeated contains()
            self.polygons[self.i] = prep(poly)

        if self.persistent_polygons:
            self.polygons_db.Put(self.polygon_key(self.i), json.dumps(self.polygon_geojson(poly, properties)))

        self.polygons_db.Put(self.properties_key(self.i), json.dumps(properties))
        self.index_polygon_properties(properties)
        self.i += 1

    @classmethod
    def create_from_shapefiles(cls, inputs, output_dir,
                               index_filename=None,
                               include_only_properties=None):
        '''Build an index from a list of shapefiles (opened with fiona).'''
        index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
        for input_file in inputs:
            if include_only_properties is not None:
                include_props = include_only_properties.get(input_file, cls.include_only_properties)
            else:
                include_props = cls.include_only_properties

            f = fiona.open(input_file)

            # NOTE(review): include_props is computed but never passed on,
            # unlike create_from_geojson_files below — confirm intent
            index.add_geojson_like_file(f)

        return index

    @classmethod
    def fix_polygon(cls, poly):
        '''
        Coerce to valid polygon via buffer(0); return None if still invalid.
        '''
        if not poly.is_valid:
            poly = poly.buffer(0)
            if not poly.is_valid:
                return None
        return poly

    @classmethod
    def to_polygon(cls, coords, holes=None, test_point=None):
        '''
        Create shapely polygon from list of coordinate tuples if valid.

        Shifts longitudes by +360 for polygons crossing the antimeridian,
        and (when fix_invalid_polygons is set) attempts a buffer(0) repair
        for polygons that fail validity/containment checks.
        '''
        if not coords or len(coords) < 3:
            return None

        # Fix for polygons crossing the 180th meridian
        lons = [lon for lon, lat in coords]
        if (max(lons) - min(lons) > 180):
            coords = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in coords]
            if holes:
                holes = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in holes]

        poly = Polygon(coords, holes)
        try:
            if test_point is None:
                test_point = poly.representative_point()
            invalid = cls.fix_invalid_polygons and not poly.is_valid and not poly.contains(test_point)
        except Exception:
            # representative_point/contains can raise on degenerate geometry
            invalid = True

        if invalid:
            try:
                poly_fix = cls.fix_polygon(poly)

                # Only accept the repaired polygon if it kept its type,
                # has sane bounds, and still contains the test point
                if poly_fix is not None and poly_fix.bounds and len(poly_fix.bounds) == 4 and poly_fix.is_valid and poly_fix.type == poly.type:
                    if test_point is None:
                        test_point = poly_fix.representative_point()

                    if poly_fix.contains(test_point):
                        poly = poly_fix
            except Exception:
                pass

        return poly

    def add_geojson_like_record(self, rec, include_only_properties=None):
        '''Index and store a single GeoJSON-like Polygon/MultiPolygon record.'''
        if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
            return
        poly_type = rec['geometry']['type']
        if poly_type == 'Polygon':
            # Exterior ring only; interior rings (holes) are ignored here
            coords = rec['geometry']['coordinates'][0]
            poly = self.to_polygon(coords)
            if poly is None or not poly.bounds or len(poly.bounds) != 4:
                return
            self.index_polygon(poly)
            self.add_polygon(poly, rec['properties'], include_only_properties=include_only_properties)
        elif poly_type == 'MultiPolygon':
            polys = []
            poly_coords = rec['geometry']['coordinates']
            for coords in poly_coords:
                poly = self.to_polygon(coords[0])
                if poly is None or not poly.bounds or len(poly.bounds) != 4:
                    continue
                polys.append(poly)
                # Each component polygon is indexed separately, but stored
                # together as one MultiPolygon below
                self.index_polygon(poly)

            self.add_polygon(MultiPolygon(polys), rec['properties'], include_only_properties=include_only_properties)
        else:
            return

    def add_geojson_like_file(self, f, include_only_properties=None):
        '''
        Add either GeoJSON or a shapefile record to the index
        '''

        for rec in f:
            self.add_geojson_like_record(rec, include_only_properties=include_only_properties)

    @classmethod
    def create_from_geojson_files(cls, inputs, output_dir,
                                  index_filename=None,
                                  include_only_properties=None):
        '''Build an index from GeoJSON FeatureCollection files.'''
        index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
        for input_file in inputs:
            if include_only_properties is not None:
                include_props = include_only_properties.get(input_file, cls.include_only_properties)
            else:
                include_props = cls.include_only_properties

            f = json.load(open(input_file))

            index.add_geojson_like_file(f['features'], include_only_properties=include_props)

        return index

    def compact_polygons_db(self):
        '''Compact the whole LevelDB key range.'''
        self.polygons_db.CompactRange('\x00', '\xff')

    def save(self):
        '''Persist the spatial index, counters, polygons, and subclass properties.'''
        self.save_index()
        self.save_properties(os.path.join(self.save_dir, DEFAULT_PROPS_FILENAME))
        if not self.persistent_polygons:
            self.save_polygons(os.path.join(self.save_dir, DEFAULT_POLYS_FILENAME))
        self.compact_polygons_db()
        self.save_polygon_properties(self.save_dir)

    def load_properties(self, filename):
        '''Restore the polygon count (self.i) from a saved properties file.'''
        properties = json.load(open(filename))
        self.i = int(properties.get('num_polygons', self.i))

    def save_properties(self, out_filename):
        '''Write index-level properties (currently just the polygon count).'''
        out = open(out_filename, 'w')
        json.dump({'num_polygons': str(self.i)}, out)

    def save_polygons(self, out_filename):
        '''Write all polygons as newline-delimited GeoJSON features.'''
        out = open(out_filename, 'w')
        for i in xrange(self.i):
            poly = self.polygons[i]
            feature = {
                'type': 'Feature',
                # .context unwraps the original geometry from prep()
                'geometry': mapping(poly.context),
            }
            out.write(json.dumps(feature) + u'\n')

    def save_index(self):
        '''Persist the spatial index (subclass responsibility).'''
        raise NotImplementedError('Children must implement')

    def load_polygon_properties(self, d):
        '''Hook for subclasses to restore per-polygon properties from dir d.'''
        pass

    def save_polygon_properties(self, d):
        '''Hook for subclasses to persist per-polygon properties to dir d.'''
        pass

    @classmethod
    def polygon_from_geojson(cls, feature):
        '''Reconstruct a shapely (Multi)Polygon from a GeoJSON feature dict.'''
        poly_type = feature['geometry']['type']
        if poly_type == 'Polygon':
            coords = feature['geometry']['coordinates']
            poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
            return poly
        elif poly_type == 'MultiPolygon':
            polys = []
            for coords in feature['geometry']['coordinates']:
                poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
                polys.append(poly)

            return MultiPolygon(polys)

    @classmethod
    def load_polygons(cls, filename):
        '''Load newline-delimited GeoJSON polygons into a dict of prepared geometries.'''
        f = open(filename)
        polygons = {}
        # NOTE(review): this sets cls.i (class attribute), not an instance
        # attribute — instances later overwrite self.i via load_properties
        cls.i = 0
        for line in f:
            feature = json.loads(line.rstrip())
            polygons[cls.i] = prep(cls.polygon_from_geojson(feature))
            cls.i += 1
        return polygons

    @classmethod
    def load_index(cls, d, index_name=None):
        '''Load the spatial index from dir d (subclass responsibility).'''
        raise NotImplementedError('Children must implement')

    @classmethod
    def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME,
             properties_filename=DEFAULT_PROPS_FILENAME,
             polys_db_dir=POLYGONS_DB_DIR):
        '''Reconstruct a full index (spatial index + polygons + properties) from dir d.'''
        index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
        if not cls.persistent_polygons:
            polys = cls.load_polygons(os.path.join(d, polys_filename))
        else:
            polys = None
        polygons_db = LevelDB(os.path.join(d, polys_db_dir))
        polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
        polygon_index.load_properties(os.path.join(d, properties_filename))
        polygon_index.load_polygon_properties(d)
        return polygon_index

    def get_candidate_polygons(self, lat, lon):
        '''Return candidate polygon ids whose extent may contain (lat, lon).'''
        raise NotImplementedError('Children must implement')

    def get_properties(self, i):
        '''Fetch the stored properties dict for polygon id i from LevelDB.'''
        return json.loads(self.polygons_db.Get(self.properties_key(i)))

    def get_polygon(self, i):
        '''Return the in-memory prepared polygon for id i (non-persistent mode).'''
        return self.polygons[i]

    def get_polygon_cached(self, i):
        '''Return the prepared polygon for id i, loading from LevelDB on a cache miss.'''
        poly = self.polygons.get(i, None)
        if poly is None:
            data = json.loads(self.polygons_db.Get(self.polygon_key(i)))
            poly = prep(self.polygon_from_geojson(data))
            self.polygons[i] = poly
            self.cache_misses += 1
        else:
            self.cache_hits += 1
        return poly

    def __iter__(self):
        '''Yield (properties, polygon) pairs for every stored polygon.'''
        for i in xrange(self.i):
            yield self.get_properties(i), self.get_polygon(i)

    def __len__(self):
        '''Number of polygons in the index.'''
        return self.i

    def polygons_contain(self, candidates, point, return_all=False):
        '''
        Test candidate polygon ids against point.

        Returns the first matching polygon's properties, or a list of all
        matches when return_all is True (possibly empty), or None when
        return_all is False and nothing matches.
        '''
        containing = None
        if return_all:
            containing = []
        for i in candidates:
            poly = self.get_polygon(i)
            contains = poly.contains(point)
            if contains:
                properties = self.get_properties(i)
                if not return_all:
                    return properties
                else:
                    containing.append(properties)
        return containing

    def polygon_key(self, i):
        '''LevelDB key for polygon geometry i.'''
        return 'poly:{}'.format(i)

    def properties_key(self, i):
        '''LevelDB key for polygon properties i.'''
        return 'props:{}'.format(i)

    def point_in_poly(self, lat, lon, return_all=False):
        '''Reverse-geocode a point: candidates from the spatial index, then exact tests.'''
        candidates = self.get_candidate_polygons(lat, lon)
        point = Point(lon, lat)
        return self.polygons_contain(candidates, point, return_all=return_all)
|
||||
|
||||
|
||||
class RTreePolygonIndex(PolygonIndex):
    '''PolygonIndex backed by a disk-based R-tree (the rtree package).'''

    INDEX_FILENAME = 'rtree'

    def create_index(self, overwrite=False):
        '''Create (or overwrite) the R-tree index file at self.index_path.'''
        self.index = rtree.index.Index(self.index_path, overwrite=overwrite)

    def index_polygon(self, polygon):
        '''Insert the polygon's bounding box under the current id.'''
        self.index.insert(self.i, polygon.bounds)

    def get_candidate_polygons(self, lat, lon):
        '''Return de-duplicated candidate ids whose bbox intersects the point.'''
        # OrderedDict.fromkeys removes duplicates while preserving order
        return OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys()

    def save_index(self):
        # need to close index before loading it
        self.index.close()

    @classmethod
    def load_index(cls, d, index_name=None):
        '''Open an existing R-tree index stored in directory d.'''
        return rtree.index.Index(os.path.join(d, index_name or cls.INDEX_FILENAME))
|
||||
|
||||
|
||||
class GeohashPolygonIndex(PolygonIndex):
    '''
    PolygonIndex using geohash prefixes as the spatial index: each polygon
    is filed under the geohash cell (and its 8 neighbors) at a precision
    chosen from the polygon's bounding-box area.
    '''

    # (precision, approximate cell area in m^2) pairs, finest first
    # Reference: https://www.elastic.co/guide/en/elasticsearch/guide/current/geohashes.html
    GEOHASH_PRECISIONS = [
        (7, 152.8 ** 2),
        (6, 1200.0 * 610.0),
        (5, 4900.0 * 4900.0),
        (4, 39000.0 * 19500.0),
        (3, 156000.0 * 156000.0),
        (2, 1251000.0 * 625000.0),
    ]

    INDEX_FILENAME = 'index.json'

    def create_index(self, overwrite=False):
        '''The "index" is just a dict of geohash prefix -> list of polygon ids.'''
        self.index = defaultdict(list)

    def index_point(self, lat, lon, geohash_level):
        '''File the current polygon id under the point's geohash cell and its neighbors.'''
        code = geohash.encode(lat, lon)[:geohash_level]

        for key in [code] + geohash.neighbors(code):
            self.index[key].append(self.i)

    def index_polygon(self, polygon):
        '''Index a polygon at the geohash precision matching its bbox area.'''
        poly_area = polygon_bounding_box_area(polygon)
        # Pick the finest precision whose 3x3 neighborhood covers the polygon;
        # geohash_level deliberately retains its value after the loop
        for geohash_level, area in self.GEOHASH_PRECISIONS:
            if area * 9.0 >= poly_area:
                break

        bbox = polygon.bounds
        # Index by the bbox centroid
        lat, lon = (bbox[1] + bbox[3]) / 2.0, (bbox[0] + bbox[2]) / 2.0
        self.index_point(lat, lon, geohash_level)

    def get_candidate_polygons(self, lat, lon, return_all=False):
        '''
        Return candidate polygon ids for a point, trying geohash prefixes
        from finest to coarsest; stops at the first hit unless return_all.
        '''
        candidates = []
        code = geohash.encode(lat, lon)
        for level, area in self.GEOHASH_PRECISIONS:
            indices = self.index.get(code[:level])
            if not indices:
                continue
            candidates.extend(indices)
            if not return_all:
                break
        return candidates

    def save_index(self):
        '''Serialize the prefix -> ids mapping as JSON.'''
        if not self.index_path:
            self.index_path = os.path.join(self.save_dir or '.', self.INDEX_FILENAME)
        json.dump(self.index, open(self.index_path, 'w'))

    @classmethod
    def load_index(cls, d, index_name=None):
        '''Load the JSON prefix -> ids mapping (as a plain dict).'''
        return json.load(open(os.path.join(d, index_name or cls.INDEX_FILENAME)))
|
||||
243
scripts/geodata/polygons/language_polys.py
Normal file
243
scripts/geodata/polygons/language_polys.py
Normal file
@@ -0,0 +1,243 @@
|
||||
import argparse
|
||||
import operator
|
||||
import os
|
||||
import sys
|
||||
import ujson as json
|
||||
|
||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
||||
|
||||
from geodata.polygons.index import *
|
||||
from geodata.i18n.languages import *
|
||||
from geodata.language_id.disambiguation import disambiguate_language, AMBIGUOUS_LANGUAGE, UNKNOWN_LANGUAGE, WELL_REPRESENTED_LANGUAGES
|
||||
|
||||
country_language_dir = os.path.join(LANGUAGES_DIR, 'countries')
|
||||
regional_language_dir = os.path.join(LANGUAGES_DIR, 'regional')
|
||||
|
||||
|
||||
class LanguagePolygonIndex(RTreePolygonIndex):
    '''
    R-tree index over quattroshapes admin-0/admin-1 polygons annotated with
    the languages spoken in each area, used to pick a country and language
    for a point (and optionally a place name).
    '''

    DEFAULT_POLYS_FILENAME = 'polygons.geojson'
    ADMIN_LEVELS_FILENAME = 'admin_levels.json'

    # Only these quattroshapes properties (plus the derived 'languages' and
    # 'admin_level') are kept for each polygon
    include_only_properties = set([
        'qs_a0',
        'qs_iso_cc',
        'qs_a1',
        'qs_a1_lc',
        'qs_a1r',
        'qs_a1r_lc',
        'qs_level',
        'languages',
        'admin_level'
    ])

    @classmethod
    def create_from_shapefiles(cls,
                               admin0_shapefile,
                               admin1_shapefile,
                               admin1_region_file,
                               output_dir,
                               index_filename=None,
                               polys_filename=DEFAULT_POLYS_FILENAME):
        '''
        Build the index from the quattroshapes admin-0, admin-1-region and
        admin-1 shapefiles, attaching a per-polygon 'languages' list and
        numeric 'admin_level'.
        '''

        init_languages()
        index = cls(save_dir=output_dir, index_filename=index_filename)

        # NOTE(review): i is incremented below but otherwise unused
        i = 0

        '''
        Ordering of the files is important here as we want to match
        the most granular admin polygon first for regional languages. Currently
        most regional languages as they would apply to street signage are regional in
        terms of an admin 1 level (states, provinces, regions)
        '''
        for input_file in (admin0_shapefile, admin1_region_file, admin1_shapefile):
            f = fiona.open(input_file)

            for rec in f:
                if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
                    continue

                country = rec['properties']['qs_iso_cc'].lower()
                properties = rec['properties']

                admin_level = properties['qs_level']

                # Map the quattroshapes level string to a numeric admin level
                # and the property keys holding the admin-1 name/code
                level_num = None
                if admin_level == 'adm1':
                    name_key = 'qs_a1'
                    code_key = 'qs_a1_lc'
                    level_num = 1
                elif admin_level == 'adm1_region':
                    name_key = 'qs_a1r'
                    code_key = 'qs_a1r_lc'
                    level_num = 1
                elif admin_level == 'adm0':
                    level_num = 0
                else:
                    continue

                assert level_num is not None

                if admin_level != 'adm0':
                    admin1 = properties.get(name_key)
                    admin1_code = properties.get(code_key)

                    # Look up regional languages first by admin-1 name,
                    # then by its code
                    regional = None

                    if name_key:
                        regional = get_regional_languages(country, name_key, admin1)

                    if code_key and not regional:
                        regional = get_regional_languages(country, code_key, admin1_code)

                    if not regional:
                        continue

                    # If no regional language is a default, merge in the
                    # country-level languages
                    if all((not default for lang, default in regional.iteritems())):
                        languages = get_country_languages(country)
                        languages.update(regional)
                        languages = languages.items()
                    else:
                        languages = regional.items()
                else:
                    languages = get_country_languages(country).items()

                properties['languages'] = [{'lang': lang, 'default': default}
                                           for lang, default in languages]
                properties['admin_level'] = level_num

                poly_type = rec['geometry']['type']
                if poly_type == 'Polygon':
                    poly = cls.to_polygon(rec['geometry']['coordinates'][0])
                    # Index the full-resolution polygon, store the simplified one
                    index.index_polygon(poly)
                    poly = index.simplify_polygon(poly)
                    index.add_polygon(poly, dict(rec['properties']))
                elif poly_type == 'MultiPolygon':
                    polys = []
                    for coords in rec['geometry']['coordinates']:
                        poly = cls.to_polygon(coords[0])
                        polys.append(poly)
                        index.index_polygon(poly)

                    multi_poly = index.simplify_polygon(MultiPolygon(polys))
                    index.add_polygon(multi_poly, dict(rec['properties']))
                else:
                    continue

                i += 1
        return index

    @classmethod
    def create_with_quattroshapes(cls, quattroshapes_dir,
                                  output_dir,
                                  index_filename=None,
                                  polys_filename=DEFAULT_POLYS_FILENAME):
        '''Convenience wrapper resolving the standard quattroshapes filenames.'''
        admin0_filename = os.path.join(quattroshapes_dir, 'qs_adm0.shp')
        admin1_filename = os.path.join(quattroshapes_dir, 'qs_adm1.shp')
        admin1r_filename = os.path.join(quattroshapes_dir, 'qs_adm1_region.shp')

        return cls.create_from_shapefiles(admin0_filename, admin1_filename, admin1r_filename,
                                          output_dir, index_filename=index_filename,
                                          polys_filename=polys_filename)

    def setup(self):
        # Parallel list: admin_levels[i] is the admin level of polygon i
        self.admin_levels = []

    def index_polygon_properties(self, properties):
        '''Record each polygon's numeric admin level for candidate sorting.'''
        self.admin_levels.append(properties['admin_level'])

    def load_polygon_properties(self, d):
        '''Restore the admin-level list from disk.'''
        self.admin_levels = json.load(open(os.path.join(d, self.ADMIN_LEVELS_FILENAME)))

    def save_polygon_properties(self, d):
        '''Persist the admin-level list to disk.'''
        json.dump(self.admin_levels, open(os.path.join(d, self.ADMIN_LEVELS_FILENAME), 'w'))

    def admin_level(self, i):
        '''Numeric admin level for polygon id i.'''
        return self.admin_levels[i]

    def get_candidate_polygons(self, lat, lon):
        '''Candidates sorted most-granular (highest admin level) first.'''
        candidates = OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys()
        return sorted(candidates, key=self.admin_level, reverse=True)

    def country_and_languages(self, latitude, longitude):
        '''
        Return (country_code, languages, raw_properties) for a point, or
        (None, None, None) when no polygon contains it. Languages are
        de-duplicated across admin levels and sorted defaults-first.
        '''
        props = self.point_in_poly(latitude, longitude, return_all=True)
        if not props:
            return None, None, None

        country = props[0]['qs_iso_cc'].lower()
        languages = []
        language_set = set()

        have_regional = False

        for p in props:
            for l in p['languages']:
                lang = l['lang']
                if lang not in language_set:
                    language_set.add(lang)
                    if p['admin_level'] > 0 and l['default']:
                        have_regional = True
                    elif have_regional:
                        # A regional default beats country-level defaults:
                        # demote this language to non-default
                        l = {'lang': l['lang'], 'default': 0}
                    languages.append(l)

        # Python's builtin sort is stable, so if there are two defaults, the first remains first
        # Since polygons are returned from the index ordered from smallest admin level to largest,
        # it means the default language of the region overrides the country default
        default_languages = sorted(languages, key=operator.itemgetter('default'), reverse=True)
        return country, default_languages, props

    def best_country_and_language(self, latitude, longitude, name):
        '''
        Return (country, language) for a point plus a place name, using
        disambiguate_language when several candidate languages exist.
        Returns (None, None) when the point matches no polygon.
        '''
        country, candidate_languages, language_props = self.country_and_languages(latitude, longitude)
        if not (country and candidate_languages):
            return None, None

        num_langs = len(candidate_languages)
        default_langs = set([l['lang'] for l in candidate_languages if l.get('default')])
        num_defaults = len(default_langs)

        # Tally defaults and language sets separately for regional
        # (admin_level > 0) and country (admin_level == 0) polygons
        regional_defaults = 0
        country_defaults = 0
        regional_langs = set()
        country_langs = set()
        for p in language_props:
            if p['admin_level'] > 0:
                regional_defaults += sum((1 for lang in p['languages'] if lang.get('default')))
                regional_langs |= set([l['lang'] for l in p['languages']])
            else:
                country_defaults += sum((1 for lang in p['languages'] if lang.get('default')))
                country_langs |= set([l['lang'] for l in p['languages']])

        if num_langs == 1:
            return country, candidate_languages[0]['lang']
        else:
            lang = disambiguate_language(name, [(l['lang'], l['default']) for l in candidate_languages])
            default_lang = candidate_languages[0]['lang']

            if lang == UNKNOWN_LANGUAGE and num_defaults == 1:
                # Couldn't disambiguate, but there's a single default
                return country, default_lang
            elif lang == AMBIGUOUS_LANGUAGE:
                return country, lang
            elif lang != UNKNOWN_LANGUAGE:
                # In a region with its own default language, don't trust a
                # well-represented country-level language match
                if lang != default_lang and lang in country_langs and country_defaults > 1 and regional_defaults > 0 and lang in WELL_REPRESENTED_LANGUAGES:
                    return country, UNKNOWN_LANGUAGE
                return country, lang
            else:
                return country, lang
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Build a language polygon index from quattroshapes and save it to disk
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-q', '--quattroshapes-dir',
                            help='Path to quattroshapes dir')
    arg_parser.add_argument('-o', '--out-dir',
                            default=os.getcwd(),
                            help='Output directory')
    options = arg_parser.parse_args()

    language_index = LanguagePolygonIndex.create_with_quattroshapes(
        options.quattroshapes_dir, options.out_dir)
    language_index.save()
|
||||
589
scripts/geodata/polygons/reverse_geocode.py
Normal file
589
scripts/geodata/polygons/reverse_geocode.py
Normal file
@@ -0,0 +1,589 @@
|
||||
'''
|
||||
reverse_geocode.py
|
||||
-------------------
|
||||
|
||||
In-memory reverse geocoder using polygons from Quattroshapes or OSM.
|
||||
This should be useful for filling in the blanks both in constructing
|
||||
training data from OSM addresses and for OpenVenues.
|
||||
|
||||
Usage:
|
||||
python reverse_geocode.py -o /data/quattroshapes/rtree/reverse -q /data/quattroshapes/
|
||||
python reverse_geocode.py -o /data/quattroshapes/rtree/reverse -a /data/osm/planet-admin-borders.osm
|
||||
'''
|
||||
import argparse
|
||||
import logging
|
||||
import operator
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
import shutil
|
||||
import six
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from functools import partial
|
||||
|
||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
||||
|
||||
from geodata.coordinates.conversion import latlon_to_decimal
|
||||
from geodata.countries.constants import Countries
|
||||
from geodata.encoding import safe_decode
|
||||
from geodata.file_utils import ensure_dir, download_file
|
||||
from geodata.i18n.unicode_properties import get_chars_by_script
|
||||
from geodata.i18n.languages import *
|
||||
from geodata.i18n.word_breaks import ideographic_scripts
|
||||
from geodata.names.deduping import NameDeduper
|
||||
from geodata.osm.extract import parse_osm, osm_type_and_id, NODE, WAY, RELATION, OSM_NAME_TAGS
|
||||
from geodata.osm.admin_boundaries import *
|
||||
from geodata.polygons.index import *
|
||||
from geodata.statistics.tf_idf import IDFIndex
|
||||
|
||||
from geodata.text.tokenize import tokenize, token_types
|
||||
from geodata.text.normalize import *
|
||||
|
||||
from shapely.topology import TopologicalError
|
||||
|
||||
decode_latin1 = partial(safe_decode, encoding='latin1')
|
||||
|
||||
|
||||
def str_id(v):
    '''
    Normalize an id value: coerce to int and return its string form,
    or None when the id is zero/negative (quattroshapes uses non-positive
    values for missing ids).
    '''
    numeric_id = int(v)
    return str(numeric_id) if numeric_id > 0 else None
|
||||
|
||||
|
||||
class QuattroshapesReverseGeocoder(RTreePolygonIndex):
|
||||
'''
|
||||
Quattroshapes polygons, for levels up to localities, are relatively
|
||||
accurate and provide concordance with GeoPlanet and in some cases
|
||||
GeoNames (which is used in other parts of this project).
|
||||
'''
|
||||
COUNTRIES_FILENAME = 'qs_adm0.shp'
|
||||
ADMIN1_FILENAME = 'qs_adm1.shp'
|
||||
ADMIN1_REGION_FILENAME = 'qs_adm1_region.shp'
|
||||
ADMIN2_FILENAME = 'qs_adm2.shp'
|
||||
ADMIN2_REGION_FILENAME = 'qs_adm2_region.shp'
|
||||
LOCAL_ADMIN_FILENAME = 'qs_localadmin.shp'
|
||||
LOCALITIES_FILENAME = 'qs_localities.shp'
|
||||
NEIGHBORHOODS_FILENAME = 'qs_neighborhoods.shp'
|
||||
|
||||
COUNTRY = 'adm0'
|
||||
ADMIN1 = 'adm1'
|
||||
ADMIN1_REGION = 'adm1_region'
|
||||
ADMIN2 = 'adm2'
|
||||
ADMIN2_REGION = 'adm2_region'
|
||||
LOCAL_ADMIN = 'localadmin'
|
||||
LOCALITY = 'locality'
|
||||
NEIGHBORHOOD = 'neighborhood'
|
||||
|
||||
PRIORITIES_FILENAME = 'priorities.json'
|
||||
|
||||
persistent_polygons = True
|
||||
cache_size = 100000
|
||||
|
||||
sorted_levels = (COUNTRY,
|
||||
ADMIN1_REGION,
|
||||
ADMIN1,
|
||||
ADMIN2_REGION,
|
||||
ADMIN2,
|
||||
LOCAL_ADMIN,
|
||||
LOCALITY,
|
||||
NEIGHBORHOOD,
|
||||
)
|
||||
|
||||
sort_levels = {k: i for i, k in enumerate(sorted_levels)}
|
||||
|
||||
NAME = 'name'
|
||||
CODE = 'code'
|
||||
LEVEL = 'level'
|
||||
GEONAMES_ID = 'geonames_id'
|
||||
WOE_ID = 'woe_id'
|
||||
|
||||
polygon_properties = {
|
||||
COUNTRIES_FILENAME: {
|
||||
NAME: ('qs_a0', safe_decode),
|
||||
CODE: ('qs_iso_cc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str_id),
|
||||
WOE_ID: ('qs_woe_id', str_id),
|
||||
},
|
||||
ADMIN1_FILENAME: {
|
||||
NAME: ('qs_a1', safe_decode),
|
||||
CODE: ('qs_a1_lc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str_id),
|
||||
WOE_ID: ('qs_woe_id', str_id),
|
||||
},
|
||||
ADMIN1_REGION_FILENAME: {
|
||||
NAME: ('qs_a1r', safe_decode),
|
||||
CODE: ('qs_a1r_lc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str_id),
|
||||
WOE_ID: ('qs_woe_id', str_id),
|
||||
},
|
||||
ADMIN2_FILENAME: {
|
||||
NAME: ('qs_a2', decode_latin1),
|
||||
CODE: ('qs_a2_lc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str_id),
|
||||
WOE_ID: ('qs_woe_id', str_id),
|
||||
},
|
||||
ADMIN2_REGION_FILENAME: {
|
||||
NAME: ('qs_a2r', safe_decode),
|
||||
CODE: ('qs_a2r_lc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str_id),
|
||||
WOE_ID: ('qs_woe_id', str_id),
|
||||
},
|
||||
LOCAL_ADMIN_FILENAME: {
|
||||
NAME: ('qs_la', safe_decode),
|
||||
CODE: ('qs_la_lc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str_id),
|
||||
WOE_ID: ('qs_woe_id', str_id),
|
||||
},
|
||||
LOCALITIES_FILENAME: {
|
||||
NAME: ('qs_loc', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('qs_gn_id', str),
|
||||
WOE_ID: ('qs_woe_id', str),
|
||||
},
|
||||
NEIGHBORHOODS_FILENAME: {
|
||||
NAME: ('name', safe_decode),
|
||||
CODE: ('name_en', safe_decode),
|
||||
LEVEL: ('qs_level', safe_decode),
|
||||
GEONAMES_ID: ('gn_id', str_id),
|
||||
WOE_ID: ('woe_id', str_id),
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_from_shapefiles(cls,
|
||||
input_files,
|
||||
output_dir,
|
||||
index_filename=None,
|
||||
polys_filename=DEFAULT_POLYS_FILENAME,
|
||||
use_all_props=False):
|
||||
|
||||
index = cls(save_dir=output_dir, index_filename=index_filename)
|
||||
|
||||
for input_file in input_files:
|
||||
f = fiona.open(input_file)
|
||||
|
||||
filename = os.path.split(input_file)[-1]
|
||||
|
||||
aliases = cls.polygon_properties.get(filename)
|
||||
|
||||
if not use_all_props:
|
||||
include_props = aliases
|
||||
else:
|
||||
include_props = None
|
||||
|
||||
for rec in f:
|
||||
if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
|
||||
continue
|
||||
|
||||
properties = rec['properties']
|
||||
|
||||
if filename == cls.NEIGHBORHOODS_FILENAME:
|
||||
properties['qs_level'] = 'neighborhood'
|
||||
|
||||
have_all_props = False
|
||||
for k, (prop, func) in aliases.iteritems():
|
||||
v = properties.get(prop, None)
|
||||
if v is not None:
|
||||
try:
|
||||
properties[k] = func(v)
|
||||
except Exception:
|
||||
break
|
||||
else:
|
||||
have_all_props = True
|
||||
if not have_all_props or not properties.get(cls.NAME):
|
||||
continue
|
||||
|
||||
poly_type = rec['geometry']['type']
|
||||
if poly_type == 'Polygon':
|
||||
poly = cls.to_polygon(rec['geometry']['coordinates'][0])
|
||||
index.index_polygon(poly)
|
||||
poly = index.simplify_polygon(poly)
|
||||
index.add_polygon(poly, dict(rec['properties']), include_only_properties=include_props)
|
||||
elif poly_type == 'MultiPolygon':
|
||||
polys = []
|
||||
for coords in rec['geometry']['coordinates']:
|
||||
poly = cls.to_polygon(coords[0])
|
||||
polys.append(poly)
|
||||
index.index_polygon(poly)
|
||||
|
||||
multi_poly = index.simplify_polygon(MultiPolygon(polys))
|
||||
index.add_polygon(multi_poly, dict(rec['properties']), include_only_properties=include_props)
|
||||
else:
|
||||
continue
|
||||
|
||||
return index
|
||||
|
||||
@classmethod
def create_with_quattroshapes(cls, quattroshapes_dir,
                              output_dir,
                              index_filename=None,
                              polys_filename=DEFAULT_POLYS_FILENAME):
    '''
    Build an index from the standard set of quattroshapes shapefiles
    (countries, admin1/admin1-region, admin2/admin2-region, localities)
    found under quattroshapes_dir.
    '''
    shapefiles = [os.path.join(quattroshapes_dir, name)
                  for name in (cls.COUNTRIES_FILENAME,
                               cls.ADMIN1_FILENAME,
                               cls.ADMIN1_REGION_FILENAME,
                               cls.ADMIN2_FILENAME,
                               cls.ADMIN2_REGION_FILENAME,
                               cls.LOCALITIES_FILENAME)]

    return cls.create_from_shapefiles(shapefiles,
                                      output_dir, index_filename=index_filename,
                                      polys_filename=polys_filename)
||||
def setup(self):
    # Per-polygon priority list, parallel to the R-tree index;
    # appended to by index_polygon_properties as polygons are added.
    self.priorities = []
||||
def index_polygon_properties(self, properties):
    '''Record the sort priority for a newly indexed polygon, derived
    from its level property (unknown levels get priority 0).'''
    level = properties[self.LEVEL]
    priority = self.sort_levels.get(level, 0)
    self.priorities.append(priority)
||||
def load_polygon_properties(self, d):
    '''Load the priorities list written by save_polygon_properties
    from directory d.'''
    # Use a context manager so the file handle is closed promptly
    # (previously json.load(open(...)) leaked the handle)
    with open(os.path.join(d, self.PRIORITIES_FILENAME)) as f:
        self.priorities = json.load(f)
||||
def save_polygon_properties(self, d):
    '''Persist self.priorities as JSON into directory d.'''
    # Use a context manager so the file is flushed and closed deterministically
    # (previously json.dump(..., open(...)) leaked the handle)
    with open(os.path.join(d, self.PRIORITIES_FILENAME), 'w') as f:
        json.dump(self.priorities, f)
||||
def sort_level(self, i):
    # Sort key for candidate ordering: the priority recorded for the
    # i-th polygon at index time.
    return self.priorities[i]
||||
def get_candidate_polygons(self, lat, lon):
    # Return the R-tree candidates for (lat, lon) ordered most-specific
    # (highest priority) first, so point-in-polygon tests hit the
    # smallest containing polygon before its parents.
    candidates = super(QuattroshapesReverseGeocoder, self).get_candidate_polygons(lat, lon)
    return sorted(candidates, key=self.sort_level, reverse=True)
||||
class OSMReverseGeocoder(RTreePolygonIndex):
    '''
    OSM has among the best, most carefully-crafted, accurate administrative
    polygons in the business in addition to using local language naming
    conventions which is desirable for creating a truly multilingual address
    parser.

    The storage of these polygons is byzantine. See geodata.osm.osm_admin_boundaries
    for more details.

    Suffice to say, this reverse geocoder builds an R-tree index on OSM planet
    in a reasonable amount of memory using arrays of C integers and binary search
    for the dependency lookups and Tarjan's algorithm for finding strongly connected
    components to stitch together the polygons.
    '''

    # OSM tag holding the administrative level of a boundary relation
    ADMIN_LEVEL = 'admin_level'

    # Sidecar file storing per-polygon admin levels, parallel to the index
    ADMIN_LEVELS_FILENAME = 'admin_levels.json'

    polygon_reader = OSMAdminPolygonReader

    persistent_polygons = True
    # Cache almost everything
    cache_size = 250000
    simplify_polygons = False

    fix_invalid_polygons = True

    # Tag names (or "prefix:*" patterns) preserved on indexed polygons
    include_property_patterns = set([
        'id',
        'type',
        'name',
        'name:*',
        'ISO3166-1:alpha2',
        'ISO3166-1:alpha3',
        'ISO3166-2',
        'int_name',
        'official_name',
        'official_name:*',
        'alt_name',
        'alt_name:*',
        'short_name',
        'short_name:*',
        'admin_level',
        'place',
        'population',
        'designation',
        'wikipedia',
        'wikipedia:*',
    ])

    @classmethod
    def create_from_osm_file(cls, filename, output_dir,
                             index_filename=None,
                             polys_filename=DEFAULT_POLYS_FILENAME):
        '''
        Given an OSM file (planet or some other bounds) containing relations
        and their dependencies, create an R-tree index for coarse-grained
        reverse geocoding.

        Note: the input file is expected to have been created using
        osmfilter. Use fetch_osm_address_data.sh for planet or copy the
        admin borders commands if using other bounds.
        '''
        index = cls(save_dir=output_dir, index_filename=index_filename)

        reader = cls.polygon_reader(filename)
        polygons = reader.polygons()

        logger = logging.getLogger('osm.reverse_geocode')

        for element_id, props, admin_center, outer_polys, inner_polys in polygons:
            # Keep only whitelisted properties: exact names, or any key whose
            # "prefix:*" pattern is whitelisted (e.g. name:fr via name:*)
            props = {k: v for k, v in six.iteritems(props)
                     if k in cls.include_property_patterns or (six.u(':') in k and
                     six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)}

            id_type, element_id = osm_type_and_id(element_id)

            test_point = None

            if admin_center:
                admin_center_props = {k: v for k, v in six.iteritems(admin_center)
                                      if k in ('id', 'type', 'lat', 'lon') or k in cls.include_property_patterns or (six.u(':') in k and
                                      six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)}

                if cls.fix_invalid_polygons:
                    # The relation's admin center should lie inside the polygon;
                    # used as a sanity test point when repairing invalid geometry
                    center_lat, center_lon = latlon_to_decimal(admin_center_props['lat'], admin_center_props['lon'])
                    test_point = Point(center_lon, center_lat)

                props['admin_center'] = admin_center_props

            if inner_polys and not outer_polys:
                # logger.warn is a deprecated alias of logger.warning
                logger.warning('inner polygons with no outer')
                continue

            if len(outer_polys) == 1 and not inner_polys:
                # Simple case: one outer ring, no holes
                poly = cls.to_polygon(outer_polys[0])
                if poly is None or not poly.bounds or len(poly.bounds) != 4:
                    continue
                if poly.type != 'MultiPolygon':
                    index.index_polygon(poly)
                else:
                    for p in poly:
                        index.index_polygon(p)
            else:
                multi = []
                inner = []
                # Validate inner polygons (holes)
                for p in inner_polys:
                    poly = cls.to_polygon(p)
                    if poly is None or not poly.bounds or len(poly.bounds) != 4 or not poly.is_valid:
                        continue

                    if poly.type != 'MultiPolygon':
                        inner.append(poly)
                    else:
                        inner.extend(poly)

                # Validate outer polygons
                for p in outer_polys:
                    poly = cls.to_polygon(p, test_point=test_point)
                    if poly is None or not poly.bounds or len(poly.bounds) != 4:
                        continue

                    interior = []
                    try:
                        # Figure out which outer polygon contains each inner polygon
                        interior = [p2 for p2 in inner if poly.contains(p2)]
                    except TopologicalError:
                        continue

                    if interior:
                        # Polygon with holes constructor
                        poly = cls.to_polygon(p, [zip(*p2.exterior.coords.xy) for p2 in interior], test_point=test_point)
                        if poly is None or not poly.bounds or len(poly.bounds) != 4:
                            continue
                    # R-tree only stores the bounding box, so add the whole polygon
                    if poly.type != 'MultiPolygon':
                        index.index_polygon(poly)
                        multi.append(poly)
                    else:
                        for p in poly:
                            index.index_polygon(p)
                        multi.extend(poly)

                if len(multi) > 1:
                    poly = MultiPolygon(multi)
                elif multi:
                    poly = multi[0]
                else:
                    continue
            if index.simplify_polygons:
                poly = index.simplify_polygon(poly)
            index.add_polygon(poly, props)

        return index

    def setup(self):
        # Per-polygon admin level list, parallel to the R-tree index
        self.admin_levels = []

    def index_polygon_properties(self, properties):
        '''Record the admin_level of a newly indexed polygon (0 when
        missing or unparseable).'''
        admin_level = properties.get(self.ADMIN_LEVEL, 0)
        try:
            admin_level = int(admin_level)
        except ValueError:
            # Non-numeric admin_level tags get the lowest priority
            admin_level = 0
        self.admin_levels.append(admin_level)

    def load_polygon_properties(self, d):
        '''Load the admin levels written by save_polygon_properties from d.'''
        # Context manager closes the handle (previously leaked)
        with open(os.path.join(d, self.ADMIN_LEVELS_FILENAME)) as f:
            self.admin_levels = json.load(f)

    def save_polygon_properties(self, d):
        '''Persist self.admin_levels as JSON into directory d.'''
        # Context manager flushes and closes the handle (previously leaked)
        with open(os.path.join(d, self.ADMIN_LEVELS_FILENAME), 'w') as f:
            json.dump(self.admin_levels, f)

    def sort_level(self, i):
        # Sort key: admin level recorded for the i-th polygon
        return self.admin_levels[i]

    def get_candidate_polygons(self, lat, lon):
        # Most specific (highest admin_level) candidates first
        candidates = super(OSMReverseGeocoder, self).get_candidate_polygons(lat, lon)
        return sorted(candidates, key=self.sort_level, reverse=True)
||||
class OSMSubdivisionReverseGeocoder(OSMReverseGeocoder):
    '''Reverse geocoder over OSM subdivision polygons, read with
    OSMSubdivisionPolygonReader; additionally keeps the landuse, place
    and amenity tags on indexed polygons.'''
    persistent_polygons = True
    cache_size = 10000
    simplify_polygons = False
    polygon_reader = OSMSubdivisionPolygonReader
    include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['landuse', 'place', 'amenity'])
||||
class OSMBuildingReverseGeocoder(OSMReverseGeocoder):
    '''Reverse geocoder over OSM building polygons, read with
    OSMBuildingPolygonReader; keeps building and addr:* tags.'''
    persistent_polygons = True
    cache_size = 10000
    simplify_polygons = False

    # Buildings are small and numerous; skip the admin-center validity repair
    fix_invalid_polygons = False

    polygon_reader = OSMBuildingPolygonReader
    include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['building', 'building:levels', 'building:part', 'addr:*'])
||||
class OSMPostalCodeReverseGeocoder(OSMReverseGeocoder):
    '''Reverse geocoder over OSM postal code polygons, read with
    OSMPostalCodesPolygonReader; keeps the postal_code tag.'''
    persistent_polygons = True
    cache_size = 10000
    simplify_polygons = False
    polygon_reader = OSMPostalCodesPolygonReader
    include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['postal_code'])
||||
class OSMAirportReverseGeocoder(OSMReverseGeocoder):
    '''Reverse geocoder over OSM airport polygons, read with
    OSMAirportsPolygonReader; keeps IATA/aerodrome tags.'''
    persistent_polygons = True
    cache_size = 10000
    simplify_polygons = False
    polygon_reader = OSMAirportsPolygonReader
    include_property_patterns = OSMReverseGeocoder.include_property_patterns | set(['iata', 'aerodrome', 'aerodrome:type', 'city_served'])
||||
class OSMCountryReverseGeocoder(OSMReverseGeocoder):
    '''Reverse geocoder restricted to country-level OSM polygons, plus
    helpers for resolving a country code and its default languages.'''
    persistent_polygons = True
    cache_size = 10000
    simplify_polygons = False
    polygon_reader = OSMCountryPolygonReader

    @classmethod
    def country_and_languages_from_components(cls, osm_components):
        '''
        Given a list of OSM component property dicts (as returned by
        point_in_poly with return_all=True), return a tuple of
        (lowercased ISO 3166-1 alpha-2 country code, languages sorted by
        default-ness descending), or (None, []) when no country can be
        determined.
        '''
        country = None
        for c in osm_components:
            country = c.get('ISO3166-1:alpha2')
            if country:
                break
        else:
            # See if there's an ISO3166-2 code that matches
            # in case the country polygon is wacky
            for c in osm_components:
                admin1 = c.get('ISO3166-2')
                if admin1:
                    # If so, and if the country is valid, use that
                    country = admin1[:2]
                    if not Countries.is_valid_country_code(country.lower()):
                        return None, []
                    break

        # Fix: previously fell through to country.lower() and raised
        # AttributeError when no component carried either ISO code
        if not country:
            return None, []

        country = country.lower()

        # Prefer regional (sub-national) language data if any component has it
        regional = None
        for c in osm_components:
            place_id = '{}:{}'.format(c.get('type', 'relation'), c.get('id', '0'))

            regional = get_regional_languages(country, 'osm', place_id)

            if regional:
                break

        if not regional:
            languages = get_country_languages(country).items()
        else:
            if not all(regional.values()):
                # Regional data is partial: overlay it on the country defaults
                languages = get_country_languages(country)
                languages.update(regional)
                languages = languages.items()
            else:
                languages = regional.items()

        # Sort default languages first
        default_languages = sorted(languages, key=operator.itemgetter(1), reverse=True)

        return country, default_languages

    def country_and_languages(self, lat, lon):
        '''Resolve (country, default languages) for a point.'''
        osm_components = self.point_in_poly(lat, lon, return_all=True)
        return self.country_and_languages_from_components(osm_components)
||||
if __name__ == '__main__':
    # Command-line entry point: build a reverse-geocoding index from one
    # of several possible input sources, in a fixed precedence order.
    parser = argparse.ArgumentParser()

    # (flags, help) pairs, registered in the original declaration order
    option_help = [
        (('-q', '--quattroshapes-dir'),
         'Path to quattroshapes dir'),
        (('-a', '--osm-admin-file'),
         'Path to OSM borders file (with dependencies, .osm format)'),
        (('-s', '--osm-subdivisions-file'),
         'Path to OSM subdivisions file (with dependencies, .osm format)'),
        (('-b', '--osm-building-polygons-file'),
         'Path to OSM building polygons file (with dependencies, .osm format)'),
        (('-p', '--osm-postal-code-polygons-file'),
         'Path to OSM postal code polygons file (with dependencies, .osm format)'),
        (('-r', '--osm-airport-polygons-file'),
         'Path to OSM airport polygons file (with dependencies, .osm format)'),
        (('-c', '--osm-country-polygons-file'),
         'Path to OSM country polygons file (with dependencies, .osm format)'),
    ]
    for flags, help_text in option_help:
        parser.add_argument(*flags, help=help_text)

    parser.add_argument('-o', '--out-dir',
                        default=os.getcwd(),
                        help='Output directory')

    logging.basicConfig(level=logging.INFO)

    args = parser.parse_args()

    # Dispatch table preserving the original if/elif precedence
    dispatch = [
        ('osm_admin_file', OSMReverseGeocoder.create_from_osm_file),
        ('osm_subdivisions_file', OSMSubdivisionReverseGeocoder.create_from_osm_file),
        ('osm_building_polygons_file', OSMBuildingReverseGeocoder.create_from_osm_file),
        ('osm_country_polygons_file', OSMCountryReverseGeocoder.create_from_osm_file),
        ('osm_postal_code_polygons_file', OSMPostalCodeReverseGeocoder.create_from_osm_file),
        ('osm_airport_polygons_file', OSMAirportReverseGeocoder.create_from_osm_file),
        ('quattroshapes_dir', QuattroshapesReverseGeocoder.create_with_quattroshapes),
    ]
    for attr, build_index in dispatch:
        input_path = getattr(args, attr)
        if input_path:
            index = build_index(input_path, args.out_dir)
            break
    else:
        parser.error('Must specify quattroshapes dir or osm admin borders file')

    index.save()
||||
Reference in New Issue
Block a user