[polygons] Adding address_normalizer polygons package

This commit is contained in:
Al
2015-06-15 17:55:27 -04:00
parent 22fa81b33f
commit ccb64f7ac2
3 changed files with 250 additions and 0 deletions

View File

View File

@@ -0,0 +1,138 @@
import fiona
import os
import rtree
import ujson as json
from collections import OrderedDict
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.prepared import prep
from shapely.geometry.geo import mapping
DEFAULT_INDEX_FILENAME = 'rtree'
DEFAULT_POLYS_FILENAME = 'polygons.geojson'
class RTreePolygonIndex(object):
include_only_properties = None
def __init__(self, index=None, polygons=None, save_dir=None,
index_filename=None,
include_only_properties=None):
if save_dir:
self.save_dir = save_dir
else:
self.save_dir = None
if index_filename:
self.index_path = os.path.join(save_dir or '.', index_filename)
else:
self.index_path = None
if not index and self.index_path:
self.index = rtree.index.Index(self.index_path)
elif not index:
self.index = rtree.index.Index()
else:
self.index = index
if include_only_properties and hasattr(include_only_properties, '__contains__'):
self.include_only_properties = include_only_properties
if not polygons:
self.polygons = []
else:
self.polygons = polygons
def index_polygon(self, id, polygon):
self.index.insert(id, polygon.bounds)
def add_polygon(self, poly, properties, simplify_tolerance=0.0001, preserve_topology=True):
poly = poly.simplify(simplify_tolerance, preserve_topology=preserve_topology)
if self.include_only_properties:
properties = {k: v for k, v in properties.iteritems() if k in self.include_only_properties}
self.polygons.append((properties, prep(poly)))
@classmethod
def create_from_shapefiles(cls, inputs, output_dir,
index_filename=DEFAULT_INDEX_FILENAME,
polys_filename=DEFAULT_POLYS_FILENAME):
index = cls(save_dir=output_dir, index_filename=index_filename)
for input_file in inputs:
f = fiona.open(input_file)
i = 0
for rec in f:
if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
continue
poly_type = rec['geometry']['type']
if poly_type == 'Polygon':
poly = Polygon(rec['geometry']['coordinates'][0])
index.index_polygon(i, poly)
index.add_polygon(poly, rec['properties'])
elif poly_type == 'MultiPolygon':
polys = []
for coords in rec['geometry']['coordinates']:
poly = Polygon(coords[0])
polys.append(poly)
index.index_polygon(i, poly)
index.add_polygon(MultiPolygon(polys), rec['properties'])
i += 1
return index
def save_polygons(self, out_filename):
out = open(out_filename, 'w')
features = []
for props, poly in self.polygons:
features.append({
'type': 'Feature',
'geometry': mapping(poly.context),
'properties': props
})
json.dump({'type': 'FeatureCollection',
'features': features},
out)
def save(self, polys_filename=DEFAULT_POLYS_FILENAME):
self.save_polygons(os.path.join(self.save_dir, polys_filename))
# need to close index before loading it
self.index.close()
@classmethod
def load_polygons(cls, filename):
feature_collection = json.load(open(filename))
polygons = []
for feature in feature_collection['features']:
poly_type = feature['geometry']['type']
if poly_type == 'Polygon':
poly = Polygon(feature['geometry']['coordinates'][0])
polygons.append((feature['properties'], prep(poly)))
elif poly_type == 'MultiPolygon':
polys = []
for coords in feature['geometry']['coordinates']:
poly = Polygon(coords[0])
polys.append(poly)
polygons.append((feature['properties'], prep(MultiPolygon(polys))))
return polygons
@classmethod
def load(cls, d, index_name=DEFAULT_INDEX_FILENAME, polys_filename=DEFAULT_POLYS_FILENAME):
index = rtree.index.Index(os.path.join(d, index_name))
polys = cls.load_polygons(os.path.join(d, polys_filename))
return cls(index=index, polygons=polys, save_dir=d)
def get_candidate_polygons(self, lat, lon):
return OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys()
def point_in_poly(self, lat, lon):
polys = self.get_candidate_polygons(lat, lon)
pt = Point(lon, lat)
for i in polys:
props, poly = self.polygons[i]
if poly.contains(pt):
return props
return None

View File

@@ -0,0 +1,112 @@
import csv
import os
this_dir = os.path.realpath(os.path.dirname(__file__))
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
from geodata.polygons.index import *
from geodara.text.languages import *
country_language_dir = os.path.join(LANGUAGES_DIR, 'countries')
regional_language_dir = os.path.join(LANGUAGES_DIR, 'regional')
class LanguagePolygonIndex(RTreePolygonIndex):
include_only_properties = set([
'qs_a0',
'qs_iso_cc',
'qs_a1',
'qs_a1_lc',
'qs_a1r',
'qs_a1r_lc',
'qs_level',
'languages',
])
@classmethod
def create_from_shapefiles(cls,
admin0_shapefile,
admin1_shapefile,
admin1_region_file,
output_dir,
index_filename=DEFAULT_INDEX_FILENAME,
polys_filename=DEFAULT_POLYS_FILENAME):
init_languages()
index = cls(save_dir=output_dir, index_filename=index_filename)
i = 0
'''
Ordering of the files is important here as we want to match
the most granular admin polygon first for regional languages. Currently
most regional languages as they would apply to street signage are regional in
terms of an admin 1 level (states, provinces, regions)
'''
for input_file in (admin0_shapefile, admin1_region_file, admin1_shapefile):
f = fiona.open(input_file)
for rec in f:
if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
continue
country = rec['properties']['qs_iso_cc'].lower()
properties = rec['properties']
admin_level = properties['qs_level']
if admin_level == 'adm1':
name_key = 'qs_a1'
code_key = 'qs_a1_lc'
elif admin_level == 'adm1_region':
name_key = 'qs_a1r'
code_key = 'qs_a1r_lc'
elif admin_level != 'adm0':
continue
if admin_level != 'adm0':
admin1 = properties.get(name_key)
admin1_code = properties.get(code_key)
regional_lang = None
is_default = None
if name_key:
regional_lang, is_default = regional_languages.get((country, name_key, admin1), (None, None))
if code_key and not regional_lang:
regional_lang, is_default = regional_languages.get((country, code_key, admin1_code), (None, None))
if not regional_lang:
continue
languages = [(lang, is_default) for lang in regional_lang.split(',')]
else:
languages = official_languages[country].items()
overrides = road_language_overrides.get(country)
if overrides and overrides.values()[0]:
languages = overrides.items()
elif overrides:
languages.extend(overrides.items())
properties['languages'] = [{'lang': lang, 'default': default}
for lang, default in languages]
poly_type = rec['geometry']['type']
if poly_type == 'Polygon':
poly = Polygon(rec['geometry']['coordinates'][0])
index.index_polygon(i, poly)
index.add_polygon(poly, dict(rec['properties']))
elif poly_type == 'MultiPolygon':
polys = []
for coords in rec['geometry']['coordinates']:
poly = Polygon(coords[0])
polys.append(poly)
index.index_polygon(i, poly)
index.add_polygon(MultiPolygon(polys), dict(rec['properties']))
else:
continue
i += 1
return index