From ccb64f7ac2bf3d1cd0b70bdb2b1d3d6cab97361c Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 15 Jun 2015 17:55:27 -0400 Subject: [PATCH] [polygons] Adding address_normalizer polygons package --- scripts/geodata/polygons/__init__.py | 0 scripts/geodata/polygons/index.py | 138 +++++++++++++++++++++ scripts/geodata/polygons/language_polys.py | 112 +++++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 scripts/geodata/polygons/__init__.py create mode 100644 scripts/geodata/polygons/index.py create mode 100644 scripts/geodata/polygons/language_polys.py diff --git a/scripts/geodata/polygons/__init__.py b/scripts/geodata/polygons/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/geodata/polygons/index.py b/scripts/geodata/polygons/index.py new file mode 100644 index 00000000..52602193 --- /dev/null +++ b/scripts/geodata/polygons/index.py @@ -0,0 +1,138 @@ +import fiona +import os +import rtree +import ujson as json + +from collections import OrderedDict +from shapely.geometry import Point, Polygon, MultiPolygon +from shapely.prepared import prep +from shapely.geometry.geo import mapping + +DEFAULT_INDEX_FILENAME = 'rtree' +DEFAULT_POLYS_FILENAME = 'polygons.geojson' + + +class RTreePolygonIndex(object): + include_only_properties = None + + def __init__(self, index=None, polygons=None, save_dir=None, + index_filename=None, + include_only_properties=None): + if save_dir: + self.save_dir = save_dir + else: + self.save_dir = None + + if index_filename: + self.index_path = os.path.join(save_dir or '.', index_filename) + else: + self.index_path = None + + if not index and self.index_path: + self.index = rtree.index.Index(self.index_path) + elif not index: + self.index = rtree.index.Index() + else: + self.index = index + + if include_only_properties and hasattr(include_only_properties, '__contains__'): + self.include_only_properties = include_only_properties + + if not polygons: + self.polygons = [] + else: + self.polygons = polygons + + def index_polygon(self, id, polygon): + self.index.insert(id, polygon.bounds) + + def add_polygon(self, poly, properties, simplify_tolerance=0.0001, preserve_topology=True): + poly = poly.simplify(simplify_tolerance, preserve_topology=preserve_topology) + if self.include_only_properties: + properties = {k: v for k, v in properties.iteritems() if k in self.include_only_properties} + + self.polygons.append((properties, prep(poly))) + + @classmethod + def create_from_shapefiles(cls, inputs, output_dir, + index_filename=DEFAULT_INDEX_FILENAME, + polys_filename=DEFAULT_POLYS_FILENAME): + index = cls(save_dir=output_dir, index_filename=index_filename) + for input_file in inputs: + f = fiona.open(input_file) + + i = 0 + + for rec in f: + if not rec or not rec.get('geometry') or 'type' not in rec['geometry']: + continue + poly_type = rec['geometry']['type'] + if poly_type == 'Polygon': + poly = Polygon(rec['geometry']['coordinates'][0]) + index.index_polygon(i, poly) + index.add_polygon(poly, rec['properties']) + elif poly_type == 'MultiPolygon': + polys = [] + for coords in rec['geometry']['coordinates']: + poly = Polygon(coords[0]) + polys.append(poly) + index.index_polygon(i, poly) + + index.add_polygon(MultiPolygon(polys), rec['properties']) + i += 1 + return index + + def save_polygons(self, out_filename): + out = open(out_filename, 'w') + features = [] + for props, poly in self.polygons: + features.append({ + 'type': 'Feature', + 'geometry': mapping(poly.context), + 'properties': props + }) + json.dump({'type': 'FeatureCollection', + 'features': features}, + out) + + def save(self, polys_filename=DEFAULT_POLYS_FILENAME): + self.save_polygons(os.path.join(self.save_dir, polys_filename)) + # need to close index before loading it + self.index.close() + + @classmethod + def load_polygons(cls, filename): + feature_collection = json.load(open(filename)) + polygons = [] + for feature in feature_collection['features']: + poly_type = feature['geometry']['type'] + + if poly_type == 'Polygon': + poly = Polygon(feature['geometry']['coordinates'][0]) + polygons.append((feature['properties'], prep(poly))) + elif poly_type == 'MultiPolygon': + polys = [] + for coords in feature['geometry']['coordinates']: + poly = Polygon(coords[0]) + polys.append(poly) + + polygons.append((feature['properties'], prep(MultiPolygon(polys)))) + return polygons + + @classmethod + def load(cls, d, index_name=DEFAULT_INDEX_FILENAME, polys_filename=DEFAULT_POLYS_FILENAME): + index = rtree.index.Index(os.path.join(d, index_name)) + polys = cls.load_polygons(os.path.join(d, polys_filename)) + return cls(index=index, polygons=polys, save_dir=d) + + def get_candidate_polygons(self, lat, lon): + return OrderedDict.fromkeys(self.index.intersection((lon, lat, lon, lat))).keys() + + def point_in_poly(self, lat, lon): + polys = self.get_candidate_polygons(lat, lon) + pt = Point(lon, lat) + for i in polys: + props, poly = self.polygons[i] + if poly.contains(pt): + return props + return None \ No newline at end of file diff --git a/scripts/geodata/polygons/language_polys.py b/scripts/geodata/polygons/language_polys.py new file mode 100644 index 00000000..801f89c7 --- /dev/null +++ b/scripts/geodata/polygons/language_polys.py @@ -0,0 +1,112 @@ +import csv +import os + +this_dir = os.path.realpath(os.path.dirname(__file__)) +sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir))) + +from geodata.polygons.index import * +from geodara.text.languages import * + +country_language_dir = os.path.join(LANGUAGES_DIR, 'countries') +regional_language_dir = os.path.join(LANGUAGES_DIR, 'regional') + + +class LanguagePolygonIndex(RTreePolygonIndex): + + include_only_properties = set([ + 'qs_a0', + 'qs_iso_cc', + 'qs_a1', + 'qs_a1_lc', + 'qs_a1r', + 'qs_a1r_lc', + 'qs_level', + 'languages', + ]) + + @classmethod + def create_from_shapefiles(cls, + admin0_shapefile, + admin1_shapefile, + admin1_region_file, + output_dir, + index_filename=DEFAULT_INDEX_FILENAME, + polys_filename=DEFAULT_POLYS_FILENAME): + + init_languages() + index = cls(save_dir=output_dir, index_filename=index_filename) + + i = 0 + + ''' + Ordering of the files is important here as we want to match + the most granular admin polygon first for regional languages. Currently + most regional languages as they would apply to street signage are regional in + terms of an admin 1 level (states, provinces, regions) + ''' + for input_file in (admin0_shapefile, admin1_region_file, admin1_shapefile): + f = fiona.open(input_file) + + for rec in f: + if not rec or not rec.get('geometry') or 'type' not in rec['geometry']: + continue + + country = rec['properties']['qs_iso_cc'].lower() + properties = rec['properties'] + + admin_level = properties['qs_level'] + + if admin_level == 'adm1': + name_key = 'qs_a1' + code_key = 'qs_a1_lc' + elif admin_level == 'adm1_region': + name_key = 'qs_a1r' + code_key = 'qs_a1r_lc' + elif admin_level != 'adm0': + continue + + if admin_level != 'adm0': + admin1 = properties.get(name_key) + admin1_code = properties.get(code_key) + + regional_lang = None + is_default = None + if name_key: + regional_lang, is_default = regional_languages.get((country, name_key, admin1), (None, None)) + + if code_key and not regional_lang: + regional_lang, is_default = regional_languages.get((country, code_key, admin1_code), (None, None)) + + if not regional_lang: + continue + + languages = [(lang, is_default) for lang in regional_lang.split(',')] + else: + languages = official_languages[country].items() + overrides = road_language_overrides.get(country) + if overrides and overrides.values()[0]: + languages = overrides.items() + elif overrides: + languages.extend(overrides.items()) + + properties['languages'] = [{'lang': lang, 'default': default} + for lang, default in languages] + + poly_type = rec['geometry']['type'] + if poly_type == 'Polygon': + poly = Polygon(rec['geometry']['coordinates'][0]) + index.index_polygon(i, poly) + index.add_polygon(poly, dict(rec['properties'])) + elif poly_type == 'MultiPolygon': + polys = [] + for coords in rec['geometry']['coordinates']: + poly = Polygon(coords[0]) + polys.append(poly) + index.index_polygon(i, poly) + + index.add_polygon(MultiPolygon(polys), dict(rec['properties'])) + else: + continue + + i += 1 + return index