NOTE(review): the line structure of this patch was restored from a copy whose
newlines and indentation had been collapsed. Indentation and the position of
blank context lines are reconstructed so that every hunk matches its header
counts -- verify against the repository before applying.
NOTE(review): in neighborhoods/reverse_geocode.py the added lines assign to
`props`, but only `attrs` is visible in that loop -- confirm `props` is
defined at that point.
NOTE(review): `osm_type_and_id` calls `long`, which exists only on Python 2.

diff --git a/scripts/geodata/neighborhoods/reverse_geocode.py b/scripts/geodata/neighborhoods/reverse_geocode.py
index e74a7f24..adb4c741 100644
--- a/scripts/geodata/neighborhoods/reverse_geocode.py
+++ b/scripts/geodata/neighborhoods/reverse_geocode.py
@@ -17,7 +17,7 @@ from geodata.file_utils import ensure_dir, download_file
 from geodata.i18n.unicode_properties import get_chars_by_script
 from geodata.i18n.word_breaks import ideographic_scripts
 from geodata.names.deduping import NameDeduper
-from geodata.osm.extract import parse_osm, OSM_NAME_TAGS, WAY_OFFSET, RELATION_OFFSET
+from geodata.osm.extract import parse_osm, osm_type_and_id, NODE, WAY, RELATION, OSM_NAME_TAGS
 from geodata.polygons.index import *
 from geodata.polygons.reverse_geocode import QuattroshapesReverseGeocoder
 from geodata.statistics.tf_idf import IDFIndex
@@ -269,7 +269,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
         logger.info('Matching OSM points to neighborhood polygons')
         # Parse OSM and match neighborhood/suburb points to Quattroshapes/Zetashapes polygons
         num_polys = 0
-        for node_id, attrs, deps in parse_osm(filename):
+        for element_id, attrs, deps in parse_osm(filename):
             try:
                 lat, lon = latlon_to_decimal(attrs['lat'], attrs['lon'])
             except ValueError:
@@ -279,6 +279,11 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
             if not osm_name:
                 continue
 
+            id_type, element_id = osm_type_and_id(element_id)
+
+            props['type'] = id_type
+            props['id'] = element_id
+
             is_neighborhood = attrs.get('place') == 'neighbourhood'
 
             ranks = []
@@ -349,6 +354,8 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
 
                     else:
                         attrs['polygon_type'] = 'local_admin'
+
+                    attrs['source'] = source
                     index.index_polygon(poly)
                     index.add_polygon(poly, attrs)
 
diff --git a/scripts/geodata/osm/extract.py b/scripts/geodata/osm/extract.py
index ed2e62ec..5cda49bc 100644
--- a/scripts/geodata/osm/extract.py
+++ b/scripts/geodata/osm/extract.py
@@ -23,8 +23,12 @@ from geodata.encoding import safe_decode
 WAY_OFFSET = 10 ** 15
 RELATION_OFFSET = 2 * 10 ** 15
 
-ALL_OSM_TAGS = set(['node', 'way', 'relation'])
-WAYS_RELATIONS = set(['way', 'relation'])
+NODE = 'node'
+WAY = 'way'
+RELATION = 'relation'
+
+ALL_OSM_TAGS = set([NODE, WAY, RELATION])
+WAYS_RELATIONS = set([WAY, RELATION])
 
 OSM_NAME_TAGS = (
     'name',
@@ -87,6 +91,20 @@ def parse_osm(filename, allowed_types=ALL_OSM_TAGS, dependencies=False):
             while elem.getprevious() is not None:
                 del elem.getparent()[0]
 
+
+def osm_type_and_id(element_id):
+    element_id = long(element_id)
+    if element_id >= RELATION_OFFSET:
+        id_type = RELATION
+        element_id -= RELATION_OFFSET
+    elif element_id >= WAY_OFFSET:
+        id_type = WAY
+        element_id -= WAY_OFFSET
+    else:
+        id_type = NODE
+
+    return id_type, element_id
+
 
 apposition_regex = re.compile('(.*[^\s])[\s]*\([\s]*(.*[^\s])[\s]*\)$', re.I)
 html_parser = HTMLParser.HTMLParser()
diff --git a/scripts/geodata/polygons/reverse_geocode.py b/scripts/geodata/polygons/reverse_geocode.py
index 034008c5..20fa7bae 100644
--- a/scripts/geodata/polygons/reverse_geocode.py
+++ b/scripts/geodata/polygons/reverse_geocode.py
@@ -33,7 +33,7 @@ from geodata.file_utils import ensure_dir, download_file
 from geodata.i18n.unicode_properties import get_chars_by_script
 from geodata.i18n.word_breaks import ideographic_scripts
 from geodata.names.deduping import NameDeduper
-from geodata.osm.extract import parse_osm, OSM_NAME_TAGS, WAY_OFFSET, RELATION_OFFSET
+from geodata.osm.extract import parse_osm, osm_type_and_id, NODE, WAY, RELATION, OSM_NAME_TAGS
 from geodata.osm.admin_boundaries import OSMAdminPolygonReader, OSMSubdivisionPolygonReader, OSMBuildingPolygonReader
 from geodata.polygons.index import *
 from geodata.statistics.tf_idf import IDFIndex
@@ -331,15 +331,8 @@ class OSMReverseGeocoder(RTreePolygonIndex):
                      if k in cls.include_property_patterns or (six.u(':') in k and
                      six.u('{}:*').format(k.split(six.u(':'), 1)[0]) in cls.include_property_patterns)}
 
-            if element_id >= RELATION_OFFSET:
-                props['type'] = 'relation'
-                element_id -= RELATION_OFFSET
-            elif element_id >= WAY_OFFSET:
-                props['type'] = 'way'
-                element_id -= WAY_OFFSET
-            else:
-                props['type'] = 'node'
-
+            id_type, element_id = osm_type_and_id(element_id)
+            props['type'] = id_type
             props['id'] = element_id
 
             if inner_polys and not outer_polys: