[osm] Neighborhood index now uses OSM admin R-tree to check whether nodes that are otherwise classified as non-suburbs (e.g. Santa Monica is a city) but may still match one of the neighborhood data sets, are excluded from the final neighborhood index
This commit is contained in:
@@ -11,6 +11,7 @@ import sys
|
|||||||
this_dir = os.path.realpath(os.path.dirname(__file__))
|
this_dir = os.path.realpath(os.path.dirname(__file__))
|
||||||
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
|
||||||
|
|
||||||
|
from geodata.address_formatting.formatter import AddressFormatter
|
||||||
from geodata.coordinates.conversion import latlon_to_decimal
|
from geodata.coordinates.conversion import latlon_to_decimal
|
||||||
from geodata.encoding import safe_decode
|
from geodata.encoding import safe_decode
|
||||||
from geodata.file_utils import ensure_dir, download_file
|
from geodata.file_utils import ensure_dir, download_file
|
||||||
@@ -20,7 +21,8 @@ from geodata.names.deduping import NameDeduper
|
|||||||
from geodata.osm.definitions import osm_definitions
|
from geodata.osm.definitions import osm_definitions
|
||||||
from geodata.osm.extract import parse_osm, osm_type_and_id, NODE, WAY, RELATION, OSM_NAME_TAGS
|
from geodata.osm.extract import parse_osm, osm_type_and_id, NODE, WAY, RELATION, OSM_NAME_TAGS
|
||||||
from geodata.polygons.index import *
|
from geodata.polygons.index import *
|
||||||
from geodata.polygons.reverse_geocode import QuattroshapesReverseGeocoder
|
from geodata.polygons.language_polys import LanguagePolygonIndex
|
||||||
|
from geodata.polygons.reverse_geocode import QuattroshapesReverseGeocoder, OSMReverseGeocoder
|
||||||
from geodata.statistics.tf_idf import IDFIndex
|
from geodata.statistics.tf_idf import IDFIndex
|
||||||
|
|
||||||
|
|
||||||
@@ -233,7 +235,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
|
|||||||
return doc
|
return doc
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_from_osm_and_quattroshapes(cls, filename, quattroshapes_dir, output_dir, scratch_dir=SCRATCH_DIR):
|
def create_from_osm_and_quattroshapes(cls, filename, quattroshapes_dir, language_rtree_dir, osm_rtree_dir, output_dir, scratch_dir=SCRATCH_DIR):
|
||||||
'''
|
'''
|
||||||
Given an OSM file (planet or some other bounds) containing neighborhoods
|
Given an OSM file (planet or some other bounds) containing neighborhoods
|
||||||
as points (some suburbs have boundaries)
|
as points (some suburbs have boundaries)
|
||||||
@@ -259,6 +261,10 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
|
|||||||
logger.info('Creating ClickThatHood neighborhoods')
|
logger.info('Creating ClickThatHood neighborhoods')
|
||||||
cth = ClickThatHoodReverseGeocoder.create_neighborhoods_index()
|
cth = ClickThatHoodReverseGeocoder.create_neighborhoods_index()
|
||||||
|
|
||||||
|
language_rtree = LanguagePolygonIndex.load(language_rtree_dir)
|
||||||
|
|
||||||
|
osm_admin_rtree = OSMReverseGeocoder.load(osm_rtree_dir)
|
||||||
|
|
||||||
logger.info('Creating IDF index')
|
logger.info('Creating IDF index')
|
||||||
idf = IDFIndex()
|
idf = IDFIndex()
|
||||||
|
|
||||||
@@ -300,7 +306,16 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
|
|||||||
props['type'] = id_type
|
props['type'] = id_type
|
||||||
props['id'] = element_id
|
props['id'] = element_id
|
||||||
|
|
||||||
possible_neighborhood = osm_definitions.meets_definition(attrs, osm_defintiions.LOCALITY)
|
possible_neighborhood = osm_definitions.meets_definition(attrs, osm_defintions.NEIGHBORHOOD)
|
||||||
|
|
||||||
|
country, candidate_languages, language_props = language_rtree.country_and_languages(lat, lon)
|
||||||
|
component_name = None
|
||||||
|
for k, v in six.iteritems(attrs):
|
||||||
|
component_name = osm_address_components.get_component(country, k, v)
|
||||||
|
if component_name:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
component_name = None
|
||||||
|
|
||||||
ranks = []
|
ranks = []
|
||||||
osm_names = []
|
osm_names = []
|
||||||
@@ -313,6 +328,26 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
|
|||||||
for name_key in OSM_NAME_TAGS:
|
for name_key in OSM_NAME_TAGS:
|
||||||
osm_names.extend([v for k, v in six.iteritems(attrs) if k.startswith('{}:'.format(name_key))])
|
osm_names.extend([v for k, v in six.iteritems(attrs) if k.startswith('{}:'.format(name_key))])
|
||||||
|
|
||||||
|
if component_name and component_name != AddressFormatter.SUBURB:
|
||||||
|
existing_osm_candidates = osm_admin_rtree.get_candidate_polygons(lat, lon)
|
||||||
|
skip_node = False
|
||||||
|
for i in existing_osm_candidates:
|
||||||
|
props = osm_admin_rtree.get_properties(i)
|
||||||
|
containing_component = None
|
||||||
|
name = props.get('name')
|
||||||
|
# Only exact name matches here since we're comparins OSM to OSM
|
||||||
|
if name and name == attrs.get('name'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
containing_component = osm_components.get_first_component(country, props)
|
||||||
|
|
||||||
|
if containing_component != AddressFormatter.SUBURB:
|
||||||
|
skip_node = True
|
||||||
|
break
|
||||||
|
# Skip this element
|
||||||
|
if skip_node:
|
||||||
|
continue
|
||||||
|
|
||||||
for idx in (cth, qs):
|
for idx in (cth, qs):
|
||||||
candidates = idx.get_candidate_polygons(lat, lon, return_all=True)
|
candidates = idx.get_candidate_polygons(lat, lon, return_all=True)
|
||||||
|
|
||||||
@@ -439,6 +474,12 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument('-q', '--quattroshapes-dir',
|
parser.add_argument('-q', '--quattroshapes-dir',
|
||||||
help='Path to quattroshapes dir')
|
help='Path to quattroshapes dir')
|
||||||
|
|
||||||
|
parser.add_argument('-a', '--osm-admin-rtree-dir',
|
||||||
|
help='Path to OSM admin rtree dir')
|
||||||
|
|
||||||
|
parser.add_argument('-l', '--language-rtree-dir',
|
||||||
|
help='Path to language rtree dir')
|
||||||
|
|
||||||
parser.add_argument('-n', '--osm-neighborhoods-file',
|
parser.add_argument('-n', '--osm-neighborhoods-file',
|
||||||
help='Path to OSM neighborhoods file (no dependencies, .osm format)')
|
help='Path to OSM neighborhoods file (no dependencies, .osm format)')
|
||||||
|
|
||||||
@@ -449,10 +490,12 @@ if __name__ == '__main__':
|
|||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.osm_neighborhoods_file and args.quattroshapes_dir:
|
if args.osm_neighborhoods_file and args.quattroshapes_dir and args.osm_admin_rtree_dir and args.language_rtree_dir:
|
||||||
index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes(
|
index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes(
|
||||||
args.osm_neighborhoods_file,
|
args.osm_neighborhoods_file,
|
||||||
args.quattroshapes_dir,
|
args.quattroshapes_dir,
|
||||||
|
args.language_rtree_dir,
|
||||||
|
args.osm_rtree_dir,
|
||||||
args.out_dir
|
args.out_dir
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user