[osm] Adding an OSM file for places stored as nodes. Adding a general venue definition accessible from the geodata Python package. OSM definitions now expand simple variables, so definitions in the bash script can be reused and combined.

This commit is contained in:
Al
2016-07-06 15:40:04 -04:00
parent 79e1d7639b
commit c91950ea6c
3 changed files with 51 additions and 16 deletions

View File

@@ -300,7 +300,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
props['type'] = id_type
props['id'] = element_id
is_neighborhood = osm_definitions.meets_definition(attrs, osm_defintiions.NEIGHBORHOOD)
possible_neighborhood = osm_definitions.meets_definition(attrs, osm_defintiions.LOCALITY)
ranks = []
osm_names = []
@@ -338,7 +338,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex):
name = pattern.sub(repl, name)
normalized_qs_names[i] = name
if is_neighborhood and idx is qs and props.get(QuattroshapesReverseGeocoder.LEVEL) != 'neighborhood':
if possible_neighborhood and idx is qs and props.get(QuattroshapesReverseGeocoder.LEVEL) != 'neighborhood':
continue
if not contains_ideographs:

View File

@@ -4,11 +4,14 @@ import six
from collections import defaultdict
from geodata.graph.topsort import topsort
this_dir = os.path.realpath(os.path.dirname(__file__))
DEFAULT_SCRIPT_PATH = os.path.join(this_dir, 'fetch_osm_address_data.sh')
valid_key_regex = re.compile('VALID_(.*?)_KEYS="(.*)"')
variable_regex = re.compile(r'\$VALID_(.*?)_KEYS(?=\b)')
kv_regex = re.compile('([^\s]*)=([^\s]*)')
@@ -16,32 +19,54 @@ class OSMDefinitions(object):
ALL = '*'
ADMIN_BORDER = 'admin_border'
ADMIN_NODE = 'admin_node'
AEROWAY = 'aeroway'
AMENITY = 'amenity'
BUILDING = 'building'
HISTORIC = 'historic'
LANDUSE = 'landuse'
NATURAL = 'natural'
LOCALITY = 'locality'
NEIGHBORHOOD = 'neighborhood'
OFFICE = 'office'
PLACE = 'place'
POPULATED_PLACE = 'populated_place'
SHOP = 'shop'
TOURISM = 'tourism'
VENUE = 'venue'
WATERWAY = 'waterway'
def __init__(self, filename=DEFAULT_SCRIPT_PATH):
script = open(filename).read()
dependencies = defaultdict(list)
definitions = {}
for definition, text in valid_key_regex.findall(script):
matches = valid_key_regex.findall(script)
match_text = {d.lower(): t for d, t in matches}
for definition, text in matches:
variables = variable_regex.findall(text)
if not variables:
dependencies[definition.lower()] = []
for v in variables:
dependencies[definition.lower()].append(v.lower())
for definition in topsort(dependencies):
definition = definition.lower()
text = match_text[definition]
variables = variable_regex.findall(text)
for v in variables:
v = v.lower()
text = text.replace('$VALID_{}_KEYS'.format(v.upper()), match_text[v])
kvs = defaultdict(set)
for k, v in kv_regex.findall(text):
if v != '':
kvs[k].add(v)
kvs[k].add(v.lower())
else:
kvs[k].add(self.ALL)
@@ -53,8 +78,10 @@ class OSMDefinitions(object):
defs = self.definitions.get(category, {})
if not defs:
return False
elif self.ALL in defs:
return True
for k, v in six.iteritems(props):
if v in defs.get(k, set()):
if v.lower() in defs.get(k.lower(), set()):
return True
return False

View File

@@ -72,14 +72,14 @@ VALID_LEISURE_KEYS="leisure=adult_gaming_centre or leisure=amusement_arcade or l
VALID_LANDUSE_KEYS="landuse=allotmenets or landuse=basin or landuse=cemetery or landuse=commercial or landuse=construction or landuse=farmland or landuse=forest or landuse=grass or landuse=greenhouse_horticulture or landuse=industrial or landuse=landfill or landuse=meadow or landuse=military or landuse=orchard or landuse=plant_nursery or landuse=port or landuse=quarry or landuse=recreation_ground or landuse=resevoir or landuse=residential or landuse=retail or landuse=village_green or landuse=vineyard"
VALID_RAILWAY_KEYS="railway=station"
VALID_VENUES="( ( $VALID_AEROWAY_KEYS ) or ( $VALID_AMENITY_KEYS ) or ( $VALID_HISTORIC_KEYS ) or ( $VALID_OFFICE_KEYS ) or ( $VALID_PLACE_KEYS ) or ( $VALID_SHOP_KEYS ) or ( $VALID_TOURISM_KEYS ) or ( $VALID_LEISURE_KEYS ) or ( $VALID_LANDUSE_KEYS ) or ( $VALID_RAILWAY_KEYS ) )"
VALID_VENUE_KEYS="( ( $VALID_AEROWAY_KEYS ) or ( $VALID_AMENITY_KEYS ) or ( $VALID_HISTORIC_KEYS ) or ( $VALID_OFFICE_KEYS ) or ( $VALID_PLACE_KEYS ) or ( $VALID_SHOP_KEYS ) or ( $VALID_TOURISM_KEYS ) or ( $VALID_LEISURE_KEYS ) or ( $VALID_LANDUSE_KEYS ) or ( $VALID_RAILWAY_KEYS ) )"
# Address data set for use in parser, language detection
echo "Filtering for records with address tags: `date`"
PLANET_ADDRESSES_O5M="planet-addresses.o5m"
JAPAN_ADDRESSES_O5M="japan-addresses.o5m"
VALID_ADDRESSES="( ( ( name= or addr:housename= ) and $VALID_VENUES ) ) or ( ( addr:street= or addr:place= ) and ( name= or building= or building:levels= or addr:housename= or addr:housenumber= ) )"
VALID_ADDRESSES_JAPAN="( addr:housenumber= or addr:street= ) or ( ( name= or name:ja= or addr:housename= ) and $VALID_VENUES )"
VALID_ADDRESSES="( ( ( name= or addr:housename= ) and ( building!=yes or $VALID_VENUE_KEYS ) ) ) or ( ( addr:street= or addr:place= ) and ( name= or building= or building:levels= or addr:housename= or addr:housenumber= ) )"
VALID_ADDRESSES_JAPAN="( addr:housenumber= or addr:street= ) or ( ( name= or name:ja= or addr:housename= ) and ( building!=yes or $VALID_VENUE_KEYS ) )"
osmfilter $PLANET_O5M --keep="$VALID_ADDRESSES" --drop-author --drop-version -o=$PLANET_ADDRESSES_O5M &
osmfilter $JAPAN_O5M --keep="$VALID_ADDRESSES_JAPAN" --drop-author --drop-version -o=$JAPAN_ADDRESSES_O5M &
@@ -114,26 +114,34 @@ PLANET_BORDERS_O5M="planet-borders.o5m"
PLANET_BORDERS="planet-borders.osm"
PLANET_ADMIN_BORDERS_OSM="planet-admin-borders.osm"
VALID_ADMIN_BORDER_KEYS="boundary=administrative or boundary=town or boundary=city_limit or boundary=civil_parish or boundary=civil or boundary=ceremonial or place=island or place=city or place=town or place=village or place=hamlet or place=municipality"
VALID_NEIGHBORHOOD_KEYS="place=city or place=town or place=village or place=hamlet or placement=municipality or place=neighbourhood or place=suburb or place=quarter or place=borough or place=locality"
VALID_ADMIN_BORDER_KEYS="boundary=administrative or boundary=town or boundary=city_limit or boundary=civil_parish or boundary=civil or boundary=ceremonial or place=island or place=city or place=town or place=village or place=hamlet or place=municipality or place=settlement"
VALID_POPULATED_PLACE_KEYS="place=city or place=town or place=village or place=hamlet or placement=municipality or place=locality or place=settlement or place=census-designated or place:ph=village"
VALID_NEIGHBORHOOD_KEYS="place=neighbourhood or place=suburb or place=quarter or place=borough or place:ph=barangay"
VALID_LOCALITY_KEYS="place=city or place=town or place=village or place=hamlet or placement=municipality or place=neighbourhood or place=suburb or place=quarter or place=borough or place=locality or place=settlement or place=census-designated or place:ph=barangay or place:ph=village"
VALID_ADMIN_NODE_KEYS="place=city or place=town or place=village or place=hamlet or placement=municipality or place=neighbourhood or place=suburb or place=quarter or place=borough or place=island or place=islet or place=county or place=region or place=state or place=subdistrict or place=township or place=archipelago or place=department or place=country or place=district or place=census-designated or place=ward or place=subward or place=province or place=peninsula or place=settlement or place=subregion"
osmfilter $PLANET_O5M --keep="$VALID_ADMIN_BORDER_KEYS" --drop-author --drop-version -o=$PLANET_ADMIN_BORDERS_OSM
osmfilter $PLANET_O5M --keep="$VALID_ADMIN_BORDER_KEYS or $VALID_NEIGHBORHOOD_KEYS" --drop-author --drop-version -o=$PLANET_BORDERS_O5M
osmfilter $PLANET_O5M --keep="$VALID_ADMIN_BORDER_KEYS or $VALID_LOCALITY_KEYS" --drop-author --drop-version -o=$PLANET_BORDERS_O5M
PLANET_BORDERS_NODES="planet-borders-nodes.osm"
osmfilter $PLANET_O5M --keep="$VALID_ADMIN_NODE_KEYS" --drop-ways --drop-relations --ignore-dependencies --drop-author --drop-version -o=$PLANET_BORDERS_NODES
PLANET_BORDERS_LATLONS="planet-borders-latlons.o5m"
osmconvert $PLANET_BORDERS_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_BORDERS_LATLONS
rm $PLANET_BORDERS_O5M
osmfilter $PLANET_BORDERS_LATLONS --keep="$VALID_ADMIN_BORDER_KEYS or $VALID_NEIGHBORHOOD_KEYS" -o=$PLANET_BORDERS
osmfilter $PLANET_BORDERS_LATLONS --keep="$VALID_ADMIN_BORDER_KEYS or $VALID_LOCALITY_KEYS" -o=$PLANET_BORDERS
rm $PLANET_BORDERS_LATLONS
echo "Filtering for neighborhoods"
PLANET_NEIGHBORHOODS="planet-neighborhoods.osm"
osmfilter $PLANET_O5M --keep="name= and ( $VALID_NEIGHBORHOOD_KEYS )" --drop-relations --drop-ways --ignore-dependencies --drop-author --drop-version -o=$PLANET_NEIGHBORHOODS
osmfilter $PLANET_O5M --keep="name= and ( $VALID_LOCALITY_KEYS )" --drop-relations --drop-ways --ignore-dependencies --drop-author --drop-version -o=$PLANET_NEIGHBORHOODS
echo "Filtering for subdivision polygons"
PLANET_SUBDIVISIONS="planet-subdivisions.osm"
SUBDIVISION_AMENITY_TYPES="amenity=university or amentiy=college or amentiy=school or amentiy=hospital"
SUBDIVISION_LANDUSE_TYPES="landuse=residential or landuse=commercial or landuse=industrial or landuse=retail or landuse=military"
SUBDIVISION_PLACE_TYPES="place=allotmenets or place=city_block or place=plot or place=subdivision"
SUBDIVISION_PLACE_TYPES="place=allotmenets or place=city_block or place=block or place=plot or place=subdivision"
osmfilter $PLANET_O5M --keep="( $SUBDIVISION_AMENITY_TYPES or $SUBDIVISION_PLACE_TYPES or $SUBDIVISION_LANDUSE_TYPES )" --drop="( place= and not ( $SUBDIVISION_PLACE_TYPES ) ) or boundary=" --drop-author --drop-version -o=$PLANET_SUBDIVISIONS
echo "Filtering for postal_code polygons"
@@ -144,12 +152,12 @@ osmfilter $PLANET_O5M --keep="boundary=postal_code" --drop-author --drop-version
# Venue data set for use in venue classification
echo "Filtering for venue records: `date`"
PLANET_VENUES_O5M="planet-venues.o5m"
osmfilter $PLANET_O5M --keep="( name= and ( building!=yes or $VALID_VENUES )" --drop-author --drop-version -o=$PLANET_VENUES_O5M
osmfilter $PLANET_O5M --keep="( name= and ( building!=yes or $VALID_VENUE_KEYS )" --drop-author --drop-version -o=$PLANET_VENUES_O5M
PLANET_VENUES_LATLONS="planet-venues-latlons.o5m"
osmconvert $PLANET_VENUES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_VENUES_LATLONS
rm $PLANET_VENUES_O5M
PLANET_VENUES="planet-venues.osm"
osmfilter $PLANET_VENUES_LATLONS --keep="name= and ( building!=yes or ( $VALID_VENUES ) )" -o=$PLANET_VENUES
osmfilter $PLANET_VENUES_LATLONS --keep="name= and ( building!=yes or ( $VALID_VENUE_KEYS ) )" -o=$PLANET_VENUES
rm $PLANET_VENUES_LATLONS
# Categories for building generic queries like "restaurants in Brooklyn"