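[osm] No longer requiring a street name (addr:street) for named amenity, leisure, or tourism records in OSM planet addresses; records matched by house number or house name still require it. Adding the leisure and tourism keys to capture things like parks, squares, etc. Adding place=locality to the neighborhoods filter.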

This commit is contained in:
Al
2016-03-09 18:19:33 -05:00
parent b3c8a72ca6
commit d43fe201ff

View File

@@ -7,6 +7,8 @@ fetch_osm_address_data.sh
Shell script to download OSM planet and derive inputs Shell script to download OSM planet and derive inputs
for language detection and address parser training set for language detection and address parser training set
construction. construction.
Usage: ./fetch_osm_address_data.sh out_dir
' '
if [ "$#" -ge 1 ]; then if [ "$#" -ge 1 ]; then
@@ -50,12 +52,12 @@ rm $PLANET_PBF
# Address data set for use in parser, language detection # Address data set for use in parser, language detection
echo "Filtering for records with address tags: `date`" echo "Filtering for records with address tags: `date`"
PLANET_ADDRESSES_O5M="planet-addresses.o5m" PLANET_ADDRESSES_O5M="planet-addresses.o5m"
osmfilter $PLANET_O5M --keep="addr:street= and ( ( name= and amenity= ) or addr:housename= or addr:housenumber= )" --drop-author --drop-version -o=$PLANET_ADDRESSES_O5M osmfilter $PLANET_O5M --keep="( ( name= and ( amenity= or leisure= or tourism= ) ) or ( addr:street= and ( addr:housename= or addr:housenumber= ) ) )" --drop-author --drop-version -o=$PLANET_ADDRESSES_O5M
PLANET_ADDRESSES_LATLONS="planet-addresses-latlons.o5m" PLANET_ADDRESSES_LATLONS="planet-addresses-latlons.o5m"
osmconvert $PLANET_ADDRESSES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_ADDRESSES_LATLONS osmconvert $PLANET_ADDRESSES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_ADDRESSES_LATLONS
rm $PLANET_ADDRESSES_O5M rm $PLANET_ADDRESSES_O5M
PLANET_ADDRESSES="planet-addresses.osm" PLANET_ADDRESSES="planet-addresses.osm"
osmfilter $PLANET_ADDRESSES_LATLONS --keep="addr:street= and ( ( name= and amenity= ) or addr:housename= or addr:housenumber= )" -o=$PLANET_ADDRESSES osmfilter $PLANET_ADDRESSES_LATLONS --keep="( ( name= and ( amenity= or leisure= or tourism= ) ) or ( addr:street= and ( addr:housename= or addr:housenumber= ) ) )" -o=$PLANET_ADDRESSES
rm $PLANET_ADDRESSES_LATLONS rm $PLANET_ADDRESSES_LATLONS
# Border data set for use in R-tree index/reverse geocoding, parsing, language detection # Border data set for use in R-tree index/reverse geocoding, parsing, language detection
@@ -73,17 +75,17 @@ rm $PLANET_BORDERS_LATLONS
echo "Filtering for neighborhoods" echo "Filtering for neighborhoods"
PLANET_NEIGHBORHOODS="planet-neighborhoods.osm" PLANET_NEIGHBORHOODS="planet-neighborhoods.osm"
osmfilter $PLANET_O5M --keep="name= and ( place=neighbourhood or place=suburb or place=quarter or place=borough )" --drop-relations --drop-ways --ignore-dependencies --drop-author --drop-version -o=$PLANET_NEIGHBORHOODS osmfilter $PLANET_O5M --keep="name= and ( place=neighbourhood or place=suburb or place=quarter or place=borough or place=locality )" --drop-relations --drop-ways --ignore-dependencies --drop-author --drop-version -o=$PLANET_NEIGHBORHOODS
# Venue data set for use in venue classification # Venue data set for use in venue classification
echo "Filtering for venue records: `date`" echo "Filtering for venue records: `date`"
PLANET_VENUES_O5M="planet-venues.o5m" PLANET_VENUES_O5M="planet-venues.o5m"
osmfilter $PLANET_O5M --keep="name= and ( amenity= or building= )" --drop-author --drop-version -o=$PLANET_VENUES_O5M osmfilter $PLANET_O5M --keep="name= and ( amenity= or building= or leisure= or tourism= )" --drop-author --drop-version -o=$PLANET_VENUES_O5M
PLANET_VENUES_LATLONS="planet-venues-latlons.o5m" PLANET_VENUES_LATLONS="planet-venues-latlons.o5m"
osmconvert $PLANET_VENUES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_VENUES_LATLONS osmconvert $PLANET_VENUES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_VENUES_LATLONS
rm $PLANET_VENUES_O5M rm $PLANET_VENUES_O5M
PLANET_VENUES="planet-venues.osm" PLANET_VENUES="planet-venues.osm"
osmfilter $PLANET_VENUES_LATLONS --keep="name= and ( amenity= or building= )" -o=$PLANET_VENUES osmfilter $PLANET_VENUES_LATLONS --keep="name= and ( amenity= or building= or leisure= or tourism= )" -o=$PLANET_VENUES
rm $PLANET_VENUES_LATLONS rm $PLANET_VENUES_LATLONS
# Streets data set for use in language classification # Streets data set for use in language classification