[doc] Adding some comments to fetch_osm_address_data.sh
This commit is contained in:
@@ -1,11 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
: '
|
||||
fetch_osm_address_data.sh
|
||||
-------------------------
|
||||
|
||||
Shell script to download OSM planet and derive inputs
|
||||
for language detection and address parser training set
|
||||
construction.
|
||||
'
|
||||
|
||||
if [ "$#" -eq 1 ]; then
|
||||
OUT_DIR=$1
|
||||
else
|
||||
OUT_DIR=`pwd`
|
||||
fi
|
||||
|
||||
# Check for osmfilter and osmconvert
|
||||
if ! type -P osmfilter osmconvert > /dev/null; then
|
||||
cat << EOF
|
||||
ERROR: osmfilter and osmconvert are required
|
||||
@@ -24,6 +34,8 @@ PREV_DIR=`pwd`
|
||||
|
||||
cd $OUT_DIR
|
||||
|
||||
# Download planet as PBF
|
||||
# TODO: currently uses a single mirror; randomly choose one instead
|
||||
echo "Started OSM download: `date`"
|
||||
wget http://ftp5.gwdg.de/pub/misc/openstreetmap/planet.openstreetmap.org/pbf/planet-latest.osm.pbf
|
||||
|
||||
@@ -31,8 +43,11 @@ echo "Converting to o5m: `date`"
|
||||
PLANET_PBF="planet-latest.osm.pbf"
|
||||
PLANET_O5M="planet-latest.o5m"
|
||||
|
||||
# The planet file needs to be in O5M format for some of the subsequent steps to work, whereas PBF is smaller to download
|
||||
osmconvert $PLANET_PBF -o=$PLANET_O5M
|
||||
rm $PLANET_PBF
|
||||
|
||||
# Address data set for use in parser, language detection
|
||||
echo "Filtering for records with address tags: `date`"
|
||||
PLANET_ADDRESSES_O5M="planet-addresses.o5m"
|
||||
osmfilter $PLANET_O5M --keep="addr:street= and ( ( name= and amenity= ) or addr:housename= or addr:housenumber= )" --drop-author --drop-version -o=$PLANET_ADDRESSES_O5M
|
||||
@@ -43,6 +58,7 @@ PLANET_ADDRESSES="planet-addresses.osm"
|
||||
osmfilter $PLANET_ADDRESSES_LATLONS --keep="addr:street= and ( ( name= and amenity= ) or addr:housename= or addr:housenumber= )" -o=$PLANET_ADDRESSES
|
||||
rm $PLANET_ADDRESSES_LATLONS
|
||||
|
||||
# Border data set for use in R-tree index/reverse geocoding, parsing, language detection
|
||||
echo " Filtering for borders: `date`"
|
||||
PLANET_BORDERS_O5M="planet-borders.o5m"
|
||||
PLANET_BORDERS="planet-borders.osm"
|
||||
@@ -55,7 +71,7 @@ rm $PLANET_BORDERS_O5M
|
||||
osmfilter $PLANET_BORDERS_LATLONS --keep="boundary=administrative or place=city or place=town or place=neighbourhood or place=suburb" -o=$PLANET_BORDERS
|
||||
rm $PLANET_BORDERS_LATLONS
|
||||
|
||||
|
||||
# Venue data set for use in venue classification
|
||||
echo "Filtering for venue records: `date`"
|
||||
PLANET_VENUES_O5M="planet-venues.o5m"
|
||||
osmfilter $PLANET_O5M --keep="name= and ( amenity= or building= )" --drop-author --drop-version -o=$PLANET_VENUES_O5M
|
||||
@@ -66,12 +82,11 @@ PLANET_VENUES="planet-venues.osm"
|
||||
osmfilter $PLANET_VENUES_LATLONS --keep="name= and ( amenity= or building= )" -o=$PLANET_VENUES
|
||||
rm $PLANET_VENUES_LATLONS
|
||||
|
||||
# Streets data set for use in language classification
|
||||
echo "Filtering ways: `date`"
|
||||
PLANET_WAYS_O5M="planet-ways.o5m"
|
||||
osmfilter planet-latest.o5m --keep="name= and highway=" --drop-relations --drop-author --drop-version -o=$PLANET_WAYS_O5M
|
||||
rm $PLANET_O5M
|
||||
|
||||
echo "Extracting ways: `date`"
|
||||
PLANET_WAYS_NODES_LATLON="planet-ways-nodes-latlons.o5m"
|
||||
osmconvert $PLANET_WAYS_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_WAYS_NODES_LATLON
|
||||
# 10^15 is the offset used for ways and relations with --all-to-nodes; this extracts just the ways
|
||||
|
||||
Reference in New Issue
Block a user