Reviewer note: line breaks in this patch were restored from a whitespace-collapsed copy.
Blank context lines could not be recovered, so hunk headers count more lines than are
listed below; re-insert the blank context lines before applying.

diff --git a/scripts/geodata/osm/fetch_osm_address_data.sh b/scripts/geodata/osm/fetch_osm_address_data.sh
index d0ef0156..89fcf74e 100755
--- a/scripts/geodata/osm/fetch_osm_address_data.sh
+++ b/scripts/geodata/osm/fetch_osm_address_data.sh
@@ -50,15 +50,17 @@ osmconvert $PLANET_PBF -o=$PLANET_O5M
 rm $PLANET_PBF
 IS_AIRPORT="aeroway=aerodrome"
-VALID_AMENITIES="amenity=ambulance_station or amenity=animal_boarding or amenity=animal_shelter or amenity=arts_centre or amenity=baby_hatch or amenity=bank or amenity=bar or amenity=bbq or amenity=biergarten or amenity=boathouse or amenity=boat_rental or amenity=boat_sharing or amenity=boat_storage or amenity=brothel or amenity=bureau_de_change or amenity=bus_station or amenity=cafe or amenity=car_rental or amenity=car_sharing or amenity=car_wash or amenity=casino or amenity=cemetery or amenity=charging_station or amenity=cinema or amenity=childcare or amenity=clinic or amenity=club or amenity=clock or amenity=college or amenity=community_center or amenity=community_centre or amenity=community_hall or amenity=concert_hall or amenity=conference_centre or amenity=courthouse or amenity=coworking_space or amenity=crematorium or amenity=crypt or amenity=culture_center or amenity=dancing_school or amenity=dentist or amenity=dive_centre or amenity=doctors or amenity=dojo or amenity=dormitory or amenity=driving_school or amenity=embassy or amenity=emergency_service or amenity=events_venue or amenity=exhibition_centre or amenity=fast_food or amenity=ferry_terminal or amenity=festival_grounds or amenity=fire_station or amenity=food_count or amenity=fountain or amenity=fuel or amenity=gambling or amenity=game_feeding or amenity=grave_yard or amenity=greenhouse or amenity=gym or amenity=health_centre or amenity=hospice or amenity=hospital or amenity=hunting_stand or amenity=ice_cream or amenity=internet_cafe or amenity=kindergarten or amenity=kiosk or amenity=kneipp_water_cure or amenity=language_school or amenity=lavoir or amenity=library or amenity=love_hotel or amenity=market or amenity=marketplace or amenity=medical_centre or amenity=mobile_money_agent or amenity=monastery or amenity=money_transfer or amenity=mortuary or amenity=music_school or amenity=music_venue or amenity=nightclub or amenity=nursery or amenity=nursing_home or amenity=office or amenity=parish_hall or amenity=park or amenity=pharmacy or amenity=planetarium or amenity=place_of_worship or amenity=police or amenity=post_office or amenity=preschool or amenity=prison or amenity=pub or amenity=public_bath or amenity=public_bookcase or amenity=public_building or amenity=public_facility or amenity=public_hall or amenity=public_market or amenity=ranger_station or amenity=refugee_housing or amenity=register_office or amenity=research_institute or amenity=rescue_station or amenity=residential or amenity=Residential or amenity=restaurant or amenity=retirement_home or amenity=sacco or amenity=sanitary_dump_station or amenity=sanitorium or amenity=sauna or amenity=school or amenity=shelter or amenity=shop or amenity=shower or amenity=ski_rental or amenity=ski_school or amenity=social_centre or amenity=social_club or amenity=social_facility or amenity=spa or amenity=stables or amenity=stripclub or amenity=studio or amenity=swimming_pool or amenity=swingerclub or amenity=townhall or amenity=theatre or amenity=training or amenity=trolley_bay or amenity=university or amenity=vehicle_inspection or amenity=veterinary or amenity=village_hall or amenity=vivarium or amenity=waste_transfer_station or amenity=whirlpool or amenity=winery or amenity=youth_centre"
-GENERIC_AMENITIES="amenity=atm or amenity=bench or amenity=bicycle_parking or amenity=bicycle_rental or amenity=bicycle_repair_station or amenity=compressed_air or amenity=drinking_water or amenity=emergency_phone or amenity=grit_bin or amenity=motorcycle_parking or amenity=parking or amenity=parking_space or amenity=recycling or amenity=taxi or amenity=ticket_validator or amenity=toilets or amenity=vending_machine or amenity=waste_basket or amenity=waste_disposal or amenity=water_point or amenity=watering_place"
+VALID_AMENITIES="amenity=ambulance_station or amenity=animal_boarding or amenity=animal_shelter or amenity=arts_centre or amenity=auditorium or amenity=baby_hatch or amenity=bank or amenity=bar or amenity=bbq or amenity=biergarten or amenity=boathouse or amenity=boat_rental or amenity=boat_sharing or amenity=boat_storage or amenity=brothel or amenity=bureau_de_change or amenity=bus_station or amenity=cafe or amenity=car_rental or amenity=car_sharing or amenity=car_wash or amenity=casino or amenity=cemetery or amenity=charging_station or amenity=cinema or amenity=childcare or amenity=clinic or amenity=club or amenity=clock or amenity=college or amenity=community_center or amenity=community_centre or amenity=community_hall or amenity=concert_hall or amenity=conference_centre or amenity=courthouse or amenity=coworking_space or amenity=crematorium or amenity=crypt or amenity=culture_center or amenity=dancing_school or amenity=dentist or amenity=dive_centre or amenity=doctors or amenity=dojo or amenity=dormitory or amenity=driving_school or amenity=embassy or amenity=emergency_service or amenity=events_venue or amenity=exhibition_centre or amenity=fast_food or amenity=ferry_terminal or amenity=festival_grounds or amenity=fire_station or amenity=food_count or amenity=food_court or amenity=fountain or amenity=fuel or amenity=gambling or amenity=game_feeding or amenity=grave_yard or amenity=greenhouse or amenity=gym or amenity=hall or amenity=health_centre or amenity=hospice or amenity=hospital or amenity=hotel or amenity=hunting_stand or amenity=ice_cream or amenity=internet_cafe or amenity=kindergarten or amenity=kiosk or amenity=kneipp_water_cure or amenity=language_school or amenity=lavoir or amenity=library or amenity=love_hotel or amenity=market or amenity=marketplace or amenity=medical_centre or amenity=mobile_money_agent or amenity=monastery or amenity=money_transfer or amenity=mortuary or amenity=mountain_rescue or amenity=music_school or amenity=music_venue or amenity=nightclub or amenity=nursery or amenity=nursing_home or amenity=office or amenity=parish_hall or amenity=park or amenity=pharmacy or amenity=planetarium or amenity=place_of_worship or amenity=police or amenity=post_office or amenity=preschool or amenity=prison or amenity=pub or amenity=public_bath or amenity=public_bookcase or amenity=public_building or amenity=public_facility or amenity=public_hall or amenity=public_market or amenity=ranger_station or amenity=refugee_housing or amenity=register_office or amenity=research_institute or amenity=rescue_station or amenity=residential or amenity=Residential or amenity=restaurant or amenity=retirement_home or amenity=sacco or amenity=sanitary_dump_station or amenity=sanitorium or amenity=sauna or amenity=school or amenity=shelter or amenity=shop or amenity=shopping or amenity=shower or amenity=ski_rental or amenity=ski_school or amenity=social_centre or amenity=social_club or amenity=social_facility or amenity=spa or amenity=stables or amenity=stripclub or amenity=studio or amenity=supermarket or amenity=swimming_pool or amenity=swingerclub or amenity=townhall or amenity=theatre or amenity=training or amenity=trolley_bay or amenity=university or amenity=vehicle_inspection or amenity=veterinary or amenity=village_hall or amenity=vivarium or amenity=waste_transfer_station or amenity=whirlpool or amenity=winery or amenity=youth_centre"
+GENERIC_AMENITIES="amenity=atm or amenity=bench or amenity=bicycle_parking or amenity=bicycle_rental or amenity=bicycle_repair_station or amenity=compressed_air or amenity=drinking_water or amenity=emergency_phone or amenity=fire_hydrant or amenity=grit_bin or amenity=motorcycle_parking or amenity=parking or amenity=parking_space or amenity=post_box or amenity=reception_area or amenity=recycling or amenity=taxi or amenity=telephone or amenity=ticket_validator or amenity=toilets or amenity=vending_machine or amenity=waste_basket or amenity=waste_disposal or amenity=water_point or amenity=watering_place or amenity=wifi"
 VALID_OFFICE_KEYS="office=accountant or office=administrative or office=administration or office=advertising_agency or office=architect or office=association or office=camping or office=charity or office=company or office=consulting or office=educational_institution or office=employment_agency or office=estate_agent or office=financial or office=forestry or office=foundation or office=government or office=insurance or office=it or office=lawyer or office=newspaper or office=ngo or office=notary or office=parish or office=physician or office=political_party or office=publisher or office=quango or office=real_estate_agent or office=realtor or office=register or office=religion or office=research or office=tax or office=tax_advisor or office=telecommunication or office=therapist or office=travel_agent or office=water_utility"
 VALID_SHOP_KEYS="shop="
+VALID_HISTORIC_KEYS="historic=archaeological_site or historic=castle or historic=fort or historic=memorial or historic=monument or historic=ruins or historic=tomb"
 VALID_TOURISM_KEYS="tourism=hotel or tourism=attraction or tourism=guest_house or tourism=museum or tourism=chalet or tourism=motel or tourism=hostel or tourism=alpine_hut or tourism=theme_park or tourism=zoo or tourism=apartment or tourism=wilderness_hut or tourism=gallery or tourism=bed_and_breakfast or tourism=hanami or tourism=wine_cellar or tourism=resort or tourism=aquarium or tourism=apartments or tourism=cabin or tourism=winery or tourism=hut"
 VALID_LEISURE_KEYS="leisure=adult_gaming_centre or leisure=amusement_arcade or leisure=arena or leisure=bandstand or leisure=beach_resort or leisure=bbq or leisure=bird_hide or leisure=bowling_alley or leisure=casino or leisure=common or leisure=club or leisure=dance or leisure=dancing or leisure=disc_golf_course or leisure=dog_park or leisure=fishing or leisure=fitness_centre or leisure=gambling or leisure=garden or leisure=golf_course or leisure=hackerspace or leisure=horse_riding or leisure=hospital or leisure=hot_spring or leisure=ice_rink leisure=landscape_reserve or leisure=marina or leisure=maze or leisure=miniature_golf or leisure=nature_reserve or leisure=padding_pool or leisure=park or leisure=pitch or leisure=playground or leisure=recreation_ground or leisure=resort or leisure=sailing_club or leisure=sauna or leisure=social_club or leisure=sports_centre or leisure=stadium or leisure=summer_camp or leisure=swimming_pool or leisure=tanning_salon or leisure=track or leisure=trampoline_park or leisure=turkish_bath or leisure=video_arcade or leisure=water_park or leisure=wildlife_hide"
+VALID_LANDUSE_KEYS="landuse=allotments or landuse=basin or landuse=cemetery or landuse=commercial or landuse=construction or landuse=farmland or landuse=forest or landuse=grass or landuse=greenhouse_horticulture or landuse=industrial or landuse=landfill or landuse=meadow or landuse=military or landuse=orchard or landuse=plant_nursery or landuse=port or landuse=quarry or landuse=recreation_ground or landuse=reservoir or landuse=residential or landuse=retail or landuse=village_green or landuse=vineyard"
-VALID_VENUES="( ( $IS_AIRPORT ) or ( $VALID_AMENITIES ) or ( $VALID_OFFICE_KEYS ) or ( $VALID_SHOP_KEYS ) or ( $VALID_TOURISM_KEYS ) or ( $VALID_LEISURE_KEYS ) )"
+VALID_VENUES="( ( $IS_AIRPORT ) or ( $VALID_AMENITIES ) or ( $VALID_HISTORIC_KEYS ) or ( $VALID_OFFICE_KEYS ) or ( $VALID_SHOP_KEYS ) or ( $VALID_TOURISM_KEYS ) or ( $VALID_LEISURE_KEYS ) or ( $VALID_LANDUSE_KEYS ) )"
 # Address data set for use in parser, language detection
 echo "Filtering for records with address tags: `date`"
@@ -76,22 +78,30 @@ echo "Filtering for borders: `date`"
 PLANET_BORDERS_O5M="planet-borders.o5m"
 PLANET_BORDERS="planet-borders.osm"
 PLANET_ADMIN_BORDERS_OSM="planet-admin-borders.osm"
-osmfilter $PLANET_O5M --keep="boundary=administrative or boundary=town or boundary=city_limit or boundary=civil_parish or boundary=ceremonial or place=island" --drop-author --drop-version -o=$PLANET_ADMIN_BORDERS_OSM
-osmfilter $PLANET_O5M --keep="boundary=administrative or place=city or place=town or place=village or place=hamlet or place=neighbourhood or place=suburb or place=quarter or place=borough" --drop-author --drop-version -o=$PLANET_BORDERS_O5M
+
+VALID_ADMIN_BORDERS="boundary=administrative or boundary=town or boundary=city_limit or boundary=civil_parish or boundary=ceremonial or place=island"
+VALID_NEIGHBORHOODS="place=city or place=town or place=village or place=hamlet or place=neighbourhood or place=suburb or place=quarter or place=borough or place=locality"
+
+osmfilter $PLANET_O5M --keep="$VALID_ADMIN_BORDERS" --drop-author --drop-version -o=$PLANET_ADMIN_BORDERS_OSM
+osmfilter $PLANET_O5M --keep="$VALID_ADMIN_BORDERS or $VALID_NEIGHBORHOODS" --drop-author --drop-version -o=$PLANET_BORDERS_O5M
 PLANET_BORDERS_LATLONS="planet-borders-latlons.o5m"
 osmconvert $PLANET_BORDERS_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_BORDERS_LATLONS
 rm $PLANET_BORDERS_O5M
-osmfilter $PLANET_BORDERS_LATLONS --keep="boundary=administrative or place=city or place=town or place=village or place=hamlet or place=neighbourhood or place=suburb or place=quarter or place=borough" -o=$PLANET_BORDERS
+osmfilter $PLANET_BORDERS_LATLONS --keep="$VALID_ADMIN_BORDERS or $VALID_NEIGHBORHOODS" -o=$PLANET_BORDERS
 rm $PLANET_BORDERS_LATLONS
 echo "Filtering for neighborhoods"
 PLANET_NEIGHBORHOODS="planet-neighborhoods.osm"
-osmfilter $PLANET_O5M --keep="name= and ( place=neighbourhood or place=suburb or place=quarter or place=borough or place=locality )" --drop-relations --drop-ways --ignore-dependencies --drop-author --drop-version -o=$PLANET_NEIGHBORHOODS
+osmfilter $PLANET_O5M --keep="name= and ( $VALID_NEIGHBORHOODS )" --drop-relations --drop-ways --ignore-dependencies --drop-author --drop-version -o=$PLANET_NEIGHBORHOODS
+
+echo "Filtering for landuse polygons"
+PLANET_LANDUSE="planet-landuse.osm"
+osmfilter $PLANET_O5M --keep="name= and ( landuse=residential or landuse=commercial or landuse=industrial or landuse=retail or landuse=military )" --drop-author --drop-version -o=$PLANET_LANDUSE
 # Venue data set for use in venue classification
 echo "Filtering for venue records: `date`"
 PLANET_VENUES_O5M="planet-venues.o5m"
-osmfilter $PLANET_O5M --keep="( name= and building= ) or ( $VALID_VENUES ) or ( $GENERIC_AMENITIES )" --drop-author --drop-version -o=$PLANET_VENUES_O5M
+osmfilter $PLANET_O5M --keep="( name= and building= ) or ( $VALID_VENUES )" --drop-author --drop-version -o=$PLANET_VENUES_O5M
 PLANET_VENUES_LATLONS="planet-venues-latlons.o5m"
 osmconvert $PLANET_VENUES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_VENUES_LATLONS
 rm $PLANET_VENUES_O5M
@@ -99,6 +109,18 @@ PLANET_VENUES="planet-venues.osm"
 osmfilter $PLANET_VENUES_LATLONS --keep="name= and ( building= or ( $VALID_VENUES ) )" -o=$PLANET_VENUES
 rm $PLANET_VENUES_LATLONS
+# Categories for building generic queries like "restaurants in Brooklyn"
+echo "Filtering for categories: `date`"
+PLANET_CATEGORIES_O5M="planet-categories.o5m"
+VALID_CATEGORIES="aeroway= or amenity= or building= or emergency= or highway= or historic= or internet_access= or landuse= or leisure= or man_made= or mountain_pass= or natural= or office= or place= or railway= or shop= or tourism= or waterway="
+osmfilter $PLANET_O5M --keep="$VALID_CATEGORIES" --drop-author --drop-version -o=$PLANET_CATEGORIES_O5M
+PLANET_CATEGORIES_LATLONS="planet-categories-latlons.o5m"
+osmconvert $PLANET_CATEGORIES_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_CATEGORIES_LATLONS
+rm $PLANET_CATEGORIES_O5M
+PLANET_CATEGORIES="planet-categories.osm"
+osmfilter $PLANET_CATEGORIES_LATLONS --keep="$VALID_CATEGORIES" -o=$PLANET_CATEGORIES
+rm $PLANET_CATEGORIES_LATLONS
+
 # Streets data set for use in language classification
 echo "Filtering ways: `date`"
 PLANET_WAYS_O5M="planet-ways.o5m"
@@ -108,7 +130,14 @@ PLANET_WAYS_NODES_LATLON="planet-ways-nodes-latlons.o5m"
 osmconvert $PLANET_WAYS_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANET_WAYS_NODES_LATLON
-# 10^15 is the offset used for ways and relations with --all-to-ndoes, extracts just the ways
-PLANET_WAYS_LATLONS="planet-ways.osm"
-osmfilter $PLANET_WAYS_NODES_LATLON --keep="name= and ( highway=motorway or highway=motorway_link or highway=trunk or highway=trunk_link or highway=primary or highway=primary_link or highway=secondary or highway=secondary_link or highway=tertiary or highway=tertiary_link or highway=unclassified or highway=unclassified_link or highway=residential or highway=residential_link or highway=service or highway=service_link or highway=living_street or highway=pedestrian or highway=track or highway=road or ( highway=path and ( motorvehicle=yes or motorcar=yes ) ) )" -o=$PLANET_WAYS_LATLONS
+# 10^15 is the offset used for ways and relations with --all-to-nodes, extracts just the ways
+PLANET_WAYS_LATLONS="planet-ways-latlons.osm"
+PLANET_WAYS="planet-ways.osm"
+
+# Road classes kept for the named-street extracts; shared by both filters below
+VALID_ROAD_TYPES="( highway=motorway or highway=motorway_link or highway=motorway_junction or highway=trunk or highway=trunk_link or highway=primary or highway=primary_link or highway=secondary or highway=secondary_link or highway=tertiary or highway=tertiary_link or highway=unclassified or highway=unclassified_link or highway=residential or highway=residential_link or highway=service or highway=service_link or highway=living_street or highway=pedestrian or highway=steps or highway=cycleway or highway=bridleway or highway=track or highway=road or ( highway=path and ( motorvehicle=yes or motorcar=yes ) ) )"
+# Named roads filtered from the --all-to-nodes (lat/lon) conversion
+osmfilter $PLANET_WAYS_NODES_LATLON --keep="name= and ( $VALID_ROAD_TYPES )" -o=$PLANET_WAYS_LATLONS
+# Named roads filtered from the unconverted ways extract; written to its own file so it does not overwrite the lat/lon output above
+osmfilter $PLANET_WAYS_O5M --keep="name= and ( $VALID_ROAD_TYPES )" -o=$PLANET_WAYS
 rm $PLANET_WAYS_NODES_LATLON
 rm $PLANET_WAYS_O5M