[wof] script to download all the WoF postal code repos and their dependencies
This commit is contained in:
56
scripts/geodata/whosonfirst/download_wof_postal_codes.py
Normal file
56
scripts/geodata/whosonfirst/download_wof_postal_codes.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import gevent
|
||||
from gevent import monkey
|
||||
monkey.patch_all()
|
||||
|
||||
import os
|
||||
import requests
|
||||
import subprocess
|
||||
import sys
|
||||
import ujson as json
|
||||
|
||||
# Directory containing this script, with symlinks resolved.
this_dir = os.path.realpath(os.path.dirname(__file__))
# Make the ``geodata`` package (two levels above this file) importable.
# The path is anchored at this_dir rather than the current working directory
# so the imports below resolve no matter where the script is launched from.
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
|
||||
|
||||
from geodata.whosonfirst.crawl import WhosOnFirstCrawler
|
||||
from geodata.file_utils import ensure_dir
|
||||
|
||||
# URL of the JSON document fetched by download_wof_postcodes(). The code
# iterates over it and reads the 'name', 'count' and 'url' keys of each entry.
SEED_URLS_JSON = 'https://raw.githubusercontent.com/whosonfirst-data/whosonfirst-data-postalcode/master/data.json'
|
||||
|
||||
|
||||
def clone_repo(wof_dir, repo):
    """Clone a git repository into ``wof_dir``, replacing any previous copy.

    The checkout directory is named after the last path component of
    ``repo`` (trailing slashes are ignored).

    :param wof_dir: base directory in which the checkout is created
    :param repo: URL or path of the git repository to clone
    :return: path of the checkout, ``os.path.join(wof_dir, <repo name>)``
    :raises ValueError: if no usable directory name can be derived from
        ``repo``
    :raises subprocess.CalledProcessError: if ``rm`` or ``git clone`` exits
        with a non-zero status
    """
    repo_name = repo.rstrip('/').rsplit('/', 1)[-1]
    # An empty or dot name would make repo_dir resolve to wof_dir itself (or
    # its parent), and the "rm -rf" below would then delete everything that
    # has already been downloaded there.
    if repo_name in ('', '.', '..'):
        raise ValueError('cannot derive a repo name from {!r}'.format(repo))

    repo_dir = os.path.join(wof_dir, repo_name)

    # "--" ends option parsing so that a value starting with "-" is never
    # interpreted as a command-line flag by rm or git.
    subprocess.check_call(['rm', '-rf', '--', repo_dir])
    subprocess.check_call(['git', 'clone', '--', repo, repo_dir])

    return repo_dir
|
||||
|
||||
|
||||
def download_wof_postcodes(wof_dir):
    """Clone every non-empty repo listed in the seed JSON and crawl it.

    Fetches ``SEED_URLS_JSON`` and, for each entry whose ``count`` is greater
    than zero, clones the entry's ``url`` into ``wof_dir`` with
    ``clone_repo`` and iterates ``WhosOnFirstCrawler.crawl`` over the
    checkout, printing progress every 100 items. Entries with a zero or
    missing ``count`` are reported as skipped.

    If the seed request does not succeed, an error is written to stderr and
    nothing is cloned.

    :param wof_dir: base directory for the checkouts; created via
        ``ensure_dir`` and also passed to the crawler
    """
    ensure_dir(wof_dir)
    crawler = WhosOnFirstCrawler(wof_dir)

    response = requests.get(SEED_URLS_JSON)
    if not response.ok:
        # Report the failure instead of returning silently, so an empty run
        # is not mistaken for a successful one.
        sys.stderr.write('could not fetch {} (HTTP {})\n'.format(
            SEED_URLS_JSON, response.status_code))
        return

    content = json.loads(response.content)

    for d in content:
        repo_name = d['name']

        if int(d.get('count', 0)) <= 0:
            print('skipping {}'.format(repo_name))
            continue

        repo = d['url']
        print('doing {}'.format(repo_name))

        repo_dir = clone_repo(wof_dir, repo)

        # Only the running count is needed for progress output; the yielded
        # items are not used here. NOTE(review): presumably iterating
        # crawl() is what performs the downloads - confirm in crawl.py.
        for i, _ in enumerate(crawler.crawl(repo_dir)):
            if i % 100 == 0 and i > 0:
                print('downloaded {} postcodes from WoF'.format(i))
|
||||
|
||||
if __name__ == '__main__':
    # The single required argument is the base directory for the WoF
    # checkouts; any further arguments are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit('Usage: python download_wof_postal_codes.py wof_base_dir')
    download_wof_postcodes(cli_args[0])
|
||||
Reference in New Issue
Block a user