diff --git a/scripts/geodata/whosonfirst/crawl.py b/scripts/geodata/whosonfirst/crawl.py
new file mode 100644
index 00000000..948d76ea
--- /dev/null
+++ b/scripts/geodata/whosonfirst/crawl.py
@@ -0,0 +1,81 @@
+import gevent
+import gevent.pool
+
+import os
+import six
+import ujson as json
+
+from geodata.whosonfirst.client import WhosOnFirst
+from geodata.encoding import safe_encode
+from geodata.file_utils import ensure_dir
+
+
+class WhosOnFirstCrawler(object):
+    def __init__(self, wof_dir, cache_size=10000, **s3_args):
+        self.wof_dir = wof_dir
+        self.admin_dir = os.path.join(wof_dir, 'admin')
+        ensure_dir(self.admin_dir)
+        self.client = WhosOnFirst(self.admin_dir, **s3_args)
+
+    def walk_files(self, base_dir):
+        # WhosOnFirst repos keep their GeoJSON records under data/
+        for root, dirs, files in os.walk(os.path.join(base_dir, 'data')):
+            if not files:
+                continue
+            for filename in files:
+                yield os.path.join(root, filename)
+
+    def download_dependencies(self, path):
+        with open(path) as f:
+            data = json.load(f)
+        props = data['properties']
+
+        _, filename = os.path.split(path)
+        # Records are named <wof_id>.geojson; encode the id so it compares
+        # consistently with the encoded hierarchy ids below
+        current_wof_id = safe_encode(filename.rsplit('.geojson', 1)[0])
+
+        for hierarchy in props.get('wof:hierarchy', []):
+            for key, wof_id in six.iteritems(hierarchy):
+                wof_id = safe_encode(wof_id)
+
+                # -1 denotes a missing parent in WhosOnFirst hierarchies
+                if wof_id != current_wof_id and wof_id != safe_encode('-1') and not self.client.exists_locally(wof_id):
+                    if not self.client.download_file(wof_id):
+                        print('error downloading {}'.format(wof_id))
+                        continue
+        return props.get('name')
+
+    def data_and_dependencies(self, path):
+        with open(path) as f:
+            data = json.load(f)
+        props = data['properties']
+
+        _, filename = os.path.split(path)
+        current_wof_id = safe_encode(filename.rsplit('.geojson', 1)[0])
+
+        dependencies = {}
+
+        for hierarchy in props.get('wof:hierarchy', []):
+            for key, wof_id in six.iteritems(hierarchy):
+                wof_id = safe_encode(wof_id)
+                if wof_id in dependencies or wof_id == current_wof_id:
+                    continue
+
+                if not self.client.exists_locally(wof_id):
+                    continue
+
+                value = self.client.load(wof_id)
+
+                # Only include properties, not all the polygon data
+                dependencies[wof_id] = value.get('properties', {})
+
+        return data, dependencies
+
+    def load(self, repo_dir):
+        return (self.data_and_dependencies(filename) for filename in self.walk_files(repo_dir))
+
+    def crawl(self, repo_dir, workers=10):
+        # Use a distinct name for the pool to avoid shadowing the argument
+        pool = gevent.pool.Pool(workers)
+        return pool.imap_unordered(self.download_dependencies, self.walk_files(repo_dir))
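
For reviewers, a minimal usage sketch of the new crawler. The paths, the `workers` count, and the monkey-patching call are illustrative assumptions, not part of this diff; `WhosOnFirstCrawler` and its methods are the only names taken from the code above.

```python
# Hypothetical driver script; all paths and arguments here are assumptions.
# gevent only yields real download concurrency if the underlying S3/HTTP
# calls are cooperative, hence the (optional) monkey patch up front.
from gevent import monkey
monkey.patch_all()

from geodata.whosonfirst.crawl import WhosOnFirstCrawler

# Cache directory for downloaded admin records (assumed path); any extra
# kwargs would be forwarded to the WhosOnFirst S3 client.
crawler = WhosOnFirstCrawler('/data/whosonfirst')

# crawl() returns a lazy imap_unordered iterator, so this loop is what
# actually drives the dependency downloads.
for name in crawler.crawl('/data/whosonfirst-data', workers=10):
    print(name)

# Once the parents are local, iterate each record together with the
# properties of its hierarchy: dependencies maps wof_id -> properties dict.
for record, dependencies in crawler.load('/data/whosonfirst-data'):
    props = record['properties']
```

One design note: `download_dependencies` only prints failed downloads and carries on, so a consumer that needs failure tracking would have to wrap the callable itself rather than rely on `crawl()`'s return values.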