From d1e3c6a24af3b38999e5a298579f81c44e20da47 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 8 Sep 2016 15:13:25 -0400 Subject: [PATCH] [openaddresses] adding Italy countrywide to a pre_release_downloads set so it can be used in libpostal without having been merged yet --- resources/parser/data_sets/openaddresses.yaml | 6 ++++++ scripts/geodata/openaddresses/config.py | 1 + .../openaddresses/download_openaddresses.py | 17 ++++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/resources/parser/data_sets/openaddresses.yaml b/resources/parser/data_sets/openaddresses.yaml index 6c884975..68455fd1 100644 --- a/resources/parser/data_sets/openaddresses.yaml +++ b/resources/parser/data_sets/openaddresses.yaml @@ -12,6 +12,10 @@ global: place_only_probability: 0.2 place_and_postcode_probability: 0.1 + pre_release_downloads: + # Italy countrywide with postcodes, must have + - "http://s3.amazonaws.com/data.openaddresses.io/runs/104280/it/countrywide.zip" + fields: &default_fields - field_name: NUMBER component: house_number @@ -557,6 +561,8 @@ countries: add_osm_boundaries: true it: + files: + - filename: countrywide.csv subdirs: # Trentino-Alto Adige/Südtirol 32: diff --git a/scripts/geodata/openaddresses/config.py b/scripts/geodata/openaddresses/config.py index 9a9eaaba..42a4353e 100644 --- a/scripts/geodata/openaddresses/config.py +++ b/scripts/geodata/openaddresses/config.py @@ -14,6 +14,7 @@ class OpenAddressesConfig(object): config = yaml.load(open(path)) self.config = config['global'] + self.pre_release_downloads = config.get('pre_release_downloads', []) self.country_configs = config['countries'] @property diff --git a/scripts/geodata/openaddresses/download_openaddresses.py b/scripts/geodata/openaddresses/download_openaddresses.py index e8cce89d..3ac02ccc 100644 --- a/scripts/geodata/openaddresses/download_openaddresses.py +++ b/scripts/geodata/openaddresses/download_openaddresses.py @@ -7,7 +7,7 @@ import sys import tempfile import yaml -from 
def download_pre_release_downloads(out_dir):
    """Download and unzip every configured pre-release archive.

    Iterates over ``openaddresses_config.pre_release_downloads`` and hands
    each URL to ``download_and_unzip_file``.

    :param out_dir: directory passed through to ``download_and_unzip_file``.
    :return: ``True`` when every URL was fetched successfully (also when the
        list is empty); ``False`` as soon as one download fails, in which
        case the remaining URLs are not attempted.
    """
    for url in openaddresses_config.pre_release_downloads:
        print(six.u('doing pre_release {}').format(safe_decode(url)))

        success = download_and_unzip_file(url, out_dir)
        if not success:
            # Report the URL that failed. No name ``source`` exists in this
            # scope, so formatting with it would raise NameError and hide
            # the real download error.
            print(six.u('ERR: could not download {}').format(safe_decode(url)))
            return False
    return True