[data] Deployed model files and training data to CloudFront for easier downloading around the world, including in places like China where the Great Firewall may prevent large downloads from abroad. The TTL is set to 0, so CloudFront still caches the files themselves but checks with the origin using If-Modified-Since headers, allowing the files to be updated dynamically.

This commit is contained in:
Al
2017-04-17 14:11:44 -04:00
parent 36dc41af8c
commit d2732922c2
2 changed files with 4 additions and 3 deletions

View File

@@ -11,7 +11,8 @@ LIBPOSTAL_VERSION_STRING="v1"
LIBPOSTAL_S3_BUCKET_NAME="libpostal"
LIBPOSTAL_S3_KEY="s3://$LIBPOSTAL_S3_BUCKET_NAME"
LIBPOSTAL_S3_BUCKET_URL="http://$LIBPOSTAL_S3_BUCKET_NAME.s3.amazonaws.com"
LIBPOSTAL_S3_BUCKET_URL="https://$LIBPOSTAL_S3_BUCKET_NAME.s3.amazonaws.com"
LIBPOSTAL_CLOUDFRONT_URL="https://d1p366rbd94x8u.cloudfront.net"
LIBPOSTAL_DATA_FILE="libpostal_data.tar.gz"
LIBPOSTAL_PARSER_FILE="parser.tar.gz"
LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz"
@@ -112,7 +113,7 @@ download_file() {
echo "Checking for new libpostal $name..."
url=$LIBPOSTAL_S3_BUCKET_URL/$prefix/$filename
url=$LIBPOSTAL_CLOUDFRONT_URL/$prefix/$filename
if [ $(curl -sI $url -z "$(cat $updated_path)" --remote-time -w %{http_code} -o /dev/null | grep "^200$") ]; then
echo "New libpostal $name available"