[build] Using a process pool with 64MB chunks (similar to aws cli) for S3 downloads. Setting the max concurrent requests to 10, which is also the default in aws cli.
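The pool itself is plain xargs: the download loop prints NUL-delimited 5-field tuples (part index, offset, last byte, url, part filename) and xargs -0 -n 5 -P $NUM_WORKERS fans them out to worker shells that call the exported download_part function. A minimal sketch of the same dispatch pattern, with a made-up greet worker standing in for download_part and a pool of 2 instead of 10:

# Sketch only: 2 fields per tuple and 2 workers, to show the mechanism.
function greet() {
  echo "worker $$: part=$1 offset=$2"
}
export -f greet   # child shells spawned by xargs must see the function

for i in 1 2 3 4; do
  printf "%s\0%s\0" "$i" "$((i*64))"    # NUL-delimited argument tuples
done | xargs -0 -n 2 -P 2 bash -c 'greet "$@"' --   # '--' becomes $0, the tuple becomes $1 $2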
@@ -38,7 +38,9 @@ EPOCH_DATE="Jan 1 00:00:00 1970"
 MB=$((1024*1024))
 LARGE_FILE_SIZE=$((100*$MB))
 
-NUM_WORKERS=5
+CHUNK_SIZE=$((64*$MB))
+
+NUM_WORKERS=10
 
 function kill_background_processes {
   jobs -p | xargs kill;
@@ -47,30 +49,40 @@ function kill_background_processes {
 
 trap kill_background_processes SIGINT
 
+function download_part() {
+  i=$1
+  offset=$2
+  max=$3
+  url=$4
+  part_filename=$5
+  echo "Downloading part $i: filename=$part_filename, offset=$offset, max=$max"
+  curl $url --silent -H"Range:bytes=$offset-$max" -o $part_filename
+}
+export -f download_part
+
 function download_multipart() {
   url=$1
   filename=$2
   size=$3
   num_workers=$4
 
-  echo "Downloading multipart: $url, size=$size"
-  chunk_size=$((size/num_workers))
+  num_chunks=$((size/CHUNK_SIZE))
+  echo "Downloading multipart: $url, size=$size, num_chunks=$num_chunks"
 
   offset=0
-  for i in `seq 1 $((num_workers-1))`; do
+  for i in $(seq 1 $((num_chunks))); do
     part_filename="$filename.$i"
-    echo "Downloading part $i: filename=$part_filename, offset=$offset, max=$((offset+chunk_size-1))"
-    curl $url --silent -H"Range:bytes=$offset-$((offset+chunk_size-1))" -o $part_filename &
-    offset=$((offset+chunk_size))
-  done;
-
-  echo "Downloading part $num_workers: filename=$filename.$num_workers, offset=$offset, max=$((size))"
-  curl --silent -H"Range:bytes=$offset-$size" $url -o "$filename.$num_workers" &
-  wait
-
+    if [ $i -lt $num_chunks ]; then
+      max=$((offset+CHUNK_SIZE-1));
+    else
+      max=$size;
+    fi;
+    printf "%s\0%s\0%s\0%s\0%s\0" "$i" "$offset" "$max" "$url" "$part_filename"
+    offset=$((offset+CHUNK_SIZE))
+  done | xargs -0 -n 5 -P $NUM_WORKERS bash -c 'download_part "$@"' --
 
   > $local_path
 
-  for i in `seq 1 $((num_workers))`; do
+  for i in `seq 1 $((num_chunks))`; do
     part_filename="$filename.$i"
     cat $part_filename >> $local_path
     rm $part_filename
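Neither hunk shows how download_multipart is called or where $local_path and the object size come from. A hedged usage sketch, assuming the size is taken from the Content-Length header of a HEAD request, that local_path names the reassembled output file, and that LARGE_FILE_SIZE is the cutoff for the multipart path (URL and file names are hypothetical):

url="https://example-bucket.s3.amazonaws.com/build/artifact.tar.gz"
local_path="artifact.tar.gz"

# Object size from the Content-Length response header (strip the trailing CR).
size=$(curl --silent --head "$url" | tr -d '\r' | awk 'tolower($1) == "content-length:" {print $2}')

if [ "$size" -gt "$LARGE_FILE_SIZE" ]; then
  # Parts land in artifact.tar.gz.part.1 ... .N and are concatenated into $local_path.
  download_multipart "$url" "$local_path.part" "$size" "$NUM_WORKERS"
else
  curl --silent "$url" -o "$local_path"
fi

Note that num_workers=$4 is still assigned inside the function but no longer used; after this change the pool size comes from the NUM_WORKERS global.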