File: //usr/local/bin/cache-url
#!/bin/bash
# Copyright 2012 Hewlett-Packard Development Company, L.P.
# Copyright 2013 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Honour DIB_DEBUG_TRACE: a value above 1 turns on shell tracing and makes
# curl verbose. CURL_DASH_V is expanded into the curl command line later in
# this script; it is empty when tracing is off.
CURL_DASH_V=''
if [ "${DIB_DEBUG_TRACE:-0}" -gt 1 ]; then
    set -x
    CURL_DASH_V='-v'
fi
# Abort on a failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -eu
set -o pipefail
# Download a URL to a local cache
# e.g. cache-url http://.../foo ~/.cache/image-create/foo

# Name and directory of this script. $0 is quoted so that an install path
# containing whitespace or glob characters is passed through intact.
SCRIPT_NAME=$(basename -- "$0")
SCRIPT_HOME=$(dirname -- "$0")
# Switched to 1 by the -f command-line option.
FORCE_REVALIDATE=0
# Print the usage text on stdout, then terminate the script.
# Globals:   SCRIPT_NAME (read)
# Arguments: $1 - exit status to terminate with
show_options() {
    printf '%s\n' \
        "Usage: $SCRIPT_NAME [options] <url> <destination_file>" \
        "" \
        "Download a URL and cache it to a specified location." \
        "Subsequent requests will compare the last modified date" \
        "of the upstream file to determine whether it needs to be" \
        "downloaded again." \
        "" \
        "Options:" \
        "      -f          -- force upstream caches to fetch a new copy of the file" \
        "      -h          -- show this help" \
        ""
    exit $1
}
# Normalise the command line with getopt(1). The call is made inside the
# `if` condition because this script runs under `set -e`: as a bare
# assignment, a getopt failure would abort the script before the message
# below could be printed.
if ! TEMP=$(getopt -o hf -n "$SCRIPT_NAME" -- "$@"); then
    echo "Terminating..." >&2
    exit 1
fi
# Note the quotes around `$TEMP': they are essential!
eval set -- "$TEMP"
# Consume the options; what is left after `--` are the positional arguments.
while true ; do
    case "$1" in
        -h|"-?") show_options 0;;
        -f) FORCE_REVALIDATE=1; shift 1;;
        --) shift; break;;
        *) echo "Error: unsupported option $1." ; exit 1 ;;
    esac
done
# Positional arguments. The empty defaults keep `set -u` from aborting with
# an "unbound variable" error before the usage text can be shown.
url=${1:-}
dest=${2:-}
# Set to 1 when curl is asked to download only if the server copy is newer
# than the file already cached at $dest.
conditional=0
curl_opts=(--retry 3 --retry-delay 30)
if [[ -z "$url" || -z "$dest" ]]; then
    show_options 1
fi

# Pick a staging file for the download: in the default temp directory when
# the destination is a named pipe, otherwise next to the destination.
if [[ -p "$dest" ]]; then
    dest_kind="fifo"
    tmp=$(mktemp --tmpdir download.XXXXXXXX)
else
    dest_kind="normal"
    dest_dir=$(dirname -- "$dest")
    mkdir -p -- "$dest_dir"
    tmp=$(mktemp "$dest_dir/.download.XXXXXXXX")
fi
# Remove the staging file on every exit path, including the `set -e` abort
# taken when curl itself exits non-zero. After a successful mv the file is
# already gone and rm -f does nothing.
trap 'rm -f -- "$tmp"' EXIT

# Choose the request flavour and the message reported on success.
if [[ "$FORCE_REVALIDATE" == "1" ]]; then
    curl_opts+=(-H "Pragma: no-cache, must-revalidate" -H "Cache-Control: no-cache, must-revalidate")
    success="Downloaded and cached $url, having forced upstream caches to revalidate"
elif [[ -f "$dest" && -s "$dest" ]]; then
    # A non-empty cached copy exists: pass it to curl as the time condition.
    curl_opts+=(-z "$dest")
    conditional=1
    success="Server copy has changed. Using server version of $url"
else
    success="Downloaded and cached $url for the first time"
fi

# curl writes the body to $tmp; the only thing on its stdout is the status
# code produced by -w. CURL_DASH_V expands to nothing when it is empty.
rcode=$(curl ${CURL_DASH_V:+"$CURL_DASH_V"} -L -o "$tmp" -w '%{http_code}' --connect-timeout 10 "${curl_opts[@]}" "$url")

if [[ "$rcode" == "200" || "${url:0:7}" == "file://" ]]; then
    # In cases where servers ignore the Modified time, curl cancels the
    # download, outputs a 200 and leaves the output file untouched. We
    # don't want this empty file.
    if [[ "$conditional" == "1" && ! -s "$tmp" ]]; then
        echo "Ignoring empty file returned by curl. Using locally cached $url"
    else
        echo "$success"
        if [[ "$dest_kind" == "fifo" ]]; then
            # Copy into the pipe; the EXIT trap removes the staging file.
            cp -- "$tmp" "$dest"
        else
            mv -- "$tmp" "$dest"
        fi
    fi
# 213 is the response to a ftp MDTM command, curl outputs a 213 as the status
# if the url redirected to a ftp server and Not-Modified
elif [[ "$rcode" == "304" || "$rcode" == "213" ]]; then
    echo "Server copy has not changed. Using locally cached $url"
else
    echo "Server returned an unexpected response code. [$rcode]"
    # expose some error codes so the calling process might know what happened
    if [[ "$rcode" == "404" ]]; then
        exit 44
    fi
    exit 1
fi