From 449dcf57c6485ae5da25ff3a0bcdb9694ee4397a Mon Sep 17 00:00:00 2001
From: bigeagle
Date: Mon, 21 Aug 2017 14:38:21 +0800
Subject: [PATCH] rewritten docker-ce script

---
Review note: the hunks below were amended after the original commit, so the
blob ids on the "index" lines no longer describe the patched content.

 docker-ce.sh                  |  63 +++++++++++++++++---
 helpers/docker-ce-filelist.py | 108 ++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+), 8 deletions(-)
 create mode 100755 helpers/docker-ce-filelist.py

diff --git a/docker-ce.sh b/docker-ce.sh
index 43ef8be..684e7df 100755
--- a/docker-ce.sh
+++ b/docker-ce.sh
@@ -1,16 +1,63 @@
 #!/bin/bash
-# requires: lftp wget jq
+# requires: wget, python3.5, requests, lxml, pyquery
+# set -x
+set -e
+set -u
+set -o pipefail
+
+_here=`dirname $(realpath $0)`
+GET_FILELIST="${_here}/helpers/docker-ce-filelist.py"
 
 BASE_URL="${TUNASYNC_UPSTREAM_URL:-"https://download.docker.com/linux/"}"
 
-function sync_docker_ce() {
-	repo_url="$1"
-	repo_dir="$2"
+TMP_DIR="${TUNASYNC_WORKING_DIR}/.tmp"
+mkdir -p $TMP_DIR
 
-	[ ! -d "$repo_dir" ] && mkdir -p "$repo_dir"
-	cd $repo_dir
+REMOTE_FILELIST="${TUNASYNC_WORKING_DIR}/.filelist.remote"
+LOCAL_FILELIST="${TUNASYNC_WORKING_DIR}/.filelist.local"
+[[ -f $REMOTE_FILELIST ]] && rm $REMOTE_FILELIST
+[[ -f $LOCAL_FILELIST ]] && rm $LOCAL_FILELIST
 
-	lftp "${repo_url}/" -e "mirror --verbose -P 5 --delete --only-newer; bye"
+function cleanup () {
+	echo "cleaning up"
+	[[ -d ${TMP_DIR} ]] && {
+		rm -rf $TMP_DIR
+	}
+	[[ -f $REMOTE_FILELIST ]] && rm $REMOTE_FILELIST
+	[[ -f $LOCAL_FILELIST ]] && rm $LOCAL_FILELIST
 }
 
-sync_docker_ce "${BASE_URL}" "${TUNASYNC_WORKING_DIR}/"
+trap cleanup EXIT
+
+# download
+$GET_FILELIST $BASE_URL | while read remote_url; do
+	dst_rel_file=${remote_url#$BASE_URL}
+	dst_file="${TUNASYNC_WORKING_DIR}/${dst_rel_file}"
+	dst_tmp_file="${TMP_DIR}/$(basename ${dst_file})"
+
+	echo "${dst_rel_file}" >> $REMOTE_FILELIST
+
+	echo "downloading ${remote_url}"
+	# Seed the temp dir with the current copy (cp -a keeps its mtime) so
+	# that wget -N only re-downloads files that changed upstream.
+	[[ -f ${dst_file} ]] && cp -a ${dst_file} ${dst_tmp_file} || mkdir -p `dirname ${dst_file}`
+	(cd ${TMP_DIR} && wget -q -N ${remote_url} && mv ${dst_tmp_file} ${dst_file})
+done
+
+rm -rf $TMP_DIR
+
+# An empty or missing remote list would make every local file look stale,
+# so refuse to prune rather than wipe the mirror.
+[[ -s $REMOTE_FILELIST ]] || {
+	echo "remote file list is empty, refusing to prune local files" >&2
+	exit 1
+}
+
+# comm -13 keeps the lines found only in the local list, i.e. files that
+# are no longer listed upstream.
+(cd ${TUNASYNC_WORKING_DIR}; find . -type f ) | sed 's+^\./++' > ${LOCAL_FILELIST}
+comm <(sort $REMOTE_FILELIST) <(sort $LOCAL_FILELIST) -13 | while read file; do
+	file="${TUNASYNC_WORKING_DIR}/$file"
+	echo "deleting ${file}"
+	[[ -f $file ]] && rm ${file}
+done
diff --git a/helpers/docker-ce-filelist.py b/helpers/docker-ce-filelist.py
new file mode 100755
index 0000000..2a0e9f2
--- /dev/null
+++ b/helpers/docker-ce-filelist.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""List every file URL below an HTML directory index, one per line.
+
+Repository metadata URLs are printed after all other files.
+"""
+import sys
+
+import requests
+from pyquery import PyQuery as pq
+
+# Metadata URLs set aside by recursive_get_filelist(filter_meta=True).
+meta_urls = []
+# Directory URLs whose listing request did not succeed.
+failed_urls = []
+
+
+def is_metafile_url(url):
+    """Return True if url matches a known apt or yum metadata path."""
+    deb_dists=('debian', 'ubuntu', 'raspbian')
+    rpm_dists=('fedora', 'centos')
+
+    for dist in deb_dists:
+        if '/'+dist+'/' not in url:
+            continue
+        if '/Contents-' in url:
+            return True
+        if '/binary-' in url:
+            return True
+        if 'Release' in url:
+            return True
+
+    for dist in rpm_dists:
+        if '/'+dist+'/' not in url:
+            continue
+        if '/repodata/' in url:
+            return True
+
+    return False
+
+
+def recursive_get_filelist(base_url, filter_meta=False):
+    """Yield the file URLs found below base_url, depth first.
+
+    A base_url without a trailing slash is yielded unchanged. With
+    filter_meta set, URLs accepted by is_metafile_url() are appended to
+    meta_urls instead of being yielded or descended into.
+    """
+    if not base_url.endswith('/'):
+        yield base_url
+        return
+
+    r = requests.get(base_url)
+    if not r.ok:
+        # Keep going, but record the failure: docker-ce.sh deletes local
+        # files missing from this listing, so a partial list must not
+        # be reported as success.
+        print("failed to list {}: HTTP {}".format(base_url, r.status_code),
+              file=sys.stderr)
+        failed_urls.append(base_url)
+        return
+
+    d = pq(r.text)
+    for link in d('a'):
+        name = link.text
+        # Anchors without text have no entry name to append.
+        if not name or name.startswith('..'):
+            continue
+        href = base_url + name
+        if filter_meta and is_metafile_url(href):
+            meta_urls.append(href)
+        elif name.endswith('/'):
+            yield from recursive_get_filelist(href, filter_meta=filter_meta)
+        else:
+            yield href
+
+
+def get_filelist(base_url):
+    """Yield non-metadata file URLs below base_url, filling meta_urls."""
+    yield from recursive_get_filelist(base_url, filter_meta=True)
+
+
+def get_meta_filelist():
+    """Yield the file URLs behind every entry collected in meta_urls."""
+    for url in meta_urls:
+        yield from recursive_get_filelist(url, filter_meta=False)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    # A positional only honours its default when nargs="?" is given.
+    parser.add_argument("base_url", nargs="?",
+                        default="https://download.docker.com/")
+    args = parser.parse_args()
+
+    for file_url in get_filelist(args.base_url):
+        print(file_url, flush=True)
+
+    for file_url in get_meta_filelist():
+        print(file_url, flush=True)
+
+    # Non-zero exit tells the caller that the listing is incomplete.
+    if failed_urls:
+        sys.exit(1)
+
+
+# vim: ts=4 sw=4 sts=4 expandtab