rewritten docker-ce script

This commit is contained in:
bigeagle 2017-08-21 14:38:21 +08:00
parent 832510e9b9
commit 449dcf57c6
2 changed files with 120 additions and 8 deletions

View File

@@ -1,16 +1,52 @@
#!/bin/bash
# Sync script for the docker-ce repository at download.docker.com.
# requires: wget, lftp, jq, python3.5, lxml, pyquery
# set -x
set -e
set -u
set -o pipefail

# Directory containing this script; used to locate the helper below.
_here="$(dirname "$(realpath "$0")")"
# Helper that prints every file URL under the upstream tree, files first,
# repository metadata last.
GET_FILELIST="${_here}/helpers/docker-ce-filelist.py"
# Upstream mirror root; override with TUNASYNC_UPSTREAM_URL.
BASE_URL="${TUNASYNC_UPSTREAM_URL:-"https://download.docker.com/linux/"}"
# NOTE(review): this region was a flattened diff — the removed lftp-based
# sync_docker_ce() function (never closed, but still called) was interleaved
# with the new wget-based implementation. Only the coherent new version is
# kept below.

TMP_DIR="${TUNASYNC_WORKING_DIR}/.tmp"
mkdir -p "$TMP_DIR"

# File lists used to detect files that disappeared upstream.
REMOTE_FILELIST="${TUNASYNC_WORKING_DIR}/.filelist.remote"
LOCAL_FILELIST="${TUNASYNC_WORKING_DIR}/.filelist.local"
[[ -f $REMOTE_FILELIST ]] && rm "$REMOTE_FILELIST"
[[ -f $LOCAL_FILELIST ]] && rm "$LOCAL_FILELIST"

# Remove scratch state on any exit path (success, error, or signal).
function cleanup () {
	echo "cleaning up"
	[[ -d ${TMP_DIR} ]] && {
		rm -rf "$TMP_DIR"
	}
	[[ -f $REMOTE_FILELIST ]] && rm "$REMOTE_FILELIST"
	[[ -f $LOCAL_FILELIST ]] && rm "$LOCAL_FILELIST"
}

trap cleanup EXIT

# download
$GET_FILELIST "$BASE_URL" | while read -r remote_url; do
	dst_rel_file="${remote_url#$BASE_URL}"
	dst_file="${TUNASYNC_WORKING_DIR}/${dst_rel_file}"
	dst_tmp_file="${TMP_DIR}/$(basename "${dst_file}")"
	echo "${dst_rel_file}" >> "$REMOTE_FILELIST"
	echo "downloading ${remote_url}"
	if [[ -f ${dst_file} ]]; then
		# Stage the existing copy so `wget -N` can skip unchanged files.
		# (The original `[[ -f ]] && cp || mkdir` would run mkdir when
		# cp failed — fixed with an explicit if/else.)
		cp -a "${dst_file}" "${dst_tmp_file}"
	else
		mkdir -p "$(dirname "${dst_file}")"
	fi
	(cd "${TMP_DIR}" && wget -q -N "${remote_url}" && mv "${dst_tmp_file}" "${dst_file}")
done
rm -rf "$TMP_DIR"

# Delete local files that are no longer listed upstream:
# comm -13 prints lines only in the local list.
(cd "${TUNASYNC_WORKING_DIR}"; find . -type f ) | sed 's+^\./++' > "${LOCAL_FILELIST}"
comm <(sort "$REMOTE_FILELIST") <(sort "$LOCAL_FILELIST") -13 | while read -r file; do
	file="${TUNASYNC_WORKING_DIR}/$file"
	echo "deleting ${file}"
	[[ -f $file ]] && rm "${file}"
done

76
helpers/docker-ce-filelist.py Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python3
import requests
from pyquery import PyQuery as pq
# Metadata URLs collected during the main walk; crawled after all plain files.
meta_urls = []


def is_metafile_url(url):
    """Return True if `url` points at repository metadata (apt index files
    such as Release/Contents-*/binary-* listings, or yum repodata) rather
    than an ordinary package file."""
    deb_markers = ('/Contents-', '/binary-', 'Release')
    for dist in ('debian', 'ubuntu', 'raspbian'):
        if '/%s/' % dist in url and any(m in url for m in deb_markers):
            return True
    for dist in ('fedora', 'centos'):
        if '/%s/' % dist in url and '/repodata/' in url:
            return True
    return False
def recursive_get_filelist(base_url, filter_meta=False):
    """Yield file URLs under `base_url` by walking its HTML directory listing.

    A URL that does not end in '/' is treated as a plain file and yielded
    directly.  When `filter_meta` is true, metadata URLs (per
    is_metafile_url) are appended to the global `meta_urls` instead of
    being yielded, so they can be fetched after all ordinary files.
    """
    # Leaf node: not a directory listing.
    if not base_url.endswith('/'):
        yield base_url
        return
    resp = requests.get(base_url)
    if not resp.ok:
        # NOTE(review): unreachable/failed listings are silently skipped —
        # confirm this best-effort behavior is intended for a mirror job.
        return
    doc = pq(resp.text)
    for anchor in doc('a'):
        # assumes the anchor text equals the entry's relative path,
        # as in autoindex-style listings — TODO confirm for this server
        name = anchor.text
        if name.startswith('..'):
            continue  # skip parent-directory links
        child_url = base_url + name
        if filter_meta and is_metafile_url(child_url):
            meta_urls.append(child_url)
        elif name.endswith('/'):
            yield from recursive_get_filelist(child_url, filter_meta=filter_meta)
        else:
            yield child_url
def get_filelist(base_url):
    """Yield every non-metadata file URL under `base_url`.

    Side effect: repository metadata URLs encountered during the walk are
    queued in the global `meta_urls` for a later pass.
    """
    yield from recursive_get_filelist(base_url, filter_meta=True)
def get_meta_filelist():
    """Yield file URLs found beneath every metadata URL collected so far
    (populate `meta_urls` via get_filelist before calling this)."""
    for meta_url in meta_urls:
        yield from recursive_get_filelist(meta_url, filter_meta=False)
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Print every file URL under a docker-ce mirror tree.")
    # nargs='?' is required for a positional argument's default to take
    # effect; without it argparse still demands the argument and the
    # original default was dead code.
    parser.add_argument("base_url", nargs="?",
                        default="https://download.docker.com/")
    args = parser.parse_args()

    # Plain files first, then the repository metadata collected during the
    # walk, so indexes are only fetched after the files they reference.
    for file_url in get_filelist(args.base_url):
        print(file_url, flush=True)
    for file_url in get_meta_filelist():
        print(file_url, flush=True)

# vim: ts=4 sw=4 sts=4 expandtab