From 7bd1297d6ab42d15f5cd072160bebd9824f57875 Mon Sep 17 00:00:00 2001
From: bigeagle
Date: Tue, 28 Nov 2017 00:01:42 +0800
Subject: [PATCH] use python to sync docker-ce

---
 docker-ce.py | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++
 docker-ce.sh |  65 ----------------
 2 files changed, 204 insertions(+), 65 deletions(-)
 create mode 100644 docker-ce.py
 delete mode 100755 docker-ce.sh

diff --git a/docker-ce.py b/docker-ce.py
new file mode 100644
index 0000000..ccfbacd
--- /dev/null
+++ b/docker-ce.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+"""Mirror a Docker CE download site into a local working directory."""
+import os
+import subprocess as sp
+from pathlib import Path
+from email.utils import parsedate_to_datetime
+
+import requests
+from pyquery import PyQuery as pq
+
+
+BASE_URL = os.getenv("TUNASYNC_UPSTREAM_URL", "https://download.docker.com/")
+WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR")
+# (connect, read) timeout in seconds, so a stalled server cannot hang the sync
+HTTP_TIMEOUT = (10, 60)
+
+
+class RemoteSite:
+    """Walks the HTML directory listings found under ``base_url``."""
+
+    def __init__(self, base_url=BASE_URL):
+        if not base_url.endswith('/'):
+            base_url = base_url + '/'
+        self.base_url = base_url
+        # metadata URLs set aside by the first pass of ``files``
+        self.meta_urls = []
+        # True once any directory listing could not be fetched
+        self.incomplete = False
+
+    def is_metafile_url(self, url):
+        """Return True if ``url`` matches an apt or yum metadata pattern."""
+        deb_dists = ('debian', 'ubuntu', 'raspbian')
+        rpm_dists = ('fedora', 'centos')
+
+        for dist in deb_dists:
+            if '/' + dist + '/' not in url:
+                continue
+            if '/Contents-' in url:
+                return True
+            if '/binary-' in url:
+                return True
+            if 'Release' in url:
+                return True
+
+        for dist in rpm_dists:
+            if '/' + dist + '/' not in url:
+                continue
+            if '/repodata/' in url:
+                return True
+
+        return False
+
+    def recursive_get_filelist(self, base_url, filter_meta=False):
+        """Yield the URL of every file reachable from ``base_url``.
+
+        A URL without a trailing slash is a file and is yielded as is;
+        anything else is fetched as a directory listing and walked.  With
+        ``filter_meta`` set, URLs accepted by ``is_metafile_url`` are not
+        yielded or descended into but appended to ``self.meta_urls``.
+        """
+        if not base_url.endswith('/'):
+            yield base_url
+            return
+
+        try:
+            r = requests.get(base_url, timeout=HTTP_TIMEOUT)
+            r.raise_for_status()
+        except requests.RequestException as e:
+            # Record the failure: the caller must not mistake the files
+            # below this directory for files that were removed upstream.
+            self.incomplete = True
+            print("Failed to list", base_url, e, flush=True)
+            return
+
+        d = pq(r.text)
+        for link in d('a'):
+            name = link.text
+            # anchors without text (e.g. wrapping an image) have text None
+            if not name or name.startswith('..'):
+                continue
+            href = base_url + name
+            if filter_meta and self.is_metafile_url(href):
+                self.meta_urls.append(href)
+            elif name.endswith('/'):
+                yield from self.recursive_get_filelist(href, filter_meta=filter_meta)
+            else:
+                yield href
+
+    def relpath(self, url):
+        """Return ``url`` relative to ``self.base_url``.
+
+        Raises ValueError if ``url`` does not start with the base URL.
+        """
+        # not an assert: asserts are stripped when Python runs with -O
+        if not url.startswith(self.base_url):
+            raise ValueError("{} is not under {}".format(url, self.base_url))
+        return url[len(self.base_url):]
+
+    @property
+    def files(self):
+        """Yield every remote file URL, repository metadata last."""
+        # start clean so that iterating twice does not repeat metadata
+        self.meta_urls = []
+        self.incomplete = False
+        yield from self.recursive_get_filelist(self.base_url, filter_meta=True)
+        for url in self.meta_urls:
+            yield from self.recursive_get_filelist(url, filter_meta=False)
+
+
+def curl_download(remote_url: str, dst_file: Path):
+    """Download ``remote_url`` to ``dst_file`` with curl.
+
+    ``--remote-time`` makes curl copy the server's modification time onto
+    the file, which ``is_up_to_date`` compares on the next run.  Raises
+    subprocess.CalledProcessError if curl exits with a non-zero status.
+    """
+    sp.check_call([
+        "curl", "-o", str(dst_file),
+        "-sL", "--remote-time", "--show-error",
+        "--fail", remote_url,
+    ])
+
+
+def is_up_to_date(remote_url: str, dst_file: Path) -> bool:
+    """Return True if ``dst_file`` has the size and mtime the server reports.
+
+    A failed request or a missing or malformed header counts as "not up to
+    date", so the caller downloads the file again instead of crashing.
+    """
+    try:
+        # curl follows redirects (-L); requests.head() does not by default
+        r = requests.head(remote_url, allow_redirects=True,
+                          timeout=HTTP_TIMEOUT)
+        r.raise_for_status()
+        remote_filesize = int(r.headers['content-length'])
+        remote_date = parsedate_to_datetime(r.headers['last-modified'])
+    except (requests.RequestException, KeyError, TypeError, ValueError):
+        return False
+
+    stat = dst_file.stat()
+    return (remote_filesize == stat.st_size and
+            remote_date.timestamp() == stat.st_mtime)
+
+
+def main():
+    """Sync the remote site into the working directory, then prune it."""
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--base-url", default=BASE_URL)
+    parser.add_argument("--working-dir", default=WORKING_DIR)
+    args = parser.parse_args()
+
+    if args.working_dir is None:
+        raise Exception("Working Directory is None")
+
+    working_dir = Path(args.working_dir)
+
+    remote_filelist = set()
+    rs = RemoteSite(args.base_url)
+    for url in rs.files:
+        dst_file = working_dir / rs.relpath(url)
+        remote_filelist.add(dst_file.relative_to(working_dir))
+
+        if dst_file.is_file():
+            if is_up_to_date(url, dst_file):
+                print("Skipping", dst_file.relative_to(working_dir), flush=True)
+                continue
+
+            dst_file.unlink()
+        else:
+            dst_file.parent.mkdir(parents=True, exist_ok=True)
+
+        print("downloading", url, flush=True)
+        try:
+            curl_download(url, dst_file)
+        except (sp.CalledProcessError, OSError):
+            # best effort: drop the partial file and keep syncing the rest
+            print("Failed to download", url, flush=True)
+            if dst_file.is_file():
+                dst_file.unlink()
+
+    if rs.incomplete:
+        # An unreadable listing leaves remote_filelist short; pruning now
+        # would delete local files that may still exist upstream.
+        print("Remote file list is incomplete, keeping old files", flush=True)
+        return
+
+    local_filelist = {
+        local_file.relative_to(working_dir)
+        for local_file in working_dir.glob('**/*')
+        if local_file.is_file()
+    }
+
+    for old_file in local_filelist - remote_filelist:
+        print("deleting", old_file, flush=True)
+        old_file = working_dir / old_file
+        old_file.unlink()
+
+
+if __name__ == "__main__":
+    main()
+
+
+# vim: ts=4 sw=4 sts=4 expandtab
diff --git a/docker-ce.sh b/docker-ce.sh
deleted file mode 100755
index 7e92151..0000000
--- a/docker-ce.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-# requires: lftp, jq, python3.5, lxml, pyquery
-# set -x
-set -e
-set -u
-set -o pipefail
-
-_here=`dirname $(realpath $0)`
-GET_FILELIST="${_here}/helpers/docker-ce-filelist.py"
-
-BASE_URL="${TUNASYNC_UPSTREAM_URL:-"https://download.docker.com/"}"
-
-REMOTE_FILELIST="${TUNASYNC_WORKING_DIR}/.filelist.remote"
-LOCAL_FILELIST="${TUNASYNC_WORKING_DIR}/.filelist.local"
-[[ -f $REMOTE_FILELIST ]] && rm $REMOTE_FILELIST
-[[ -f $LOCAL_FILELIST ]] && rm $LOCAL_FILELIST
-
-function cleanup () {
-	echo "cleaning up"
-	[[ -f $REMOTE_FILELIST ]] && rm $REMOTE_FILELIST || true
-	[[ -f $LOCAL_FILELIST ]] && rm $LOCAL_FILELIST || true
-}
-
-trap cleanup EXIT
-
-# download
-while read remote_url; do
-	dst_rel_file=${remote_url#$BASE_URL}
-	dst_file="${TUNASYNC_WORKING_DIR}/${dst_rel_file}"
-	dst_dir=`dirname ${dst_file}`
-
-	echo "${dst_rel_file}" >> $REMOTE_FILELIST
-
-	if [[ -f ${dst_file} ]]; then
-		remote_meta=`curl -sI "${remote_url}"`
-		remote_filesize=`echo -e "$remote_meta" | grep -i '^content-length:' | awk '{print $2}' | tr -d '\n\r' || echo 0`
-		remote_date=`echo -e "$remote_meta" | grep -i '^last-modified:' | sed 's/^last-modified: //I' | tr -d '\n\r' || echo 0`
-		remote_date=`date --date="${remote_date}" +%s`
-
-		local_filesize=`stat -c "%s" ${dst_file}`
-		local_date=`stat -c "%Y" ${dst_file}`
-
-		if (( ${remote_filesize} == ${local_filesize} && ${remote_date} == ${local_date} )) ; then
-			echo "skipping ${dst_rel_file}"
-			continue
-		fi
-		rm $dst_file
-	else
-		mkdir -p $dst_dir
-	fi
-
-	echo "downloading ${remote_url}"
-	curl -o ${dst_file} -s -L --remote-time --show-error --fail ${remote_url} || {
-		echo "Failed: ${remote_url}"
-		[[ -f ${dst_file} ]] && rm ${dst_file}
-	}
-done < <($GET_FILELIST $BASE_URL)
-
-# remove old files
-(cd ${TUNASYNC_WORKING_DIR}; find . -type f ) | sed 's+^\./++' > ${LOCAL_FILELIST}
-comm <(sort $REMOTE_FILELIST) <(sort $LOCAL_FILELIST) -13 | while read file; do
-	file="${TUNASYNC_WORKING_DIR}/$file"
-	echo "deleting ${file}"
-	[[ -f $file ]] && rm ${file}
-done