From 0a875d4d1a5b5daf53ff11fe2e4c0c1fed5d335a Mon Sep 17 00:00:00 2001
From: bigeagle
Date: Mon, 5 Dec 2016 12:41:45 +0800
Subject: [PATCH] rewritten anaconda installer mirror script

---
Review notes (this section sits between the "---" marker and the first
"diff --git" line, so it is not part of the commit message or the diff):

* Layout: this patch had been collapsed onto three physical lines. Line
  breaks are restored to match the hunk headers. Indentation and the exact
  position of blank context lines could not be recovered and are a best
  guess -- check the context against the repository before applying.
* The md5sum input is written with two spaces between checksum and file
  name, as the adjacent "two space for md5sum check format" comment says.
* sync_installer fetches the index page with wget, pipes it through
  helpers/anaconda-filelist.py and uses field 0 (file name) and field 2
  (md5) of each record; field 1 (timestamp) is not used by the shell loop.
* sync_installer's retry loop ends only when wget succeeds and md5sum
  verifies, so a persistently failing download or checksum retries forever.
* pkgsize relies on a case-sensitive grep for 'Content-Length'. If nothing
  matches, the pipeline fails, which under "set -e" + "pipefail" would
  abort the script -- TODO confirm against the server's header casing.
* curl is used by sync_installer but is not listed on the "requires" line.
* anaconda-filelist.py: strftime("%s") is not among the format codes
  documented by Python; it depends on the platform C library.
* anaconda-filelist.py: lines returned by fileinput keep their trailing
  newline, so '\n'.join(...) doubles every line break in the HTML text.

 anaconda.sh                  | 66 ++++++++++++++++++++++++++----------
 helpers/anaconda-filelist.py | 37 ++++++++++++++++++++
 helpers/tf-xml-filelist.py   |  2 +-
 3 files changed, 86 insertions(+), 19 deletions(-)
 create mode 100755 helpers/anaconda-filelist.py

diff --git a/anaconda.sh b/anaconda.sh
index bc53fad..8549881 100755
--- a/anaconda.sh
+++ b/anaconda.sh
@@ -1,11 +1,14 @@
 #!/bin/bash
-# requires: wget, lftp, jq
-#
+# requires: wget, lftp, jq, python3.5, lxml, pyquery
 
 set -e
+set -u
 set -o pipefail
 
-CONDA_REPO_BASE=${CONDA_REPO_BASE:-"http://repo.continuum.io"}
+_here=`dirname $(realpath $0)`
+HTMLPARSE="${_here}/helpers/anaconda-filelist.py"
+
+CONDA_REPO_BASE="${CONDA_REPO_BASE:-"https://repo.continuum.io"}"
 LOCAL_DIR_BASE="${TUNASYNC_WORKING_DIR}/pkgs"
 TMP_DIR=$(mktemp -d)
 
@@ -37,6 +40,44 @@ trap cleanup EXIT
 
 echo ${TMP_DIR}
 
+
+function sync_installer() {
+	repo_url="$1"
+	repo_dir="$2"
+
+	[[ ! -d "$repo_dir" ]] && mkdir -p "$repo_dir"
+	cd $repo_dir
+	# lftp "${repo_url}/" -e "mirror --verbose -P 5; bye"
+
+	while read -a tokens; do
+		fname=${tokens[0]}
+		pkgmd5=${tokens[2]}
+
+		dest_file="${repo_dir}${fname}"
+		pkg_url="${repo_url}${fname}"
+		pkgsize=`curl --head -s ${pkg_url} | grep 'Content-Length' | awk '{print $2}' | tr -d '\r'`
+
+		declare downloaded=false
+		if [[ -f ${dest_file} ]]; then
+			rsize=`stat -c "%s" ${dest_file}`
+			if (( ${rsize} == ${pkgsize} )); then
+				downloaded=true
+				echo "Skipping ${fname}, size ${pkgsize}"
+			fi
+		fi
+		while [[ $downloaded != true ]]; do
+			echo "downloading ${pkg_url}"
+			wget -q -O ${dest_file} ${pkg_url} && {
+				# two space for md5sum check format
+				{ md5sum -c - < <(echo "${pkgmd5}  ${dest_file}"); } && downloaded=true
+			}
+		done
+	done < <(wget -O- ${repo_url} | $HTMLPARSE)
+}
+
+sync_installer "${CONDA_REPO_BASE}/archive/" "${TUNASYNC_WORKING_DIR}/archive/"
+sync_installer "${CONDA_REPO_BASE}/miniconda/" "${TUNASYNC_WORKING_DIR}/miniconda/"
+
 for repo in ${CONDA_REPOS[@]}; do
 	for arch in ${CONDA_ARCHES[@]}; do
 		PKG_REPO_BASE="${CONDA_REPO_BASE}/pkgs/$repo/$arch"
@@ -62,18 +103,18 @@ for repo in ${CONDA_REPOS[@]}; do
 			dest_file="${LOCAL_DIR}/${pkgfile}"
 
 			declare downloaded=false
-			if [ -f ${dest_file} ]; then
+			if [[ -f ${dest_file} ]]; then
 				rsize=`stat -c "%s" ${dest_file}`
-				if [ ${rsize} -eq ${pkgsize} ]; then
+				if (( ${rsize} == ${pkgsize} )); then
 					downloaded=true
 					echo "Skipping ${pkgfile}, size ${pkgsize}"
 				fi
 			fi
-			while [ $downloaded != true ]; do
+			while [[ $downloaded != true ]]; do
 				echo "downloading ${pkg_url}"
 				wget -q -O ${dest_file} ${pkg_url} && {
 					# two space for md5sum check format
-					echo "${pkgmd5}  ${dest_file}" | md5sum -c - && downloaded=true
+					{ md5sum -c - < <(echo "${pkgmd5}  ${dest_file}"); } && downloaded=true
 				}
 			done
 		done
@@ -83,14 +124,3 @@ for repo in ${CONDA_REPOS[@]}; do
 	done
 done
 
-function sync_installer() {
-	repo_url="$1"
-	repo_dir="$2"
-
-	[ ! -d "$repo_dir" ] && mkdir -p "$repo_dir"
-	cd $repo_dir
-	lftp "${repo_url}/" -e "mirror --verbose -P 5; bye"
-}
-
-sync_installer "${CONDA_REPO_BASE}/archive/" "${TUNASYNC_WORKING_DIR}/archive/"
-sync_installer "${CONDA_REPO_BASE}/miniconda/" "${TUNASYNC_WORKING_DIR}/miniconda/"
diff --git a/helpers/anaconda-filelist.py b/helpers/anaconda-filelist.py
new file mode 100755
index 0000000..bdcf06f
--- /dev/null
+++ b/helpers/anaconda-filelist.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+from datetime import datetime
+from pyquery import PyQuery as pq
+
+
+def get_filelist(htmlstring):
+    d = pq(htmlstring)
+    for tr in d('table').find('tr'):
+        tds = pq(tr).find('td')
+        if len(tds) != 4:
+            continue
+        fname = tds[0].find('a').text
+        mdate = tds[2].text
+        md5 = tds[3].text
+        ts = datetime.strptime(mdate, "%Y-%m-%d %H:%M:%S").strftime("%s")
+        yield (fname, ts, md5)
+
+
+if __name__ == "__main__":
+    import argparse
+    import fileinput
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("htmlfile", nargs='?', default="-")
+    args = parser.parse_args()
+
+    if args.htmlfile == "-":
+        htmlstring = '\n'.join([line for line in fileinput.input()])
+    else:
+        with open(args.htmlfile) as f:
+            htmlstring = f.read()
+
+    for file_record in get_filelist(htmlstring):
+        print("\t".join(file_record))
+
+
+# vim: ts=4 sw=4 sts=4 expandtab
diff --git a/helpers/tf-xml-filelist.py b/helpers/tf-xml-filelist.py
index d534900..68557d0 100755
--- a/helpers/tf-xml-filelist.py
+++ b/helpers/tf-xml-filelist.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
-import argparse
 import xml.etree.ElementTree as ET
 
+
 def get_filelist(xmlstring):
     r = ET.fromstring(xmlstring)
     ns = {