rewritten anaconda installer mirror script

This commit is contained in:
bigeagle 2016-12-05 12:41:45 +08:00
parent c2e227e7ce
commit 0a875d4d1a
3 changed files with 86 additions and 19 deletions

View File

@ -1,11 +1,14 @@
#!/bin/bash #!/bin/bash
# requires: wget, lftp, jq # requires: wget, lftp, jq, python3.5, lxml, pyquery
#
set -e set -e
set -u
set -o pipefail set -o pipefail
CONDA_REPO_BASE=${CONDA_REPO_BASE:-"http://repo.continuum.io"} _here=`dirname $(realpath $0)`
HTMLPARSE="${_here}/helpers/anaconda-filelist.py"
CONDA_REPO_BASE="${CONDA_REPO_BASE:-"https://repo.continuum.io"}"
LOCAL_DIR_BASE="${TUNASYNC_WORKING_DIR}/pkgs" LOCAL_DIR_BASE="${TUNASYNC_WORKING_DIR}/pkgs"
TMP_DIR=$(mktemp -d) TMP_DIR=$(mktemp -d)
@ -37,6 +40,44 @@ trap cleanup EXIT
echo ${TMP_DIR} echo ${TMP_DIR}
#######################################
# Mirror every installer listed on an upstream HTML index page.
#
# The index is fetched with wget and piped through ${HTMLPARSE}, which is
# expected to print one whitespace-separated record per file:
#   <file name> <timestamp> <md5>
# A file is skipped when a local copy exists whose size equals the
# Content-Length reported by a HEAD request; otherwise it is downloaded and
# re-downloaded until its md5 checksum matches the listing.
#
# Globals:   HTMLPARSE (read) - command that turns the index HTML into records
# Arguments: $1 - upstream index URL, with a trailing slash
#            $2 - local destination directory, with a trailing slash
# Outputs:   progress messages on stdout
# Returns:   non-zero if the destination directory cannot be entered
#######################################
function sync_installer() {
    local repo_url="$1"
    local repo_dir="$2"
    local fname pkgmd5 dest_file pkg_url pkgsize rsize downloaded
    local -a tokens

    mkdir -p "${repo_dir}"
    cd "${repo_dir}" || return 1
    # lftp "${repo_url}/" -e "mirror --verbose -P 5; bye"

    while read -r -a tokens; do
        # Ignore blank or truncated records instead of tripping `set -u`
        # on a missing md5 field.
        (( ${#tokens[@]} >= 3 )) || continue
        fname="${tokens[0]}"
        pkgmd5="${tokens[2]}"
        dest_file="${repo_dir}${fname}"
        pkg_url="${repo_url}${fname}"

        # Header names are case-insensitive (HTTP/2 servers send them in
        # lower case), so match without regard to case. awk, unlike grep,
        # does not fail the pipeline when the header is absent; pkgsize is
        # simply left empty in that case.
        pkgsize=$(
            curl --head -s "${pkg_url}" \
                | awk 'tolower($1) == "content-length:" { size = $2 } END { print size }' \
                | tr -d '\r'
        )

        downloaded=false
        if [[ -f "${dest_file}" ]]; then
            rsize=$(stat -c "%s" "${dest_file}")
            # An unknown remote size never counts as a match.
            if [[ -n "${pkgsize}" && "${rsize}" == "${pkgsize}" ]]; then
                downloaded=true
                echo "Skipping ${fname}, size ${pkgsize}"
            fi
        fi

        # NOTE: retries without limit until the checksum verifies.
        while [[ "${downloaded}" != true ]]; do
            echo "downloading ${pkg_url}"
            # two spaces between checksum and path: md5sum check format
            if wget -q -O "${dest_file}" "${pkg_url}" \
                && md5sum -c - <<<"${pkgmd5}  ${dest_file}"; then
                downloaded=true
            fi
        done
    done < <(wget -O- "${repo_url}" | "${HTMLPARSE}")
}
# Mirror the "archive" and "miniconda" installer indexes from the upstream
# repo into the same-named directories under the tunasync working directory.
sync_installer "${CONDA_REPO_BASE}/archive/" "${TUNASYNC_WORKING_DIR}/archive/"
sync_installer "${CONDA_REPO_BASE}/miniconda/" "${TUNASYNC_WORKING_DIR}/miniconda/"
for repo in ${CONDA_REPOS[@]}; do for repo in ${CONDA_REPOS[@]}; do
for arch in ${CONDA_ARCHES[@]}; do for arch in ${CONDA_ARCHES[@]}; do
PKG_REPO_BASE="${CONDA_REPO_BASE}/pkgs/$repo/$arch" PKG_REPO_BASE="${CONDA_REPO_BASE}/pkgs/$repo/$arch"
@ -62,18 +103,18 @@ for repo in ${CONDA_REPOS[@]}; do
dest_file="${LOCAL_DIR}/${pkgfile}" dest_file="${LOCAL_DIR}/${pkgfile}"
declare downloaded=false declare downloaded=false
if [ -f ${dest_file} ]; then if [[ -f ${dest_file} ]]; then
rsize=`stat -c "%s" ${dest_file}` rsize=`stat -c "%s" ${dest_file}`
if [ ${rsize} -eq ${pkgsize} ]; then if (( ${rsize} == ${pkgsize} )); then
downloaded=true downloaded=true
echo "Skipping ${pkgfile}, size ${pkgsize}" echo "Skipping ${pkgfile}, size ${pkgsize}"
fi fi
fi fi
while [ $downloaded != true ]; do while [[ $downloaded != true ]]; do
echo "downloading ${pkg_url}" echo "downloading ${pkg_url}"
wget -q -O ${dest_file} ${pkg_url} && { wget -q -O ${dest_file} ${pkg_url} && {
# two space for md5sum check format # two space for md5sum check format
echo "${pkgmd5} ${dest_file}" | md5sum -c - && downloaded=true { md5sum -c - < <(echo "${pkgmd5} ${dest_file}"); } && downloaded=true
} }
done done
done done
@ -83,14 +124,3 @@ for repo in ${CONDA_REPOS[@]}; do
done done
done done
#######################################
# Mirror an upstream directory into a local directory with lftp.
#
# Arguments: $1 - upstream URL to mirror
#            $2 - local destination directory (created if missing)
# Returns:   non-zero if the destination cannot be entered, otherwise the
#            exit status of lftp
#######################################
function sync_installer() {
    local repo_url="$1"
    local repo_dir="$2"

    mkdir -p "${repo_dir}"
    # Never let lftp mirror into whatever directory we happened to be in.
    cd "${repo_dir}" || return 1
    lftp "${repo_url}/" -e "mirror --verbose -P 5; bye"
}
# Sync the "archive" and "miniconda" directories of the upstream repo into
# the same-named directories under the tunasync working directory.
sync_installer "${CONDA_REPO_BASE}/archive/" "${TUNASYNC_WORKING_DIR}/archive/"
sync_installer "${CONDA_REPO_BASE}/miniconda/" "${TUNASYNC_WORKING_DIR}/miniconda/"

37
helpers/anaconda-filelist.py Executable file
View File

@ -0,0 +1,37 @@
#!/usr/bin/env python3
from datetime import datetime
from pyquery import PyQuery as pq
def get_filelist(htmlstring):
    """Yield one ``(fname, ts, md5)`` tuple of strings per file row of an index page.

    Only ``<tr>`` rows inside a ``<table>`` that have exactly four ``<td>``
    cells are considered. The file name is the text of the ``<a>`` in the
    first cell, the modification date is the third cell and the md5 checksum
    is the fourth. Rows lacking a link, a date or a checksum are skipped.

    ``ts`` is the modification date converted to integer seconds since the
    epoch, interpreting the naive date in the local timezone.

    Raises:
        ValueError: if a date cell does not match ``%Y-%m-%d %H:%M:%S``.
    """
    d = pq(htmlstring)
    for tr in d('table').find('tr'):
        tds = pq(tr).find('td')
        if len(tds) != 4:
            continue

        link = tds[0].find('a')
        if link is None:
            # A four-cell row without a link carries no file name.
            continue

        fname, mdate, md5 = link.text, tds[2].text, tds[3].text
        if not (fname and mdate and md5):
            # Empty cells would otherwise break strptime or the caller's join.
            continue

        parsed = datetime.strptime(mdate.strip(), "%Y-%m-%d %H:%M:%S")
        # strftime("%s") is a platform-specific extension that Python does not
        # document; timestamp() gives the same local-time epoch seconds
        # portably.
        ts = str(int(parsed.timestamp()))
        yield (fname.strip(), ts, md5.strip())
if __name__ == "__main__":
    # Command-line entry point: read an HTML index from the file named by the
    # optional positional argument (or from stdin when it is omitted or "-")
    # and print one tab-separated record per file found by get_filelist().
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("htmlfile", nargs='?', default="-")
    args = parser.parse_args()

    if args.htmlfile == "-":
        # Read stdin verbatim. Joining already newline-terminated lines with
        # "\n" would double every line break in the document.
        htmlstring = sys.stdin.read()
    else:
        with open(args.htmlfile) as f:
            htmlstring = f.read()

    for file_record in get_filelist(htmlstring):
        print("\t".join(file_record))
# vim: ts=4 sw=4 sts=4 expandtab

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
def get_filelist(xmlstring): def get_filelist(xmlstring):
r = ET.fromstring(xmlstring) r = ET.fromstring(xmlstring)
ns = { ns = {