mirror of
https://github.com/tuna/tunasync-scripts.git
synced 2025-04-20 20:52:43 +00:00
rewrite anaconda script with python
This commit is contained in:
parent
7b86366c56
commit
e0cf1e6d53
196
anaconda.py
Normal file
196
anaconda.py
Normal file
@@ -0,0 +1,196 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
import tempfile
|
||||
import shutil
|
||||
import logging
|
||||
import subprocess as sp
|
||||
from pathlib import Path
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
import requests
|
||||
from pyquery import PyQuery as pq
|
||||
|
||||
DEFAULT_CONDA_REPO_BASE = "https://repo.continuum.io"
DEFAULT_CONDA_CLOUD_BASE = "https://conda.anaconda.org"

# Base URLs are overridable via the environment (mirror chaining / testing).
CONDA_REPO_BASE_URL = os.getenv("CONDA_REPO_URL", DEFAULT_CONDA_REPO_BASE)
# BUG FIX: the cloud URL was read from the misspelled "CONDA_COULD_URL".
# Honor the correctly spelled variable first, but keep the old name as a
# fallback so existing deployments that set the typo'd name keep working.
CONDA_CLOUD_BASE_URL = os.getenv(
    "CONDA_CLOUD_URL",
    os.getenv("CONDA_COULD_URL", DEFAULT_CONDA_CLOUD_BASE),
)

# Destination root, injected by the tunasync worker.
WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR")

# Official Anaconda channels and the architectures mirrored for each.
CONDA_REPOS = ("main", "free", "r", "mro", "pro")
CONDA_ARCHES = (
    "noarch", "linux-64", "linux-32", "linux-armv6l", "linux-armv7l",
    "linux-ppc64le", "osx-64", "osx-32", "win-64", "win-32"
)

# Anaconda Cloud (community) channels, each entry is "<channel>/<arch>".
CONDA_CLOUD_REPOS = (
    "conda-forge/linux-64", "conda-forge/osx-64", "conda-forge/win-64", "conda-forge/noarch",
    "msys2/win-64", "msys2/noarch",
    "bioconda/noarch", "bioconda/linux-64", "bioconda/osx-64",
    "menpo/linux-64", "menpo/osx-64", "menpo/win-64", "menpo/win-32", "menpo/noarch",
)

logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] [%(levelname)s] %(message)s",
)
|
||||
|
||||
|
||||
def md5_check(file: Path, md5: str = None):
    """Return True iff *file*'s MD5 hex digest equals *md5*.

    The file is read in 1 MiB chunks so large packages never need to
    fit in memory.  Passing ``md5=None`` never matches, since the
    computed digest is always a hex string.
    """
    digest = hashlib.md5()
    with file.open('rb') as stream:
        for chunk in iter(lambda: stream.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest() == md5
|
||||
|
||||
|
||||
def curl_download(remote_url: str, dst_file: Path, md5: str = None):
    """Download *remote_url* to *dst_file* using curl.

    Returns None on success, or a short error string on failure, so the
    callers' retry loops can react.  BUG FIX: previously a curl failure
    raised CalledProcessError out of the function, bypassing the retry
    loops in sync_repo()/sync_installer() entirely; it is now converted
    into an error-string return like the MD5 mismatch case.

    If *md5* is given, the downloaded file is verified against it.
    """
    try:
        sp.check_call([
            "curl", "-o", str(dst_file),
            "-sL", "--remote-time", "--show-error",
            "--fail", remote_url,
        ])
    except sp.CalledProcessError as e:
        return "curl failed with exit code {}".format(e.returncode)
    if md5 and (not md5_check(dst_file, md5)):
        return "MD5 mismatch"
|
||||
|
||||
|
||||
def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path):
    """Mirror one conda channel/arch directory rooted at *repo_url*.

    repodata.json(.bz2) is first fetched into *tmpdir*; every package it
    lists that is missing locally or differs in size is downloaded; only
    then is the metadata moved into *local_dir*, so clients never see
    metadata referencing packages that are not mirrored yet.
    """
    logging.info("Start syncing {}".format(repo_url))
    local_dir.mkdir(parents=True, exist_ok=True)

    tmp_repodata = tmpdir / "repodata.json"
    tmp_bz2_repodata = tmpdir / "repodata.json.bz2"

    curl_download(repo_url + '/repodata.json', tmp_repodata)
    curl_download(repo_url + '/repodata.json.bz2', tmp_bz2_repodata)

    with tmp_repodata.open() as f:
        repodata = json.load(f)

    for filename, meta in repodata['packages'].items():
        expected_size = meta['size']
        expected_md5 = meta['md5']

        pkg_url = '/'.join([repo_url, filename])
        dst_file = local_dir / filename

        if dst_file.is_file():
            # Same size as upstream -> assume the local copy is current.
            if dst_file.stat().st_size == expected_size:
                logging.info("Skipping {}".format(filename))
                continue
            dst_file.unlink()

        for _ in range(3):
            logging.info("Downloading {}".format(filename))
            err = curl_download(pkg_url, dst_file, md5=expected_md5)
            if err is None:
                break
            logging.error("Failed to download {}: {}".format(filename, err))

    # Publish the metadata last.
    shutil.move(str(tmp_repodata), str(local_dir / "repodata.json"))
    shutil.move(str(tmp_bz2_repodata), str(local_dir / "repodata.json.bz2"))
|
||||
|
||||
|
||||
def sync_installer(repo_url, local_dir: Path):
    """Mirror the Anaconda/Miniconda installer listing at *repo_url*.

    The upstream index is an HTML table whose data rows have four cells
    (name, size, date, md5).  A local file is kept when both its size
    and its mtime match the upstream HEAD response; otherwise it is
    re-downloaded with MD5 verification.
    """
    logging.info("Start syncing {}".format(repo_url))
    local_dir.mkdir(parents=True, exist_ok=True)

    def remote_list():
        # Yield (filename, md5) pairs parsed out of the HTML index table.
        doc = pq(requests.get(repo_url).content)
        for row in doc('table').find('tr'):
            cells = pq(row).find('td')
            if len(cells) != 4:
                continue  # header / separator rows
            yield (cells[0].find('a').text, cells[3].text)

    for filename, md5 in remote_list():
        pkg_url = "/".join([repo_url, filename])
        dst_file = local_dir / filename

        if dst_file.is_file():
            head = requests.head(pkg_url)
            remote_filesize = int(head.headers['content-length'])
            remote_date = parsedate_to_datetime(head.headers['last-modified'])
            stat = dst_file.stat()

            if (remote_filesize == stat.st_size
                    and remote_date.timestamp() == stat.st_mtime):
                logging.info("Skipping {}".format(filename))
                continue
            dst_file.unlink()

        for _ in range(3):
            logging.info("Downloading {}".format(filename))
            err = curl_download(pkg_url, dst_file, md5=md5)
            if err is None:
                break
            logging.error("Failed to download {}: {}".format(filename, err))
|
||||
|
||||
|
||||
def main():
    """Entry point: mirror installers, official repos, then cloud repos.

    Each sync step is wrapped so that one failing repo does not stop the
    rest of the mirror run.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--working-dir", default=WORKING_DIR)
    args = parser.parse_args()

    if args.working_dir is None:
        raise Exception("Working Directory is None")

    working_dir = Path(args.working_dir)

    # Installer archives (Anaconda & Miniconda).
    for dist in ("archive", "miniconda"):
        remote_url = "{}/{}".format(CONDA_REPO_BASE_URL, dist)
        local_dir = working_dir / dist
        try:
            sync_installer(remote_url, local_dir)
        except Exception:
            logging.exception("Failed to sync installers of {}".format(dist))

    # Official package channels: every repo/arch combination.
    for repo in CONDA_REPOS:
        for arch in CONDA_ARCHES:
            remote_url = "{}/pkgs/{}/{}".format(CONDA_REPO_BASE_URL, repo, arch)
            local_dir = working_dir / "pkgs" / repo / arch

            tmpdir = tempfile.mkdtemp()
            try:
                sync_repo(remote_url, local_dir, Path(tmpdir))
            except Exception:
                logging.exception("Failed to sync repo: {}/{}".format(repo, arch))
            finally:
                shutil.rmtree(tmpdir)

    # Cloud channels; each entry already has the "<channel>/<arch>" form.
    for repo in CONDA_CLOUD_REPOS:
        remote_url = "{}/{}".format(CONDA_CLOUD_BASE_URL, repo)
        local_dir = working_dir / "cloud" / repo

        tmpdir = tempfile.mkdtemp()
        try:
            sync_repo(remote_url, local_dir, Path(tmpdir))
        except Exception:
            # BUG FIX: this previously logged "{}/{}".format(repo, arch),
            # where `arch` was a stale leftover from the loop above and
            # `repo` already contains the channel/arch pair.
            logging.exception("Failed to sync repo: {}".format(repo))
        finally:
            shutil.rmtree(tmpdir)


if __name__ == "__main__":
    main()
|
||||
|
||||
# vim: ts=4 sw=4 sts=4 expandtab
|
185
anaconda.sh
185
anaconda.sh
@@ -1,185 +0,0 @@
|
||||
#!/bin/bash
# requires: wget, lftp, jq, python3.5, lxml, pyquery
set -e
set -u
set -o pipefail

# $(...) instead of backticks, and quoted so paths with spaces survive.
_here="$(dirname "$(realpath "$0")")"
HTMLPARSE="${_here}/helpers/anaconda-filelist.py"

DEFAULT_CONDA_REPO_BASE="https://repo.continuum.io"
DEFAULT_CONDA_CLOUD_BASE="https://conda.anaconda.org"

# Overridable via the environment (mirror chaining / testing).
CONDA_REPO_BASE="${CONDA_REPO_BASE:-$DEFAULT_CONDA_REPO_BASE}"
CONDA_CLOUD_BASE="${CONDA_CLOUD_BASE:-$DEFAULT_CONDA_CLOUD_BASE}"

# Destination root, injected by the tunasync worker.
LOCAL_DIR_BASE="${TUNASYNC_WORKING_DIR}"

TMP_DIR=$(mktemp -d)

# Official channels and the architectures mirrored for each.
CONDA_REPOS=("main" "free" "r" "mro" "pro")
CONDA_ARCHES=("noarch" "linux-64" "linux-32" "linux-armv6l" "linux-armv7l" "linux-ppc64le" "osx-64" "osx-32" "win-64" "win-32")

# Anaconda Cloud channels, each entry is "<channel>/<arch>".
CONDA_CLOUD_REPOS=(
    "conda-forge/linux-64" "conda-forge/osx-64" "conda-forge/win-64" "conda-forge/noarch"
    "msys2/win-64" "msys2/noarch"
    "bioconda/noarch" "bioconda/linux-64" "bioconda/osx-64"
    "menpo/linux-64" "menpo/osx-64" "menpo/win-64" "menpo/win-32" "menpo/noarch"
)

EXIT_STATUS=0
EXIT_MSG=""
|
||||
|
||||
# Download $1 (remote URL) to $2 (local path) only if it exists upstream.
# Returns non-zero when the remote file is absent.
function check-and-download () {
    local remote_file=$1
    local local_file=$2
    # ROBUSTNESS FIX: run wget directly inside `if` instead of testing $?
    # afterwards — the old form only survived `set -e` because callers
    # happened to invoke us under `|| true`.
    if wget -q --spider "${remote_file}"; then
        echo "downloading ${remote_file}"
        wget -q -O "${local_file}" "${remote_file}"
        return
    fi
    return 1
}
|
||||
|
||||
# EXIT-trap handler: remove the scratch files and the scratch directory.
function cleanup () {
    echo "cleaning up"
    if [[ -d ${TMP_DIR} ]]; then
        # -f: each file may or may not exist at this point.
        rm -f "${TMP_DIR}/repodata.json" \
              "${TMP_DIR}/repodata.json.bz2" \
              "${TMP_DIR}/failed"
        rmdir "${TMP_DIR}"
    fi
}
|
||||
|
||||
# Download $1 (URL) to $2 (path) and verify it against $3 (md5 hex).
# Retries on any failure; gives up (returns 1) after more than 3 attempts.
function download-with-checksum () {
    local pkg_url=$1
    local dest_file=$2
    local pkgmd5=$3

    # BUG FIX: was `local declare downloaded=false`, which also created a
    # stray local variable literally named `declare`.
    local downloaded=false
    local trials=0

    while [[ $downloaded != true ]]; do
        echo "downloading ${pkg_url}"
        # two spaces: md5sum's "<hash>  <file>" check format
        if wget -q -O "${dest_file}" "${pkg_url}" &&
           md5sum -c - < <(echo "${pkgmd5}  ${dest_file}"); then
            downloaded=true
        else
            trials=$((trials + 1))
        fi
        if (( trials > 3 )); then
            return 1
        fi
    done
    return 0
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
|
||||
# Mirror an installer directory listing.
# $1: repo URL (trailing slash)  $2: local directory (trailing slash)
function sync_installer () {
    local repo_url="$1"
    local repo_dir="$2"

    [[ ! -d "$repo_dir" ]] && mkdir -p "$repo_dir"
    cd "$repo_dir"

    # HTMLPARSE emits "<name> <timestamp> <md5>" per installer file.
    while read -a tokens; do
        fname=${tokens[0]}
        pkgmd5=${tokens[2]}

        dest_file="${repo_dir}${fname}"
        pkg_url="${repo_url}${fname}"
        # FIX: -i so lowercase "content-length" headers also match.
        pkgsize=$(curl --head -s "${pkg_url}" | grep -i 'Content-Length' | awk '{print $2}' | tr -d '\r')

        # Size match -> assume the installer is unchanged; skip it.
        if [[ -f ${dest_file} ]]; then
            rsize=$(stat -c "%s" "${dest_file}")
            if (( rsize == pkgsize )); then
                echo "Skipping ${fname}, size ${pkgsize}"
                continue
            fi
        fi
        download-with-checksum "${pkg_url}" "${dest_file}" "${pkgmd5}" || {
            echo "Failed to download ${pkg_url}: checksum mismatch"
            echo "${pkg_url}" >> "${TMP_DIR}/failed"
            EXIT_STATUS=2
            EXIT_MSG="some files has bad checksum."
        }
    done < <(wget -O- "${repo_url}" | "$HTMLPARSE")
}
|
||||
|
||||
# Mirror one conda channel/arch directory.
# $1: remote repo URL  $2: local directory
function sync_repo () {
    local repo_url="$1"
    local local_dir="$2"

    [[ ! -d ${local_dir} ]] && mkdir -p "${local_dir}"

    repodata_url="${repo_url}/repodata.json"
    bz2_repodata_url="${repo_url}/repodata.json.bz2"

    tmp_repodata="${TMP_DIR}/repodata.json"
    tmp_bz2_repodata="${TMP_DIR}/repodata.json.bz2"

    check-and-download "${repodata_url}" "${tmp_repodata}"
    check-and-download "${bz2_repodata_url}" "${tmp_bz2_repodata}"

    # Emit "<filename> <size> <md5>" for every package in the metadata.
    jq_cmd='.packages | to_entries[] | [.key, .value.size, .value.md5] | map(tostring) | join(" ")'

    while read line; do
        read -a tokens <<< "$line"
        pkgfile=${tokens[0]}
        pkgsize=${tokens[1]}
        pkgmd5=${tokens[2]}

        pkg_url="${repo_url}/${pkgfile}"
        dest_file="${local_dir}/${pkgfile}"

        # Size match -> assume the package is unchanged; skip it.
        if [[ -f ${dest_file} ]]; then
            rsize=$(stat -c "%s" "${dest_file}")
            if (( rsize == pkgsize )); then
                echo "Skipping ${pkgfile}, size ${pkgsize}"
                continue
            fi
        fi

        download-with-checksum "${pkg_url}" "${dest_file}" "${pkgmd5}" || {
            echo "Failed to download ${pkg_url}: checksum mismatch"
            echo "${pkg_url}" >> "${TMP_DIR}/failed"
            # CONSISTENCY FIX: sync_installer sets EXIT_STATUS=2 on checksum
            # failure; this path previously only set EXIT_MSG.
            EXIT_STATUS=2
            EXIT_MSG="some files has bad checksum."
        }

    done < <(bzip2 -c -d "${tmp_bz2_repodata}" | jq -r "${jq_cmd}")

    # Publish metadata only after all packages are in place.
    mv -f "${TMP_DIR}/repodata.json" "${local_dir}/repodata.json"
    mv -f "${TMP_DIR}/repodata.json.bz2" "${local_dir}/repodata.json.bz2"
}
|
||||
|
||||
sync_installer "${CONDA_REPO_BASE}/archive/" "${LOCAL_DIR_BASE}/archive/"
sync_installer "${CONDA_REPO_BASE}/miniconda/" "${LOCAL_DIR_BASE}/miniconda/"

# Official channels: every repo/arch combination.
# FIX: array expansions quoted so elements never get re-split.
for repo in "${CONDA_REPOS[@]}"; do
    for arch in "${CONDA_ARCHES[@]}"; do
        remote_url="${CONDA_REPO_BASE}/pkgs/$repo/$arch"
        local_dir="${LOCAL_DIR_BASE}/pkgs/$repo/$arch"

        # || true: keep mirroring the other repos when one fails.
        sync_repo "${remote_url}" "${local_dir}" || true
    done
done

# Cloud channels; entries already look like "<channel>/<arch>".
for repo in "${CONDA_CLOUD_REPOS[@]}"; do
    remote_url="${CONDA_CLOUD_BASE}/${repo}"
    local_dir="${LOCAL_DIR_BASE}/cloud/${repo}"

    sync_repo "${remote_url}" "${local_dir}" || true
done

# Surface any recorded download failures next to the mirrored tree.
[[ -f ${TMP_DIR}/failed ]] && {
    echo "failed to download following packages:"
    cat "${TMP_DIR}/failed"
    mv "${TMP_DIR}/failed" "${LOCAL_DIR_BASE}/failed_packages.txt"
}

[[ -z $EXIT_MSG ]] || echo "$EXIT_MSG"
exit $EXIT_STATUS
|
@@ -1,37 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from datetime import datetime
|
||||
from pyquery import PyQuery as pq
|
||||
|
||||
|
||||
def get_filelist(htmlstring):
    """Yield (filename, mtime_epoch_str, md5) for each file in the index.

    The upstream directory listing is an HTML table whose data rows have
    exactly four cells: name, size, modification date, md5.
    """
    d = pq(htmlstring)
    for tr in d('table').find('tr'):
        tds = pq(tr).find('td')
        if len(tds) != 4:
            continue  # header / separator rows
        fname = tds[0].find('a').text
        mdate = tds[2].text
        md5 = tds[3].text
        # PORTABILITY FIX: strftime("%s") is a non-standard glibc
        # extension; compute local-time epoch seconds with timestamp().
        dt = datetime.strptime(mdate, "%Y-%m-%d %H:%M:%S")
        ts = str(int(dt.timestamp()))
        yield (fname, ts, md5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse
    import fileinput

    parser = argparse.ArgumentParser()
    parser.add_argument("htmlfile", nargs='?', default="-")
    args = parser.parse_args()

    # "-" (the default) means: read the HTML document from stdin.
    if args.htmlfile == "-":
        htmlstring = '\n'.join(line for line in fileinput.input())
    else:
        with open(args.htmlfile) as f:
            htmlstring = f.read()

    # One tab-separated "name<TAB>timestamp<TAB>md5" record per line.
    for file_record in get_filelist(htmlstring):
        print("\t".join(file_record))
|
||||
|
||||
|
||||
# vim: ts=4 sw=4 sts=4 expandtab
|
Loading…
x
Reference in New Issue
Block a user