diff --git a/Dockerfile b/Dockerfile
index d6debbf..e371ed0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,8 @@
-FROM python:3.7-buster
+FROM debian:buster
 MAINTAINER Justin Wong
 
 RUN apt-get update && \
-    apt-get install -y wget curl rsync lftp git jq python-dev python-pip yum-utils createrepo aria2 awscli ack composer php-curl php-zip
+    apt-get install -y wget curl rsync lftp git jq python3-dev python3-pip yum-utils createrepo aria2 awscli ack composer php-curl php-zip
 
 RUN STATIC_DEPS=true pip3 install pyquery
 RUN pip3 install requests[socks] pyyaml gsutil bandersnatch==3.6.0
diff --git a/adoptopenjdk.sh b/adoptopenjdk.sh
index 96c6977..fb5e39a 100755
--- a/adoptopenjdk.sh
+++ b/adoptopenjdk.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# requires: curl, sha256sum, awk
+# requires: curl, sha256sum, awk, jq
 set -e
 
 BASE_PATH="${TUNASYNC_WORKING_DIR}"
@@ -7,6 +7,7 @@ BASE_PATH="${TUNASYNC_WORKING_DIR}"
 # The argument is a release version, e.g. 8 or 11
 function downloadRelease() {
     remote_filelist="$BASE_PATH/$1/filelist"
+    mkdir -p "$BASE_PATH/$1"
     echo -n "" >$remote_filelist
     curl -s "https://api.adoptopenjdk.net/v2/latestAssets/releases/openjdk$1" | \
         jq -r '.[]| [.version,.binary_type,.architecture,.os,.binary_name,.binary_link,.checksum_link,.installer_name,.installer_link,.installer_checksum_link]| @tsv' | \
@@ -20,12 +21,14 @@ function downloadRelease() {
             echo "Skipping $binary_name"
             downloaded=true
         fi
-        while [[ $downloaded != true ]]; do
+        local retry=0
+        while [[ $retry -lt 3 && $downloaded != true ]]; do
             echo "Downloading ${dest_filename}"
             link="$binary_link"
             download_and_check && {
-            downloaded=true
+                downloaded=true
             }
+            ((retry+=1))
         done
         if [[ ! -z "$installer_name" ]]; then
             dest_filename="$BASE_PATH/$version/$binary_type/$architecture/$os/$installer_name"
@@ -36,13 +39,15 @@ function downloadRelease() {
                 echo "Skipping $installer_name"
                 downloaded=true
             fi
-            while [[ $downloaded != true ]]; do
+            retry=0
+            while [[ $retry -lt 3 && $downloaded != true ]]; do
                 echo "Downloading ${dest_filename}"
                 link="$installer_link"
                 checksum_link="$installer_checksum_link"
                 download_and_check && {
                     downloaded=true
                 }
+                ((retry+=1))
             done
         fi
     done
@@ -51,12 +56,13 @@ function downloadRelease() {
 function clean_old_releases() {
     declare version=$1
     declare remote_filelist="$BASE_PATH/$version/filelist"
-    declare local_filelist="/tmp/filelist.local"
-    find "$BASE_PATH/$version" -type f > ${local_filelist}
-    comm <(sort $remote_filelist) <(sort $local_filelist) -13 | while read file; do
-        echo "deleting ${file}"
-        rm "${file}"
-    done
+    declare local_filelist="/tmp/filelist.local"
+    [[ ! -f "$remote_filelist" ]] && return 0
+    find "$BASE_PATH/$version" -type f > ${local_filelist}
+    comm <(sort $remote_filelist) <(sort $local_filelist) -13 | while read file; do
+        echo "deleting ${file}"
+        # rm "${file}"
+    done
 }
 
 function download_and_check() {
@@ -67,7 +73,11 @@ function download_and_check() {
         "$link"
     curl -s -S --fail -L ${CURL_OPTIONS:-} \
         -o "${dest_filename}.sha256.txt.tmp" \
-        "$checksum_link"
+        "$checksum_link" || {
+        echo "Warning: ${dest_filename}.sha256.txt does not exist, skipping SHA256 check"
+        mv "${dest_filename}.tmp" "${dest_filename}"
+        return 0
+    }
     sha256sum_check && {
         mv "${dest_filename}.sha256.txt.tmp" "${dest_filename}.sha256.txt"
         mv "${dest_filename}.tmp" "${dest_filename}"
@@ -78,14 +88,14 @@ function sha256sum_check() {
     expected=$(cat "${dest_filename}.sha256.txt.tmp" | awk '{print $1}')
     actual=$(sha256sum "${dest_filename}.tmp" | awk '{print $1}')
-    if [ "$expected" = "$actual" ]; then
+    if [[ "$expected" = "$actual" ]]; then
         return 0
     else
         return 1
     fi
 }
 
-for i in 8 11 13;
+for i in 8 9 10 11 12 13 14;
 do
     downloadRelease $i && clean_old_releases $i
 done
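Note on the retry change above: the old `while [[ $downloaded != true ]]` loop spun forever when an asset consistently failed to download; the counter now bounds each file at three attempts. A minimal standalone sketch of the same pattern (the URL and output path are placeholders, not part of the patch):

    #!/bin/bash
    # Bounded retry: attempt a download at most 3 times, then give up.
    retry=0
    downloaded=false
    while [[ $retry -lt 3 && $downloaded != true ]]; do
        echo "attempt $((retry + 1))"
        curl -s -S --fail -L -o /tmp/file.tar.gz "https://example.com/file.tar.gz" \
            && downloaded=true
        ((retry+=1))
    done
    [[ $downloaded == true ]] || echo "giving up after $retry attempts"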
-f "$remote_filelist" ]] && return 0 + find "$BASE_PATH/$version" -type f > ${local_filelist} + comm <(sort $remote_filelist) <(sort $local_filelist) -13 | while read file; do + echo "deleting ${file}" + # rm "${file}" + done } function download_and_check() { @@ -67,7 +73,11 @@ function download_and_check() { "$link" curl -s -S --fail -L ${CURL_OPTIONS:-} \ -o "${dest_filename}.sha256.txt.tmp" \ - "$checksum_link" + "$checksum_link" || { + echo "Warning: ${dest_filename}.sha256.txt not exist, skipping SHA256 check" + mv "${dest_filename}.tmp" "${dest_filename}" + return 0 + } sha256sum_check && { mv "${dest_filename}.sha256.txt.tmp" "${dest_filename}.sha256.txt" mv "${dest_filename}.tmp" "${dest_filename}" @@ -78,14 +88,14 @@ function download_and_check() { function sha256sum_check() { expected=$(cat "${dest_filename}.sha256.txt.tmp" | awk '{print $1}') actual=$(sha256sum "${dest_filename}.tmp" | awk '{print $1}') - if [ "$expected" = "$actual" ]; then + if [[ "$expected" = "$actual" ]]; then return 0 else return 1 fi } -for i in 8 11 13; +for i in 8 9 10 11 12 13 14; do downloadRelease $i && clean_old_releases $i done diff --git a/dockerfiles/homebrew-mirror/Dockerfile b/dockerfiles/homebrew-mirror/Dockerfile index 342ebac..3974a6e 100644 --- a/dockerfiles/homebrew-mirror/Dockerfile +++ b/dockerfiles/homebrew-mirror/Dockerfile @@ -4,6 +4,6 @@ RUN apk add --no-cache bash ruby ruby-bigdecimal git curl ncurses ruby-json ruby RUN git clone --depth 1 https://github.com/Homebrew/brew.git /home/homebrew/.linuxbrew/homebrew && cd /home/homebrew/.linuxbrew && mkdir bin && ln -s /home/homebrew/.linuxbrew/homebrew/bin/brew /home/homebrew/.linuxbrew/bin -RUN git clone --depth 1 https://github.com/gaoyifan/homebrew-bottle-mirror.git /home/homebrew/.linuxbrew/homebrew/Library/Taps/gaoyifan/homebrew-bottle-mirror +RUN git clone --depth 1 https://github.com/z4yx/homebrew-bottle-mirror.git /home/homebrew/.linuxbrew/homebrew/Library/Taps/gaoyifan/homebrew-bottle-mirror RUN chown -R 2000 /home/homebrew diff --git a/dockerfiles/nix-channels/Dockerfile b/dockerfiles/nix-channels/Dockerfile index 7171d6d..7f3d7d4 100644 --- a/dockerfiles/nix-channels/Dockerfile +++ b/dockerfiles/nix-channels/Dockerfile @@ -1,7 +1,9 @@ -FROM python:3-buster +FROM debian:buster MAINTAINER Wang Ruikang -RUN pip3 install pyquery requests && \ +RUN apt-get update && \ + apt-get install -y python3-dev python3-pip curl && \ + pip3 install pyquery requests minio && \ # Install Nix. 
     # Install Nix. To simplify management we only copy binaries and create
     # symlinks, and do no further configuration
     curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \
diff --git a/dockerfiles/stackage/Dockerfile b/dockerfiles/stackage/Dockerfile
deleted file mode 100644
index 22ed2bf..0000000
--- a/dockerfiles/stackage/Dockerfile
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM python:3.6
-MAINTAINER Justin Wong
-
-RUN echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ jessie main contrib non-free" > /etc/apt/sources.list && \
-    echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ jessie-backports main contrib non-free" >> /etc/apt/sources.list && \
-    echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ jessie-updates main contrib non-free" >> /etc/apt/sources.list && \
-    echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian-security/ jessie/updates main contrib non-free" >> /etc/apt/sources.list
-
-RUN apt-get update && \
-    apt-get install -y git aria2
-
-RUN pip3 install requests pyyaml
-
-RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && apt-get install -y locales -qq && locale-gen
-ENV LANG=en_US.UTF-8
-ENV LANGUAGE=en_US.UTF-8
-ENV LC_ALL=en_US.UTF-8
-
-ENV HOME=/tmp
-CMD /bin/bash
diff --git a/github-release.py b/github-release.py
index fcc3191..0469e62 100755
--- a/github-release.py
+++ b/github-release.py
@@ -30,6 +30,7 @@ REPOS = [
     "rust-analyzer/rust-analyzer",
     "kubernetes/minikube",
     {"repo": "iina/iina", "versions": -1, "pre_release": True, "flat": True},
+    "FreeCAD/FreeCAD",
 ]
 
 # connect and read timeout value
diff --git a/linuxbrew-bottles.sh b/linuxbrew-bottles.sh
index 3e9fd67..96e1e0d 100755
--- a/linuxbrew-bottles.sh
+++ b/linuxbrew-bottles.sh
@@ -24,6 +24,7 @@ do
         repo_name="homebrew-${tap}"
         args="mac"
     fi
+    remote_filelist="$HOMEBREW_CACHE/filelist.txt"
 
     echo "===== SYNC STARTED AT $(date -R) ====="
     dir_core=/home/homebrew/.linuxbrew/homebrew/Library/Taps/homebrew/homebrew-core
@@ -33,4 +34,13 @@
     echo ""
     echo "> RUN brew bottle-mirror $args..."
     /home/homebrew/.linuxbrew/bin/brew bottle-mirror "$args"
+    if [[ -f "$remote_filelist" ]]; then # clean outdated files
+        local_filelist=/tmp/filelist.local
+        (cd ${HOMEBREW_CACHE}; find . -type f -iname "*.tmp" -delete)
+        (cd ${HOMEBREW_CACHE}; find . -type f -mtime +30 -iname "*.tar.gz") | sed 's+^\./++' > $local_filelist
+        comm <(sort $remote_filelist) <(sort $local_filelist) -13 | while read file; do
+            echo "deleting ${HOMEBREW_CACHE}/${file}"
+            rm "${HOMEBREW_CACHE}/${file}"
+        done
+    fi
 done
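The cleanup block added to linuxbrew-bottles.sh (like the similar loop in adoptopenjdk.sh) leans on `comm -13` over two sorted lists: column 1 is lines only in the remote filelist, column 2 lines only in the local list, column 3 lines common to both; suppressing columns 1 and 3 leaves exactly the local files no longer referenced upstream, i.e. the candidates for deletion. A self-contained illustration with throwaway paths:

    # remote.txt: bottles upstream still ships; local.txt: bottles on disk
    printf 'a.tar.gz\nb.tar.gz\n' > /tmp/remote.txt
    printf 'b.tar.gz\nc.tar.gz\n' > /tmp/local.txt
    comm -13 <(sort /tmp/remote.txt) <(sort /tmp/local.txt)
    # prints "c.tar.gz": present locally, gone upstream, so safe to delete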
diff --git a/nix-channels.py b/nix-channels.py
index 7253d1a..50257c2 100755
--- a/nix-channels.py
+++ b/nix-channels.py
@@ -3,17 +3,21 @@ import hashlib
 import json
 import logging
 import lzma
+import minio
 import os
+import pytz
 import re
-import sys
 import requests
 import subprocess
+import sys
 
 from pyquery import PyQuery as pq
 from datetime import datetime, timedelta
 from pathlib import Path
 from concurrent.futures import ThreadPoolExecutor
 
+from minio.credentials import Credentials, Static
+
 from urllib3.util.retry import Retry
 
 ### Config
@@ -35,7 +39,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
 
 STORE_DIR = 'store'
 RELEASES_DIR = 'releases'
-CLONE_SINCE = datetime(2018, 12, 1)
+
+# Channels that have not updated since the migration to Netlify [1] are
+# assumed to be too old and defunct.
+#
+# [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
+CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
 TIMEOUT = 60
 
 working_dir = Path(WORKING_DIR)
@@ -63,9 +72,6 @@ logging.basicConfig(
 # Don't forget 'global failure'
 failure = False
 
-def http_head(*args, **kwargs):
-    return session.head(*args, timeout=TIMEOUT, **kwargs)
-
 def http_get(*args, **kwargs):
     return session.get(*args, timeout=TIMEOUT, **kwargs)
 
@@ -131,28 +137,15 @@ def download(url, dest):
 
     download_dest.rename(dest)
 
-def get_links(url):
-    r = http_get(url)
-    r.raise_for_status()
+credentials = Credentials(provider=Static())
+client = minio.Minio('s3.amazonaws.com', credentials=credentials)
 
-    node = pq(r.content)
-
-    links = []
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 5:
-            continue
-
-        link_target = td[1].find('a').get('href')
-        if link_target.startswith('/'):
-            # Link to parent directory
-            continue
-
-        last_updated = td[2].text.strip()
-
-        links.append((link_target, last_updated))
-
-    return links
+def get_channels():
+    return [
+        (x.object_name, x.last_modified)
+        for x in client.list_objects_v2('nix-channels')
+        if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', x.object_name)
+    ]
 
 def clone_channels():
     logging.info(f'- Fetching channels')
@@ -161,17 +154,15 @@ def clone_channels():
 
     working_dir.mkdir(parents=True, exist_ok=True)
 
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
+    for channel, chan_updated in get_channels():
         chan_path = working_dir / channel
 
         # Old channels, little value in cloning and format changes
-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
+        if chan_updated < CLONE_SINCE:
             continue
 
-        chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
-        chan_redirect_res.raise_for_status()
-
-        chan_location = chan_redirect_res.headers['Location']
+        chan_obj = client.get_object('nix-channels', channel)
+        chan_location = chan_obj.headers['x-amz-website-redirect-location']
 
         chan_release = chan_location.split('/')[-1]
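The nix-channels.py rewrite above replaces scraping the HTML index (get_links) with listing the `nix-channels` S3 bucket through minio, and reads each channel's target from the `x-amz-website-redirect-location` header instead of following an HTTP redirect. The same listing can be spot-checked from the shell with an unsigned ListObjectsV2 request, assuming the bucket still permits anonymous listing as it did when this patch was written:

    # Shallow, unsigned listing of the public nix-channels bucket
    # (delimiter=/ mirrors the non-recursive list_objects_v2 call).
    curl -s 'https://nix-channels.s3.amazonaws.com/?list-type=2&delimiter=/' |
        grep -o '<Key>[^<]*</Key>'   # crude XML scrape, fine for eyeballing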
diff --git a/nixos-images.py b/nixos-images.py
index 9403876..9e9a8d0 100755
--- a/nixos-images.py
+++ b/nixos-images.py
@@ -2,6 +2,7 @@
 import hashlib
 import logging
 import lzma
+import minio
 import os
 import re
 import sys
@@ -11,6 +12,9 @@ import subprocess
 from pyquery import PyQuery as pq
 from datetime import datetime, timedelta
 from pathlib import Path
+from collections import defaultdict
+
+from minio.credentials import Credentials, Static
 
 from urllib3.util.retry import Retry
 
@@ -38,14 +42,8 @@ def http_head(*args, **kwargs):
 def http_get(*args, **kwargs):
     return session.get(*args, timeout=TIMEOUT, **kwargs)
 
-def file_sha256(dest):
-    sha = subprocess.check_output(
-        [ 'sha256sum', str(dest) ],
-        universal_newlines=True
-    )
-    return sha.split(' ')[0]
-
 def atomic_write_file(dest, contents):
+    dest.parent.mkdir(parents=True, exist_ok=True)
     tmp_dest = dest.parent / f'.{dest.name}.tmp'
     with tmp_dest.open('w') as f:
         f.write(contents)
@@ -93,156 +91,51 @@ def download(url, dest):
 
     download_dest.rename(dest)
 
-def get_links(url):
-    r = http_get(url)
-    r.raise_for_status()
+credentials = Credentials(provider=Static())
+client = minio.Minio('s3.amazonaws.com', credentials=credentials)
 
-    node = pq(r.content)
-
-    links = []
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 5:
-            continue
-
-        link_target = td[1].find('a').get('href')
-        if link_target.startswith('/'):
-            # Link to parent directory
-            continue
-
-        last_updated = td[2].text.strip()
-
-        links.append((link_target, last_updated))
-
-    return links
-
-def get_channel(chan_location):
-    release_res = http_get(chan_location)
-    release_res.raise_for_status()
-
-    node = pq(release_res.text)
-
-    tagline = node('p').text()
-
-    tagline_res = re.match(r'^Released on (.+) from', tagline)
-
-    assert tagline_res is not None
-
-    released_time = tagline_res[1]
-
-    files = []
-
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 3:
-            continue
-        file_name, file_size, file_hash = (pq(x).text() for x in td)
-        files.append((file_name, file_size, file_hash))
-
-    return {
-        'released_time': released_time,
-        'files': files
-    }
+def get_url(name):
+    response = client.get_object('nix-channels', name)
+    return response.headers['x-amz-website-redirect-location']
 
 def clone_images():
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
-        if not channel.startswith('nixos-') \
-            or channel.endswith('-small') \
-            or channel == 'nixos-unstable':
+    DOWNLOAD_MATCH = r'nixos-\d\d\.\d\d/latest-nixos-\w+-\w+-linux\.\w+(\.sha256)?'
+
+    object_names = [
+        x.object_name
+        for x in client.list_objects_v2('nix-channels', recursive=True)
+        if re.fullmatch(DOWNLOAD_MATCH, x.object_name)
+    ]
+
+    channels = defaultdict(lambda: [])
+
+    for name in object_names:
+        chan, file = name.split('/', 1)
+        channels[chan].append(file)
+
+    for channel, files in channels.items():
+        chan_dir = working_dir / channel
+        git_rev = http_get(get_url(f'{channel}/git-revision')).text
+        git_rev_path = chan_dir / 'git-revision'
+
+        if git_rev_path.exists() and git_rev == git_rev_path.read_text():
             continue
 
-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
-            continue
+        logging.info(f'- {channel} -> {git_rev}')
 
-        chan_path = working_dir / channel
-        chan_path.mkdir(parents=True, exist_ok=True)
+        for file in files:
+            logging.info(f'  - {file}')
+            url = get_url(f'{channel}/{file}')
 
-        res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
-        res.raise_for_status()
+            try:
+                download(url, chan_dir / file)
+            except requests.HTTPError as e:
+                if e.response.status_code == 404:
+                    logging.info(f'    - 404, skipped')
+                else:
+                    raise
 
-        chan_location = res.headers['Location']
-        chan_release_basename = chan_location.split('/')[-1]
-
-        try:
-            last_url = (chan_path / '.last-url').read_text()
-        except (IOError, OSError):
-            last_url = 'not available'
-
-        if chan_location == last_url:
-            continue
-
-        logging.info(f'- {channel} -> {chan_release_basename}')
-
-        # Matches nixos-19.03 -> nixos-19.03beta171840.23fd1394dc6
-        #         ^-------------^
-        if chan_release_basename.startswith(channel + 'beta'):
-            logging.info(f'  - Beta channel, not updating')
-            continue
-
-        chan_info = get_channel(chan_location)
-
-        atomic_write_file(chan_path / '.released-time', chan_info['released_time'])
-
-        has_hash_fail = False
-
-        keep_files = { '.last-url', '.released-time' }
-        rename_files = []
-
-        logging.info(f'  - Downloading new files')
-
-        chan_version = channel.split('-', 1)[1]
-
-        chan_release_version = chan_release_basename.split('-', 1)[1]
-
-        simplify_name = lambda fname: fname.replace(f'-{chan_release_version}-', f'-{chan_version}-')
-
-        image_files = [
-            (simplify_name(file_name), file_name, file_hash)
-            for file_name, _file_size, file_hash in chan_info['files']
-            if file_name.endswith('.iso') or file_name.endswith('ova')
-        ]
-
-        for mirror_file_name, upstream_file_name, file_hash in image_files:
-            keep_files.add(mirror_file_name)
-            logging.info(f'  - {upstream_file_name} -> {mirror_file_name}')
-            tmp_dest = f'.update.{upstream_file_name}'
-            rename_files.append((tmp_dest, mirror_file_name))
-
-            download(f'{chan_location}/{upstream_file_name}', chan_path / tmp_dest)
-            actual_hash = file_sha256(chan_path / tmp_dest)
-
-            if file_hash != actual_hash:
-                has_hash_fail = True
-                logging.error(f'    - Incorrect hash')
-                logging.error(f'      actual   {actual_hash}')
-                logging.error(f'      expected {file_hash}')
-                logging.info(f'    - File saved as {tmp_dest}')
-
-        if has_hash_fail:
-            logging.warn(f'  - Found bad files. Will retry next time.')
-        else:
-            logging.info(f'  - Renaming files')
-
-            for tmp_dest, mirror_file_name in rename_files:
-                (chan_path / tmp_dest).rename(chan_path / mirror_file_name)
-
-            logging.info(f'  - Removing useless files')
-
-            for file_path in chan_path.iterdir():
-                file_name = file_path.name
-
-                if file_name not in keep_files:
-                    logging.info(f'    - {file_name}')
-                    file_path.unlink()
-
-            logging.info(f'  - Writing SHA256SUMS')
-
-            with (chan_path / 'SHA256SUMS').open('w') as f:
-                for mirror_file_name, _upstream_file_name, file_hash in image_files:
-                    f.write(f'{file_hash} *{mirror_file_name}\n')
-
-        logging.info(f'  - Update finished')
-        atomic_write_file(chan_path / '.last-url', chan_location)
+        atomic_write_file(git_rev_path, git_rev)
 
 if __name__ == "__main__":
     clone_images()
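nixos-images.py now keys an entire channel's images off one small object: it stores the upstream git-revision locally and skips the channel when that value is unchanged. The same guard in shell form (the path and URL are illustrative, not part of the patch):

    rev_file=/srv/mirror/nixos-20.03/git-revision
    new_rev=$(curl -s "https://releases.example.org/nixos-20.03/git-revision")
    if [[ -f "$rev_file" && "$new_rev" == "$(cat "$rev_file")" ]]; then
        echo "up to date, skipping"   # no per-file downloads needed
    fi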
diff --git a/pub.sh b/pub.sh
index 4c69c5d..0acb908 100755
--- a/pub.sh
+++ b/pub.sh
@@ -4,4 +4,4 @@ set -e
 BASE_URL=${MIRROR_BASE_URL:-"https://mirrors.tuna.tsinghua.edu.cn/dart-pub"}
 UPSTREAM_URL=${TUNASYNC_UPSTREAM_URL:-"https://pub.dartlang.org/api"}
 echo "From $UPSTREAM_URL to $BASE_URL"
-exec /pub-cache/bin/pub_mirror --upstream "$UPSTREAM_URL" --verbose --connections 10 --concurrency 10 "$TUNASYNC_WORKING_DIR" "$BASE_URL"
+exec /pub-cache/bin/pub_mirror --upstream "$UPSTREAM_URL" --verbose --delete --connections 10 --concurrency 10 "$TUNASYNC_WORKING_DIR" "$BASE_URL"
diff --git a/ros2.sh b/ros2.sh
new file mode 100755
index 0000000..ee40e43
--- /dev/null
+++ b/ros2.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# requires: wget curl
+set -e
+set -o pipefail
+
+_here=`dirname $(realpath $0)`
+. ${_here}/helpers/apt-download
+
+[ -z "${LOADED_APT_DOWNLOAD}" ] && (echo "failed to load apt-download"; exit 1)
+
+BASE_PATH="${TUNASYNC_WORKING_DIR}"
+BASE_URL=${TUNASYNC_UPSTREAM_URL:-"http://packages.ros.org/ros2"}
+
+APT_PATH="${BASE_PATH}/ubuntu"
+
+APT_VERSIONS=(bionic buster cosmic disco eoan focal stretch xenial)
+
+# =================== APT repos ===============================
+if [[ ! -z ${DRY_RUN:-} ]]; then
+    export APT_DRY_RUN=1
+fi
+mkdir -p ${APT_PATH}
+base_url="${BASE_URL}/ubuntu"
+for version in ${APT_VERSIONS[@]}; do
+    for arch in "amd64" "arm64" "armhf"; do
+        echo "=== Syncing $version $arch"
+        apt-download-binary "${base_url}" "$version" "main" "$arch" "${APT_PATH}" || true
+    done
+done
+echo "APT finished"
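Like the rest of these scripts, ros2.sh is driven entirely by environment variables, and setting `DRY_RUN` makes it export `APT_DRY_RUN` for the apt-download helper. A trial invocation against a scratch directory might look like this (the path is illustrative):

    DRY_RUN=1 TUNASYNC_WORKING_DIR=/tmp/ros2-test ./ros2.sh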