commit 00bc5bd38b
Author: root
Date:   2020-03-25 15:00:09 +08:00
11 changed files with 137 additions and 220 deletions

View File

@@ -1,8 +1,8 @@
FROM python:3.7-buster
FROM debian:buster
MAINTAINER Justin Wong <yuzhi.wang@tuna.tsinghua.edu.cn>
RUN apt-get update && \
apt-get install -y wget curl rsync lftp git jq python-dev python-pip yum-utils createrepo aria2 awscli ack composer php-curl php-zip
apt-get install -y wget curl rsync lftp git jq python3-dev python3-pip yum-utils createrepo aria2 awscli ack composer php-curl php-zip
RUN STATIC_DEPS=true pip3 install pyquery
RUN pip3 install requests[socks] pyyaml gsutil bandersnatch==3.6.0
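The switch above from python:3.7-buster to plain debian:buster means Python 3 now comes from apt (python3-dev, python3-pip) rather than from a prebuilt Python image. A quick smoke test that the image still builds and carries the expected tooling — the tag below is hypothetical, not something this repo defines:

    # build from the directory containing this Dockerfile (tag is illustrative)
    docker build -t pypi-worker-test .
    # verify pip3 and bandersnatch made it into the image
    docker run --rm pypi-worker-test sh -c 'pip3 --version && bandersnatch --help >/dev/null && echo OK'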

View File

@@ -1,5 +1,5 @@
#!/bin/bash
# requires: curl, sha256sum, awk
# requires: curl, sha256sum, awk, jq
set -e
BASE_PATH="${TUNASYNC_WORKING_DIR}"
@@ -7,6 +7,7 @@ BASE_PATH="${TUNASYNC_WORKING_DIR}"
# argument: major version number, e.g. 8 or 11
function downloadRelease() {
remote_filelist="$BASE_PATH/$1/filelist"
mkdir -p "$BASE_PATH/$1"
echo -n "" >$remote_filelist
curl -s "https://api.adoptopenjdk.net/v2/latestAssets/releases/openjdk$1" | \
jq -r '.[]| [.version,.binary_type,.architecture,.os,.binary_name,.binary_link,.checksum_link,.installer_name,.installer_link,.installer_checksum_link]| @tsv' | \
@@ -20,12 +21,14 @@ function downloadRelease() {
echo "Skiping $binary_name"
downloaded=true
fi
while [[ $downloaded != true ]]; do
local retry=0
while [[ $retry -lt 3 && $downloaded != true ]]; do
echo "Downloading ${dest_filename}"
link="$binary_link"
download_and_check && {
downloaded=true
}
((retry+=1))
done
if [[ ! -z "$installer_name" ]]; then
dest_filename="$BASE_PATH/$version/$binary_type/$architecture/$os/$installer_name"
@@ -36,13 +39,15 @@ function downloadRelease() {
echo "Skiping $installer_name"
downloaded=true
fi
while [[ $downloaded != true ]]; do
retry=0
while [[ $retry -lt 3 && $downloaded != true ]]; do
echo "Downloading ${dest_filename}"
link="$installer_link"
checksum_link="$installer_checksum_link"
download_and_check && {
downloaded=true
}
((retry+=1))
done
fi
done
@@ -52,10 +57,11 @@ function clean_old_releases() {
declare version=$1
declare remote_filelist="$BASE_PATH/$version/filelist"
declare local_filelist="/tmp/filelist.local"
[[ ! -f "$remote_filelist" ]] && return 0
find "$BASE_PATH/$version" -type f > ${local_filelist}
comm <(sort $remote_filelist) <(sort $local_filelist) -13 | while read file; do
echo "deleting ${file}"
rm "${file}"
# rm "${file}"
done
}
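The cleanup above hinges on comm -13, which, given two sorted lists, prints only the lines unique to the second one — here, files that exist locally but are absent from the remote filelist. A standalone illustration of that selection (file names invented for the demo):

    # comm needs sorted input; -13 suppresses lines unique to the remote
    # list and lines common to both, leaving only stale local files
    printf '%s\n' a.tar.gz b.tar.gz | sort > remote.txt
    printf '%s\n' a.tar.gz b.tar.gz stale.tar.gz | sort > local.txt
    comm -13 remote.txt local.txt    # prints: stale.tar.gz

Note that this commit also comments out the rm itself, so a run only logs what would be deleted.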
@@ -67,7 +73,11 @@ function download_and_check() {
"$link"
curl -s -S --fail -L ${CURL_OPTIONS:-} \
-o "${dest_filename}.sha256.txt.tmp" \
"$checksum_link"
"$checksum_link" || {
echo "Warning: ${dest_filename}.sha256.txt not exist, skipping SHA256 check"
mv "${dest_filename}.tmp" "${dest_filename}"
return 0
}
sha256sum_check && {
mv "${dest_filename}.sha256.txt.tmp" "${dest_filename}.sha256.txt"
mv "${dest_filename}.tmp" "${dest_filename}"
@@ -78,14 +88,14 @@
function sha256sum_check() {
expected=$(cat "${dest_filename}.sha256.txt.tmp" | awk '{print $1}')
actual=$(sha256sum "${dest_filename}.tmp" | awk '{print $1}')
if [ "$expected" = "$actual" ]; then
if [[ "$expected" = "$actual" ]]; then
return 0
else
return 1
fi
}
for i in 8 11 13;
for i in 8 9 10 11 12 13 14;
do
downloadRelease $i && clean_old_releases $i
done
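Both download loops now give up after three attempts instead of retrying forever. The same bounded-retry pattern, factored into a reusable helper — a sketch of the loop logic above, not a function the script actually defines:

    # retry CMD ARGS...: run the command up to 3 times, stop on first success
    retry() {
        local attempt=0
        until "$@"; do
            ((attempt += 1))
            [[ $attempt -ge 3 ]] && return 1
            echo "retry $attempt/3: $*" >&2
        done
    }
    # usage: retry download_and_check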

View File

@@ -4,6 +4,6 @@ RUN apk add --no-cache bash ruby ruby-bigdecimal git curl ncurses ruby-json ruby
RUN git clone --depth 1 https://github.com/Homebrew/brew.git /home/homebrew/.linuxbrew/homebrew && cd /home/homebrew/.linuxbrew && mkdir bin && ln -s /home/homebrew/.linuxbrew/homebrew/bin/brew /home/homebrew/.linuxbrew/bin
RUN git clone --depth 1 https://github.com/gaoyifan/homebrew-bottle-mirror.git /home/homebrew/.linuxbrew/homebrew/Library/Taps/gaoyifan/homebrew-bottle-mirror
RUN git clone --depth 1 https://github.com/z4yx/homebrew-bottle-mirror.git /home/homebrew/.linuxbrew/homebrew/Library/Taps/gaoyifan/homebrew-bottle-mirror
RUN chown -R 2000 /home/homebrew

View File

@@ -1,7 +1,9 @@
FROM python:3-buster
FROM debian:buster
MAINTAINER Wang Ruikang <dramforever@live.com>
RUN pip3 install pyquery requests && \
RUN apt-get update && \
apt-get install -y python3-dev python3-pip curl && \
pip3 install pyquery requests minio && \
# Install Nix. To simplify management we only copy binaries and create
# symlinks, and do no further configuration
curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \
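The RUN instruction is truncated by the diff context here, but the comment spells out the approach: unpack the official Nix tarball and expose its binaries directly, skipping the installer. Roughly, assuming the standard tarball layout — these commands are an illustration, not the remainder of the actual RUN line:

    mkdir -p /nix
    tar -xJf /tmp/nix.tar.xz -C /tmp
    cp -a /tmp/nix-2.3.2-x86_64-linux/store /nix/store   # assumed tarball layout
    ln -s /nix/store/*-nix-2.3.2/bin/* /usr/local/bin/   # glob stands in for the store hash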

View File

@@ -1,20 +0,0 @@
FROM python:3.6
MAINTAINER Justin Wong <yuzhi.wang@tuna.tsinghua.edu.cn>
RUN echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ jessie main contrib non-free" > /etc/apt/sources.list && \
echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ jessie-backports main contrib non-free" >> /etc/apt/sources.list && \
echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian/ jessie-updates main contrib non-free" >> /etc/apt/sources.list && \
echo "deb http://mirrors.tuna.tsinghua.edu.cn/debian-security/ jessie/updates main contrib non-free" >> /etc/apt/sources.list
RUN apt-get update && \
apt-get install -y git aria2
RUN pip3 install requests pyyaml
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && apt-get install -y locales -qq && locale-gen
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV HOME=/tmp
CMD /bin/bash

View File

@@ -30,6 +30,7 @@ REPOS = [
"rust-analyzer/rust-analyzer",
"kubernetes/minikube",
{"repo": "iina/iina", "versions": -1, "pre_release": True, "flat": True},
"FreeCAD/FreeCAD",
]
# connect and read timeout value

View File

@@ -24,6 +24,7 @@ do
repo_name="homebrew-${tap}"
args="mac"
fi
remote_filelist="$HOMEBREW_CACHE/filelist.txt"
echo "===== SYNC STARTED AT $(date -R) ====="
dir_core=/home/homebrew/.linuxbrew/homebrew/Library/Taps/homebrew/homebrew-core
@@ -33,4 +34,13 @@ do
echo ""
echo "> RUN brew bottle-mirror $args..."
/home/homebrew/.linuxbrew/bin/brew bottle-mirror "$args"
if [[ -f "$remote_filelist" ]];then # clean outdated files
local_filelist=/tmp/filelist.local
(cd ${HOMEBREW_CACHE}; find . -type f -iname "*.tmp" -delete)
(cd ${HOMEBREW_CACHE}; find . -type f -mtime 30 -iname "*.tar.gz") | sed 's+^\./++' > $local_filelist
comm <(sort $remote_filelist) <(sort $local_filelist) -13 | while read file; do
echo "deleting ${HOMEBREW_CACHE}/${file}"
rm "${HOMEBREW_CACHE}/${file}"
done
fi
done
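One subtlety in the new cleanup pass: GNU find's -mtime 30 matches files last modified between 30 and 31 days ago (age is truncated to whole days), not everything older than 30 days — that would be -mtime +30. If the intent is "bottles untouched for over a month", the + form is the usual spelling. A quick comparison with a scratch file:

    touch -d '45 days ago' old.tar.gz
    find . -name old.tar.gz -mtime 30    # no match: truncated age is 45, not 30
    find . -name old.tar.gz -mtime +30   # matches: 45 > 30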

View File

@@ -3,17 +3,21 @@ import hashlib
import json
import logging
import lzma
import minio
import os
import pytz
import re
import sys
import requests
import subprocess
import sys
from pyquery import PyQuery as pq
from datetime import datetime, timedelta
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from minio.credentials import Credentials, Static
from urllib3.util.retry import Retry
### Config
@@ -35,7 +39,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
STORE_DIR = 'store'
RELEASES_DIR = 'releases'
CLONE_SINCE = datetime(2018, 12, 1)
# Channels that have not updated since migration to Netlify [1] are assumed to
# be too old and defunct.
#
# [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
TIMEOUT = 60
working_dir = Path(WORKING_DIR)
@@ -63,9 +72,6 @@ logging.basicConfig(
# Don't forget 'global failure'
failure = False
def http_head(*args, **kwargs):
return session.head(*args, timeout=TIMEOUT, **kwargs)
def http_get(*args, **kwargs):
return session.get(*args, timeout=TIMEOUT, **kwargs)
@@ -131,28 +137,15 @@ def download(url, dest):
download_dest.rename(dest)
def get_links(url):
r = http_get(url)
r.raise_for_status()
credentials = Credentials(provider=Static())
client = minio.Minio('s3.amazonaws.com', credentials=credentials)
node = pq(r.content)
links = []
for row in node('tr'):
td = pq(row)('td')
if len(td) != 5:
continue
link_target = td[1].find('a').get('href')
if link_target.startswith('/'):
# Link to parent directory
continue
last_updated = td[2].text.strip()
links.append((link_target, last_updated))
return links
def get_channels():
return [
(x.object_name, x.last_modified)
for x in client.list_objects_v2('nix-channels')
if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', x.object_name)
]
def clone_channels():
logging.info(f'- Fetching channels')
@@ -161,17 +154,15 @@ def clone_channels():
working_dir.mkdir(parents=True, exist_ok=True)
for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
for channel, chan_updated in get_channels():
chan_path = working_dir / channel
# Old channels, little value in cloning and format changes
if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
if chan_updated < CLONE_SINCE:
continue
chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
chan_redirect_res.raise_for_status()
chan_location = chan_redirect_res.headers['Location']
chan_obj = client.get_object('nix-channels', channel)
chan_location = chan_obj.headers['x-amz-website-redirect-location']
chan_release = chan_location.split('/')[-1]
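The script now reads the channel list straight from the public nix-channels S3 bucket instead of scraping the HTML index, and resolves each channel through the object's x-amz-website-redirect-location header. Both requests are easy to reproduce with curl against the S3 REST API (the bucket allows anonymous reads; the channel name is just an example):

    # ListObjectsV2: the <Key>/<LastModified> fields in the XML are what
    # client.list_objects_v2('nix-channels') yields per object
    curl -s 'https://nix-channels.s3.amazonaws.com/?list-type=2' | head

    # channel objects are redirect markers; S3 reports the target release
    # in a response header rather than in the body
    curl -sI 'https://nix-channels.s3.amazonaws.com/nixos-19.09' \
        | grep -i x-amz-website-redirect-location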

View File

@@ -2,6 +2,7 @@
import hashlib
import logging
import lzma
import minio
import os
import re
import sys
@@ -11,6 +12,9 @@ import subprocess
from pyquery import PyQuery as pq
from datetime import datetime, timedelta
from pathlib import Path
from collections import defaultdict
from minio.credentials import Credentials, Static
from urllib3.util.retry import Retry
@@ -38,14 +42,8 @@ def http_head(*args, **kwargs):
def http_get(*args, **kwargs):
return session.get(*args, timeout=TIMEOUT, **kwargs)
def file_sha256(dest):
sha = subprocess.check_output(
[ 'sha256sum', str(dest) ],
universal_newlines=True
)
return sha.split(' ')[0]
def atomic_write_file(dest, contents):
dest.parent.mkdir(parents=True, exist_ok=True)
tmp_dest = dest.parent / f'.{dest.name}.tmp'
with tmp_dest.open('w') as f:
f.write(contents)
@@ -93,156 +91,51 @@ def download(url, dest):
download_dest.rename(dest)
def get_links(url):
r = http_get(url)
r.raise_for_status()
credentials = Credentials(provider=Static())
client = minio.Minio('s3.amazonaws.com', credentials=credentials)
node = pq(r.content)
links = []
for row in node('tr'):
td = pq(row)('td')
if len(td) != 5:
continue
link_target = td[1].find('a').get('href')
if link_target.startswith('/'):
# Link to parent directory
continue
last_updated = td[2].text.strip()
links.append((link_target, last_updated))
return links
def get_channel(chan_location):
release_res = http_get(chan_location)
release_res.raise_for_status()
node = pq(release_res.text)
tagline = node('p').text()
tagline_res = re.match(r'^Released on (.+) from', tagline)
assert tagline_res is not None
released_time = tagline_res[1]
files = []
for row in node('tr'):
td = pq(row)('td')
if len(td) != 3:
continue
file_name, file_size, file_hash = (pq(x).text() for x in td)
files.append((file_name, file_size, file_hash))
return {
'released_time': released_time,
'files': files
}
def get_url(name):
response = client.get_object('nix-channels', name)
return response.headers['x-amz-website-redirect-location']
def clone_images():
for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
if not channel.startswith('nixos-') \
or channel.endswith('-small') \
or channel == 'nixos-unstable':
continue
DOWNLOAD_MATCH = r'nixos-\d\d.\d\d/latest-nixos-\w+-\w+-linux.\w+(.sha256)?'
if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
continue
chan_path = working_dir / channel
chan_path.mkdir(parents=True, exist_ok=True)
res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
res.raise_for_status()
chan_location = res.headers['Location']
chan_release_basename = chan_location.split('/')[-1]
try:
last_url = (chan_path / '.last-url').read_text()
except (IOError, OSError):
last_url = 'not available'
if chan_location == last_url:
continue
logging.info(f'- {channel} -> {chan_release_basename}')
# Matches nixos-19.03 -> nixos-19.03beta171840.23fd1394dc6
# ^-------------^
if chan_release_basename.startswith(channel + 'beta'):
logging.info(f' - Beta channel, not updating')
continue
chan_info = get_channel(chan_location)
atomic_write_file(chan_path / '.released-time', chan_info['released_time'])
has_hash_fail = False
keep_files = { '.last-url', '.released-time' }
rename_files = []
logging.info(f' - Downloading new files')
chan_version = channel.split('-', 1)[1]
chan_release_version = chan_release_basename.split('-', 1)[1]
simplify_name = lambda fname: fname.replace(f'-{chan_release_version}-', f'-{chan_version}-')
image_files = [
(simplify_name(file_name), file_name, file_hash)
for file_name, _file_size, file_hash in chan_info['files']
if file_name.endswith('.iso') or file_name.endswith('ova')
object_names = [
x.object_name
for x in client.list_objects_v2('nix-channels', recursive=True)
if re.fullmatch(DOWNLOAD_MATCH, x.object_name)
]
for mirror_file_name, upstream_file_name, file_hash in image_files:
keep_files.add(mirror_file_name)
logging.info(f' - {upstream_file_name} -> {mirror_file_name}')
tmp_dest = f'.update.{upstream_file_name}'
rename_files.append((tmp_dest, mirror_file_name))
channels = defaultdict(lambda: [])
download(f'{chan_location}/{upstream_file_name}', chan_path / tmp_dest)
actual_hash = file_sha256(chan_path / tmp_dest)
for name in object_names:
chan, file = name.split('/', 1)
channels[chan].append(file)
if file_hash != actual_hash:
has_hash_fail = True
logging.error(f' - Incorrect hash')
logging.error(f' actual {actual_hash}')
logging.error(f' expected {file_hash}')
logging.info(f' - File saved as {tmp_dest}')
for channel, files in channels.items():
chan_dir = working_dir / channel
git_rev = http_get(get_url(f'{channel}/git-revision')).text
git_rev_path = chan_dir / 'git-revision'
if has_hash_fail:
logging.warn(f' - Found bad files. Will retry next time.')
if git_rev_path.exists() and git_rev == git_rev_path.read_text():
continue
logging.info(f'- {channel} -> {git_rev}')
for file in files:
logging.info(f' - {file}')
url = get_url(f'{channel}/{file}')
try:
download(url, chan_dir / file)
except requests.HTTPError as e:
if e.response.status_code == 404:
logging.info(f' - 404, skipped')
else:
logging.info(f' - Renaming files')
raise
for tmp_dest, mirror_file_name in rename_files:
(chan_path / tmp_dest).rename(chan_path / mirror_file_name)
logging.info(f' - Removing useless files')
for file_path in chan_path.iterdir():
file_name = file_path.name
if file_name not in keep_files:
logging.info(f' - {file_name}')
file_path.unlink()
logging.info(f' - Writing SHA256SUMS')
with (chan_path / 'SHA256SUMS').open('w') as f:
for mirror_file_name, _upstream_file_name, file_hash in image_files:
f.write(f'{file_hash} *{mirror_file_name}\n')
logging.info(f' - Update finished')
atomic_write_file(chan_path / '.last-url', chan_location)
atomic_write_file(git_rev_path, git_rev)
if __name__ == "__main__":
clone_images()
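The rewrite keys each image sync on the channel's git-revision object: when the stored revision matches upstream, the whole channel is skipped before any image is touched. The same short-circuit as a shell sketch (endpoint and paths are illustrative, not taken from the script):

    chan=nixos-19.09   # example channel
    new_rev=$(curl -sL "https://channels.nixos.org/$chan/git-revision")
    if [[ -f "$chan/git-revision" && "$new_rev" == "$(cat "$chan/git-revision")" ]]; then
        echo "$chan is up to date, skipping"
    fi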

pub.sh
View File

@@ -4,4 +4,4 @@ set -e
BASE_URL=${MIRROR_BASE_URL:-"https://mirrors.tuna.tsinghua.edu.cn/dart-pub"}
UPSTREAM_URL=${TUNASYNC_UPSTREAM_URL:-"https://pub.dartlang.org/api"}
echo "From $UPSTREAM_URL to $BASE_URL"
exec /pub-cache/bin/pub_mirror --upstream "$UPSTREAM_URL" --verbose --connections 10 --concurrency 10 "$TUNASYNC_WORKING_DIR" "$BASE_URL"
exec /pub-cache/bin/pub_mirror --upstream "$UPSTREAM_URL" --verbose --delete --connections 10 --concurrency 10 "$TUNASYNC_WORKING_DIR" "$BASE_URL"

ros2.sh (new executable file)
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# requires: wget curl
set -e
set -o pipefail
_here=`dirname $(realpath $0)`
. ${_here}/helpers/apt-download
[ -z "${LOADED_APT_DOWNLOAD}" ] && (echo "failed to load apt-download"; exit 1)
BASE_PATH="${TUNASYNC_WORKING_DIR}"
BASE_URL=${TUNASYNC_UPSTREAM_URL:-"http://packages.ros.org/ros2"}
APT_PATH="${BASE_PATH}/ubuntu"
APT_VERSIONS=(bionic buster cosmic disco eoan focal stretch xenial)
# =================== APT repos ===============================
if [[ ! -z ${DRY_RUN:-} ]]; then
export APT_DRY_RUN=1
fi
mkdir -p ${APT_PATH}
base_url="${BASE_URL}/ubuntu"
for version in ${APT_VERSIONS[@]}; do
for arch in "amd64" "arm64" "armhf"; do
echo "=== Syncing $version $arch"
apt-download-binary "${base_url}" "$version" "main" "$arch" "${APT_PATH}" || true
done
done
echo "APT finished"