Merge pull request #62 from dramforever/nix-new

nix-channels and nixos-images update due to upstream change
2025-07-08 04:22:46 +00:00 · 2020-03-23 22:15:04 +08:00 · 2020-03-23 22:15:04 +08:00 · fc9929258c
commit fc9929258c
parent 5c2db12bba 4afe8ac16d
3 changed files with 60 additions and 182 deletions
--- a/dockerfiles/nix-channels/Dockerfile
+++ b/dockerfiles/nix-channels/Dockerfile
@@ -1,7 +1,7 @@
 FROM python:3-buster
 MAINTAINER Wang Ruikang <dramforever@live.com>
-RUN pip3 install pyquery requests && \
+RUN pip3 install pyquery requests minio && \
    # Install Nix. To simplify management we only copy binaries and create
    # symlinks, and do no further configuration
    curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \
--- a/nix-channels.py
+++ b/nix-channels.py
@@ -3,11 +3,13 @@ import hashlib
 import json
 import logging
 import lzma
 import minio
 import os
 import pytz
 import re
 import sys
 import requests
 import subprocess
 import sys
 from pyquery import PyQuery as pq
 from datetime import datetime, timedelta
@@ -35,7 +37,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
 STORE_DIR = 'store'
 RELEASES_DIR = 'releases'
-CLONE_SINCE = datetime(2018, 12, 1)
+
 # Channels that have not updated since migration to Netlify [1] are assumed to
 # be too old and defunct.
 #
 # [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
 CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
 TIMEOUT = 60
 working_dir = Path(WORKING_DIR)
@@ -63,9 +70,6 @@ logging.basicConfig(
 # Don't forget 'global failure'
 failure = False
 def http_head(*args, **kwargs):
    """Send a HEAD request via the module-level ``session`` with ``TIMEOUT``.

    All positional and keyword arguments are forwarded to ``session.head``
    unchanged, and whatever it returns is handed back to the caller.
    """
    response = session.head(*args, timeout=TIMEOUT, **kwargs)
    return response
 def http_get(*args, **kwargs):
    """Send a GET request via the module-level ``session`` with ``TIMEOUT``.

    All positional and keyword arguments are forwarded to ``session.get``
    unchanged, and whatever it returns is handed back to the caller.
    """
    response = session.get(*args, timeout=TIMEOUT, **kwargs)
    return response
@@ -131,28 +135,14 @@ def download(url, dest):
    download_dest.rename(dest)
-def get_links(url):
+client = minio.Minio('s3.amazonaws.com')
    r = http_get(url)
    r.raise_for_status()
-    node = pq(r.content)
+def get_channels():
-
+    return [
-    links = []
+        (x.object_name, x.last_modified)
-    for row in node('tr'):
+        for x in client.list_objects_v2('nix-channels')
-        td = pq(row)('td')
+        if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', x.object_name)
-        if len(td) != 5:
+    ]
            continue
        link_target = td[1].find('a').get('href')
        if link_target.startswith('/'):
            # Link to parent directory
            continue
        last_updated = td[2].text.strip()
        links.append((link_target, last_updated))
    return links
 def clone_channels():
    logging.info(f'- Fetching channels')
@@ -161,17 +151,15 @@ def clone_channels():
    working_dir.mkdir(parents=True, exist_ok=True)
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
+    for channel, chan_updated in get_channels():
        chan_path = working_dir / channel
        # Old channels, little value in cloning and format changes
-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
+        if chan_updated < CLONE_SINCE:
            continue
-        chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
+        chan_obj = client.get_object('nix-channels', channel)
-        chan_redirect_res.raise_for_status()
+        chan_location = chan_obj.headers['x-amz-website-redirect-location']
        chan_location = chan_redirect_res.headers['Location']
        chan_release = chan_location.split('/')[-1]
--- a/nixos-images.py
+++ b/nixos-images.py
@@ -2,6 +2,7 @@
 import hashlib
 import logging
 import lzma
 import minio
 import os
 import re
 import sys
@@ -11,6 +12,7 @@ import subprocess
 from pyquery import PyQuery as pq
 from datetime import datetime, timedelta
 from pathlib import Path
 from collections import defaultdict
 from urllib3.util.retry import Retry
@@ -38,14 +40,8 @@ def http_head(*args, **kwargs):
 def http_get(*args, **kwargs):
    """Send a GET request via the module-level ``session`` with ``TIMEOUT``.

    All positional and keyword arguments are forwarded to ``session.get``
    unchanged, and whatever it returns is handed back to the caller.
    """
    response = session.get(*args, timeout=TIMEOUT, **kwargs)
    return response
 def file_sha256(dest):
    """Return the hexadecimal SHA-256 digest of the file at ``dest``.

    The digest is computed in-process with ``hashlib`` rather than by
    running ``sha256sum`` and splitting its output. That removes the
    dependency on an external binary and on its output format: GNU
    ``sha256sum`` prefixes the line with a backslash when the file name
    contains a backslash or newline, which corrupted the parsed digest.

    dest: path of the file to hash (``str`` or ``Path``).
    Returns the digest as a lowercase hex string.
    Raises ``OSError`` if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(str(dest), 'rb') as f:
        # Hash in fixed-size chunks so large images are never fully in memory.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()
 def atomic_write_file(dest, contents):
    dest.parent.mkdir(parents=True, exist_ok=True)
    tmp_dest = dest.parent / f'.{dest.name}.tmp'
    with tmp_dest.open('w') as f:
        f.write(contents)
@@ -93,156 +89,50 @@ def download(url, dest):
    download_dest.rename(dest)
-def get_links(url):
+client = minio.Minio('s3.amazonaws.com')
    r = http_get(url)
    r.raise_for_status()
-    node = pq(r.content)
+def get_url(name):
-
+    response = client.get_object('nix-channels', name)
-    links = []
+    return response.headers['x-amz-website-redirect-location']
    for row in node('tr'):
        td = pq(row)('td')
        if len(td) != 5:
            continue
        link_target = td[1].find('a').get('href')
        if link_target.startswith('/'):
            # Link to parent directory
            continue
        last_updated = td[2].text.strip()
        links.append((link_target, last_updated))
    return links
 def get_channel(chan_location):
    """Fetch the page at ``chan_location`` and extract release metadata.

    Returns a dict with two keys: 'released_time', the text captured from the
    "Released on ... from" tagline found in the page's <p> text, and 'files',
    a list of 3-tuples of cell text, one per table row that has exactly three
    <td> cells.

    ``raise_for_status()`` is called on the response, and an assertion fails
    if the tagline does not match.
    """
    response = http_get(chan_location)
    response.raise_for_status()

    page = pq(response.text)

    tagline_match = re.match(r'^Released on (.+) from', page('p').text())
    assert tagline_match is not None

    file_rows = []
    for tr in page('tr'):
        cells = pq(tr)('td')
        # Only rows with exactly three cells are recorded.
        if len(cells) == 3:
            file_rows.append(tuple(pq(cell).text() for cell in cells))

    return {
        'released_time': tagline_match.group(1),
        'files': file_rows
    }
 def clone_images():
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
+    DOWNLOAD_MATCH = r'nixos-\d\d.\d\d/latest-nixos-\w+-\w+-linux.\w+(.sha256)?'
        if not channel.startswith('nixos-') \
            or channel.endswith('-small') \
            or channel == 'nixos-unstable':
            continue
-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
+    object_names = [
-            continue
+        x.object_name
-
+        for x in client.list_objects_v2('nix-channels', recursive=True)
-        chan_path = working_dir / channel
+        if re.fullmatch(DOWNLOAD_MATCH, x.object_name)
        chan_path.mkdir(parents=True, exist_ok=True)
        res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
        res.raise_for_status()
        chan_location = res.headers['Location']
        chan_release_basename = chan_location.split('/')[-1]
        try:
            last_url = (chan_path / '.last-url').read_text()
        except (IOError, OSError):
            last_url = 'not available'
        if chan_location == last_url:
            continue
        logging.info(f'- {channel} -> {chan_release_basename}')
        # Matches nixos-19.03 -> nixos-19.03beta171840.23fd1394dc6
        #                        ^-------------^
        if chan_release_basename.startswith(channel + 'beta'):
            logging.info(f'  - Beta channel, not updating')
            continue
        chan_info = get_channel(chan_location)
        atomic_write_file(chan_path / '.released-time', chan_info['released_time'])
        has_hash_fail = False
        keep_files = { '.last-url', '.released-time' }
        rename_files = []
        logging.info(f'  - Downloading new files')
        chan_version = channel.split('-', 1)[1]
        chan_release_version = chan_release_basename.split('-', 1)[1]
        simplify_name = lambda fname: fname.replace(f'-{chan_release_version}-', f'-{chan_version}-')
        image_files = [
            (simplify_name(file_name), file_name, file_hash)
            for file_name, _file_size, file_hash in chan_info['files']
            if file_name.endswith('.iso') or file_name.endswith('ova')
    ]
-        for mirror_file_name, upstream_file_name, file_hash in image_files: 
+    channels = defaultdict(lambda: [])
            keep_files.add(mirror_file_name)
            logging.info(f'    - {upstream_file_name} -> {mirror_file_name}')
            tmp_dest = f'.update.{upstream_file_name}'
            rename_files.append((tmp_dest, mirror_file_name))
-            download(f'{chan_location}/{upstream_file_name}', chan_path / tmp_dest)
+    for name in object_names:
-            actual_hash = file_sha256(chan_path / tmp_dest)
+        chan, file = name.split('/', 1)
        channels[chan].append(file)
-            if file_hash != actual_hash:
+    for channel, files in channels.items():
-                has_hash_fail = True
+        chan_dir = working_dir / channel
-                logging.error(f'      - Incorrect hash')
+        git_rev = http_get(get_url(f'{channel}/git-revision')).text
-                logging.error(f'        actual   {actual_hash}')
+        git_rev_path = chan_dir / 'git-revision'
                logging.error(f'        expected {file_hash}')
                logging.info(f'      - File saved as {tmp_dest}')
-        if has_hash_fail:
+        if git_rev_path.exists() and git_rev == git_rev_path.read_text():
-            logging.warn(f'  - Found bad files. Will retry next time.')
+            continue
        logging.info(f'- {channel} -> {git_rev}')
        for file in files:
            logging.info(f'  - {file}')
            url = get_url(f'{channel}/{file}')
            try:
                download(url, chan_dir / file)
            except requests.HTTPError as e:
                if e.response.status_code == 404:
                    logging.info(f'    - 404, skipped')
                else:
-            logging.info(f'  - Renaming files')
+                    raise
-            for tmp_dest, mirror_file_name in rename_files:
+        atomic_write_file(git_rev_path, git_rev)
                (chan_path / tmp_dest).rename(chan_path / mirror_file_name)
            logging.info(f'  - Removing useless files')
            for file_path in chan_path.iterdir():
                file_name = file_path.name
                if file_name not in keep_files:
                    logging.info(f'    - {file_name}')
                    file_path.unlink()
            logging.info(f'  - Writing SHA256SUMS')
            with (chan_path / 'SHA256SUMS').open('w') as f:
                for mirror_file_name, _upstream_file_name, file_hash in image_files:
                    f.write(f'{file_hash} *{mirror_file_name}\n')
            logging.info(f'  - Update finished')
            atomic_write_file(chan_path / '.last-url', chan_location)
 # Script entry point: call clone_images() only when this file is executed
 # directly, not when it is imported as a module.
 if __name__ == "__main__":
    clone_images()