dramforever 2020-03-23 20:33:02 +08:00
parent 0babe7557c
commit a14304c1b2
2 changed files with 21 additions and 33 deletions

View File

@ -1,7 +1,7 @@
FROM python:3-buster
MAINTAINER Wang Ruikang <dramforever@live.com>
RUN pip3 install pyquery requests && \
RUN pip3 install pyquery requests minio && \
# Install Nix. To simplify management we only copy binaries and create
# symlinks, and do no further configuration
curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \

View File

@ -3,11 +3,13 @@ import hashlib
import json
import logging
import lzma
import minio
import os
import pytz
import re
import sys
import requests
import subprocess
import sys
from pyquery import PyQuery as pq
from datetime import datetime, timedelta
@ -35,7 +37,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
STORE_DIR = 'store'
RELEASES_DIR = 'releases'
CLONE_SINCE = datetime(2018, 12, 1)
# Channels that have not updated since migration to Netlify [1] are assumed to
# be too old and defunct.
#
# [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
TIMEOUT = 60
working_dir = Path(WORKING_DIR)
@ -63,9 +70,6 @@ logging.basicConfig(
# Don't forget 'global failure'
failure = False
def http_head(*args, **kwargs):
    """Send an HTTP HEAD request through the shared module session.

    All positional/keyword arguments are forwarded to ``session.head``;
    the global ``TIMEOUT`` is always supplied so no request can hang
    indefinitely.
    """
    response = session.head(*args, timeout=TIMEOUT, **kwargs)
    return response
def http_get(*args, **kwargs):
    """Send an HTTP GET request through the shared module session.

    Mirrors ``http_head``: forwards everything to ``session.get`` and
    injects the global ``TIMEOUT`` on every call.
    """
    response = session.get(*args, timeout=TIMEOUT, **kwargs)
    return response
@ -131,28 +135,14 @@ def download(url, dest):
download_dest.rename(dest)
# Scrape an HTML directory listing and return (href, last-updated) pairs.
# (This is the pre-minio implementation being removed by this commit.)
def get_links(url):
# Fetch the listing page; fail fast on any HTTP error status.
r = http_get(url)
r.raise_for_status()
# NOTE(review): the next line appears to be an interleaved *added* diff line
# (the minio client construction from the new code), not part of get_links —
# confirm against the original commit before reading this span as one function.
client = minio.Minio('s3.amazonaws.com')
# Parse the response body with PyQuery and walk every table row.
node = pq(r.content)
links = []
for row in node('tr'):
td = pq(row)('td')
# Listing rows have exactly 5 cells; anything else is a header/separator row.
if len(td) != 5:
continue
# The second cell holds the anchor pointing at the listed entry.
link_target = td[1].find('a').get('href')
if link_target.startswith('/'):
# Link to parent directory
continue
# The third cell carries the last-updated timestamp as plain text.
last_updated = td[2].text.strip()
links.append((link_target, last_updated))
# List of (relative href, last-updated string) tuples.
return links
def get_channels():
    """List channel objects from the ``nix-channels`` S3 bucket.

    Returns ``(object_name, last_modified)`` tuples for every object whose
    name matches ``nixos-*`` or ``nixpkgs-*`` and does not end with ``/``
    (i.e. real channel objects rather than directory-style keys).
    """
    channels = []
    for obj in client.list_objects_v2('nix-channels'):
        if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', obj.object_name):
            channels.append((obj.object_name, obj.last_modified))
    return channels
def clone_channels():
logging.info(f'- Fetching channels')
@ -161,17 +151,15 @@ def clone_channels():
working_dir.mkdir(parents=True, exist_ok=True)
for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
for channel, chan_updated in get_channels():
chan_path = working_dir / channel
# Old channels, little value in cloning and format changes
if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
if chan_updated < CLONE_SINCE:
continue
chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
chan_redirect_res.raise_for_status()
chan_location = chan_redirect_res.headers['Location']
chan_obj = client.get_object('nix-channels', channel)
chan_location = chan_obj.headers['x-amz-website-redirect-location']
chan_release = chan_location.split('/')[-1]