From a14304c1b243fbaae45f364fa3d277d3aab87482 Mon Sep 17 00:00:00 2001
From: dramforever
Date: Mon, 23 Mar 2020 20:33:02 +0800
Subject: [PATCH] nix-channels: update

https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
---
 dockerfiles/nix-channels/Dockerfile |  2 +-
 nix-channels.py                     | 52 +++++++++++------------------
 2 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/dockerfiles/nix-channels/Dockerfile b/dockerfiles/nix-channels/Dockerfile
index 7171d6d..fce8349 100644
--- a/dockerfiles/nix-channels/Dockerfile
+++ b/dockerfiles/nix-channels/Dockerfile
@@ -1,7 +1,7 @@
 FROM python:3-buster
 MAINTAINER Wang Ruikang
 
-RUN pip3 install pyquery requests && \
+RUN pip3 install pyquery requests minio && \
     # Install Nix. To simplify management we only copy binaries and create
     # symlinks, and do no further configuration
     curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \
diff --git a/nix-channels.py b/nix-channels.py
index 7253d1a..176e4d9 100644
--- a/nix-channels.py
+++ b/nix-channels.py
@@ -3,11 +3,13 @@ import hashlib
 import json
 import logging
 import lzma
+import minio
 import os
+import pytz
 import re
-import sys
 import requests
 import subprocess
+import sys
 
 from pyquery import PyQuery as pq
 from datetime import datetime, timedelta
@@ -35,7 +37,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
 
 STORE_DIR = 'store'
 RELEASES_DIR = 'releases'
-CLONE_SINCE = datetime(2018, 12, 1)
+
+# Channels that have not updated since migration to Netlify [1] are assumed to
+# be too old and defunct.
+#
+# [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
+CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
 TIMEOUT = 60
 
 working_dir = Path(WORKING_DIR)
@@ -63,9 +70,6 @@ logging.basicConfig(
 # Don't forget 'global failure'
 failure = False
 
-def http_head(*args, **kwargs):
-    return session.head(*args, timeout=TIMEOUT, **kwargs)
-
 def http_get(*args, **kwargs):
     return session.get(*args, timeout=TIMEOUT, **kwargs)
 
@@ -131,28 +135,14 @@ def download(url, dest):
 
     download_dest.rename(dest)
 
-def get_links(url):
-    r = http_get(url)
-    r.raise_for_status()
+client = minio.Minio('s3.amazonaws.com')
 
-    node = pq(r.content)
-
-    links = []
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 5:
-            continue
-
-        link_target = td[1].find('a').get('href')
-        if link_target.startswith('/'):
-            # Link to parent directory
-            continue
-
-        last_updated = td[2].text.strip()
-
-        links.append((link_target, last_updated))
-
-    return links
+def get_channels():
+    return [
+        (x.object_name, x.last_modified)
+        for x in client.list_objects_v2('nix-channels')
+        if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', x.object_name)
+    ]
 
 def clone_channels():
     logging.info(f'- Fetching channels')
@@ -161,17 +151,15 @@ def clone_channels():
 
     working_dir.mkdir(parents=True, exist_ok=True)
 
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
+    for channel, chan_updated in get_channels():
        chan_path = working_dir / channel
 
         # Old channels, little value in cloning and format changes
-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
+        if chan_updated < CLONE_SINCE:
             continue
 
-        chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
-        chan_redirect_res.raise_for_status()
-
-        chan_location = chan_redirect_res.headers['Location']
+        chan_obj = client.get_object('nix-channels', channel)
+        chan_location = chan_obj.headers['x-amz-website-redirect-location']
 
         chan_release = chan_location.split('/')[-1]
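
Below is a standalone sketch of the S3-based flow this patch switches to, useful when reviewing without the full file. The bucket name ('nix-channels'), the channel-name regex, the CLONE_SINCE cutoff, and the x-amz-website-redirect-location header are taken from the patch; the surrounding script (the channels list, loop, and print) is illustrative only, and assumes a minio-py version contemporary with this patch, where list_objects_v2 still exists.

# Reviewer's sketch, not part of the patch: exercises the same minio calls
# the new code uses, against the public nix-channels bucket.
import re
from datetime import datetime

import minio  # assumed: a minio-py release that still provides list_objects_v2
import pytz

client = minio.Minio('s3.amazonaws.com')  # anonymous access to a public bucket

# Top-level channel objects are named nixos-* / nixpkgs-* with no trailing
# slash; list_objects_v2 yields their names and last-modified timestamps.
channels = [
    (obj.object_name, obj.last_modified)
    for obj in client.list_objects_v2('nix-channels')
    if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', obj.object_name)
]

CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)  # cutoff as in the patch

for name, updated in channels:
    if updated < CLONE_SINCE:
        continue  # defunct channel per the patch's cutoff

    # A channel object is an S3 website redirect: the target release appears
    # in this response header rather than as an HTTP 302 Location, which is
    # why the old http_head() helper could be dropped.
    obj = client.get_object('nix-channels', name)
    release = obj.headers['x-amz-website-redirect-location'].split('/')[-1]
    print(f'{name} -> {release}')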