From a14304c1b243fbaae45f364fa3d277d3aab87482 Mon Sep 17 00:00:00 2001
From: dramforever
Date: Mon, 23 Mar 2020 20:33:02 +0800
Subject: [PATCH] nix-channels: update

https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
---
 dockerfiles/nix-channels/Dockerfile |  2 +-
 nix-channels.py                     | 52 +++++++++++------------------
 2 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/dockerfiles/nix-channels/Dockerfile b/dockerfiles/nix-channels/Dockerfile
index 7171d6d..fce8349 100644
--- a/dockerfiles/nix-channels/Dockerfile
+++ b/dockerfiles/nix-channels/Dockerfile
@@ -1,7 +1,7 @@
 FROM python:3-buster
 MAINTAINER Wang Ruikang
 
-RUN pip3 install pyquery requests && \
+RUN pip3 install pyquery requests minio && \
     # Install Nix. To simplify management we only copy binaries and create
     # symlinks, and do no further configuration
     curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \
diff --git a/nix-channels.py b/nix-channels.py
index 7253d1a..176e4d9 100644
--- a/nix-channels.py
+++ b/nix-channels.py
@@ -3,11 +3,13 @@ import hashlib
 import json
 import logging
 import lzma
+import minio
 import os
+import pytz
 import re
-import sys
 import requests
 import subprocess
+import sys
 
 from pyquery import PyQuery as pq
 from datetime import datetime, timedelta
@@ -35,7 +37,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
 
 STORE_DIR = 'store'
 RELEASES_DIR = 'releases'
-CLONE_SINCE = datetime(2018, 12, 1)
+
+# Channels that have not updated since migration to Netlify [1] are assumed to
+# be too old and defunct.
+#
+# [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
+CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
 TIMEOUT = 60
 
 working_dir = Path(WORKING_DIR)
@@ -63,9 +70,6 @@ logging.basicConfig(
 # Don't forget 'global failure'
 failure = False
 
-def http_head(*args, **kwargs):
-    return session.head(*args, timeout=TIMEOUT, **kwargs)
-
 def http_get(*args, **kwargs):
     return session.get(*args, timeout=TIMEOUT, **kwargs)
 
@@ -131,28 +135,14 @@ def download(url, dest):
 
     download_dest.rename(dest)
 
-def get_links(url):
-    r = http_get(url)
-    r.raise_for_status()
+client = minio.Minio('s3.amazonaws.com')
 
-    node = pq(r.content)
-
-    links = []
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 5:
-            continue
-
-        link_target = td[1].find('a').get('href')
-        if link_target.startswith('/'):
-            # Link to parent directory
-            continue
-
-        last_updated = td[2].text.strip()
-
-        links.append((link_target, last_updated))
-
-    return links
+def get_channels():
+    return [
+        (x.object_name, x.last_modified)
+        for x in client.list_objects_v2('nix-channels')
+        if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', x.object_name)
+    ]
 
 def clone_channels():
     logging.info(f'- Fetching channels')
@@ -161,17 +151,15 @@ def clone_channels():
 
     working_dir.mkdir(parents=True, exist_ok=True)
 
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
+    for channel, chan_updated in get_channels():
        chan_path = working_dir / channel
 
         # Old channels, little value in cloning and format changes
-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
+        if chan_updated < CLONE_SINCE:
             continue
 
-        chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
-        chan_redirect_res.raise_for_status()
-
-        chan_location = chan_redirect_res.headers['Location']
+        chan_obj = client.get_object('nix-channels', channel)
+        chan_location = chan_obj.headers['x-amz-website-redirect-location']
 
         chan_release = chan_location.split('/')[-1]
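
Below is a standalone sketch of the S3-based flow this patch switches to, useful when reviewing without the full file. The bucket name ('nix-channels'), the channel-name regex, the CLONE_SINCE cutoff, and the x-amz-website-redirect-location header are taken from the patch; the surrounding script (the channels list, loop, and print) is illustrative only, and assumes a minio-py version contemporary with this patch, where list_objects_v2 still exists.

# Reviewer's sketch, not part of the patch: exercises the same minio calls
# the new code uses, against the public nix-channels bucket.
import re
from datetime import datetime

import minio  # assumed: a minio-py release that still provides list_objects_v2
import pytz

client = minio.Minio('s3.amazonaws.com')  # anonymous access to a public bucket

# Top-level channel objects are named nixos-* / nixpkgs-* with no trailing
# slash; list_objects_v2 yields their names and last-modified timestamps.
channels = [
    (obj.object_name, obj.last_modified)
    for obj in client.list_objects_v2('nix-channels')
    if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', obj.object_name)
]

CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)  # cutoff as in the patch

for name, updated in channels:
    if updated < CLONE_SINCE:
        continue  # defunct channel per the patch's cutoff

    # A channel object is an S3 website redirect: the target release appears
    # in this response header rather than as an HTTP 302 Location, which is
    # why the old http_head() helper could be dropped.
    obj = client.get_object('nix-channels', name)
    release = obj.headers['x-amz-website-redirect-location'].split('/')[-1]
    print(f'{name} -> {release}')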