dramforever 2020-03-23 20:33:02 +08:00
parent 0babe7557c
commit a14304c1b2
2 changed files with 21 additions and 33 deletions

View File

@ -1,7 +1,7 @@
FROM python:3-buster
MAINTAINER Wang Ruikang <dramforever@live.com>
RUN pip3 install pyquery requests && \
RUN pip3 install pyquery requests minio && \
# Install Nix. To simplify management we only copy binaries and create
# symlinks, and do no further configuration
curl https://mirrors.tuna.tsinghua.edu.cn/nix/nix-2.3.2/nix-2.3.2-x86_64-linux.tar.xz -o /tmp/nix.tar.xz && \

View File

@ -3,11 +3,13 @@ import hashlib
import json
import logging
import lzma
import minio
import os
import pytz
import re
import sys
import requests
import subprocess
import sys
from pyquery import PyQuery as pq
from datetime import datetime, timedelta
@ -35,7 +37,12 @@ RETAIN_DAYS = float(os.getenv('NIX_MIRROR_RETAIN_DAYS', 30))
STORE_DIR = 'store'
RELEASES_DIR = 'releases'
CLONE_SINCE = datetime(2018, 12, 1)
# Channels that have not updated since migration to Netlify [1] are assumed to
# be too old and defunct.
#
# [1]: https://discourse.nixos.org/t/announcement-moving-nixos-org-to-netlify/6212
CLONE_SINCE = datetime(2020, 3, 6, tzinfo=pytz.utc)
TIMEOUT = 60
working_dir = Path(WORKING_DIR)
@ -63,9 +70,6 @@ logging.basicConfig(
# Don't forget 'global failure'
failure = False
def http_head(*args, **kwargs):
    """Send an HTTP HEAD request through the shared module session.

    All positional/keyword arguments are forwarded to ``session.head``;
    the global ``TIMEOUT`` is always supplied so no request can hang
    indefinitely.
    """
    response = session.head(*args, timeout=TIMEOUT, **kwargs)
    return response
def http_get(*args, **kwargs):
    """Send an HTTP GET request through the shared module session.

    Mirrors ``http_head``: forwards everything to ``session.get`` and
    injects the global ``TIMEOUT`` on every call.
    """
    response = session.get(*args, timeout=TIMEOUT, **kwargs)
    return response
@ -131,28 +135,14 @@ def download(url, dest):
download_dest.rename(dest)
# Scrape an HTML directory listing and return (href, last-updated) pairs.
# (This is the pre-minio implementation being removed by this commit.)
def get_links(url):
# Fetch the listing page; fail fast on any HTTP error status.
r = http_get(url)
r.raise_for_status()
# NOTE(review): the next line appears to be an interleaved *added* diff line
# (the minio client construction from the new code), not part of get_links —
# confirm against the original commit before reading this span as one function.
client = minio.Minio('s3.amazonaws.com')
# Parse the response body with PyQuery and walk every table row.
node = pq(r.content)
links = []
for row in node('tr'):
td = pq(row)('td')
# Listing rows have exactly 5 cells; anything else is a header/separator row.
if len(td) != 5:
continue
# The second cell holds the anchor pointing at the listed entry.
link_target = td[1].find('a').get('href')
if link_target.startswith('/'):
# Link to parent directory
continue
# The third cell carries the last-updated timestamp as plain text.
last_updated = td[2].text.strip()
links.append((link_target, last_updated))
# List of (relative href, last-updated string) tuples.
return links
def get_channels():
    """List channel objects from the ``nix-channels`` S3 bucket.

    Returns ``(object_name, last_modified)`` tuples for every object whose
    name matches ``nixos-*`` or ``nixpkgs-*`` and does not end with ``/``
    (i.e. real channel objects rather than directory-style keys).
    """
    channels = []
    for obj in client.list_objects_v2('nix-channels'):
        if re.fullmatch(r'(nixos|nixpkgs)-.+[^/]', obj.object_name):
            channels.append((obj.object_name, obj.last_modified))
    return channels
def clone_channels():
logging.info(f'- Fetching channels')
@ -161,17 +151,15 @@ def clone_channels():
working_dir.mkdir(parents=True, exist_ok=True)
for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
for channel, chan_updated in get_channels():
chan_path = working_dir / channel
# Old channels, little value in cloning and format changes
if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
if chan_updated < CLONE_SINCE:
continue
chan_redirect_res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
chan_redirect_res.raise_for_status()
chan_location = chan_redirect_res.headers['Location']
chan_obj = client.get_object('nix-channels', channel)
chan_location = chan_obj.headers['x-amz-website-redirect-location']
chan_release = chan_location.split('/')[-1]