dramforever 2020-03-23 22:04:53 +08:00
parent a14304c1b2
commit 4afe8ac16d

@@ -2,6 +2,7 @@
 import hashlib
 import logging
 import lzma
+import minio
 import os
 import re
 import sys
@ -11,6 +12,7 @@ import subprocess
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from collections import defaultdict
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
@@ -38,14 +40,8 @@ def http_head(*args, **kwargs):
 def http_get(*args, **kwargs):
     return session.get(*args, timeout=TIMEOUT, **kwargs)

-def file_sha256(dest):
-    sha = subprocess.check_output(
-        [ 'sha256sum', str(dest) ],
-        universal_newlines=True
-    )
-    return sha.split(' ')[0]
-
 def atomic_write_file(dest, contents):
+    dest.parent.mkdir(parents=True, exist_ok=True)
     tmp_dest = dest.parent / f'.{dest.name}.tmp'
     with tmp_dest.open('w') as f:
         f.write(contents)
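
The hunk above adds the mkdir call to atomic_write_file because the rewritten clone_images below no longer creates channel directories itself. For reference, a minimal sketch of the full write-to-temp-then-rename pattern; the trailing rename falls outside the hunk shown, so its exact form is assumed, and the demo path in the usage line is a made-up example:

    from pathlib import Path

    def atomic_write_file(dest, contents):
        # Create the parent directory on demand (the line added in this hunk).
        dest.parent.mkdir(parents=True, exist_ok=True)
        # Write to a hidden temp file in the same directory, then rename it
        # over the destination. A same-directory rename is atomic on POSIX
        # filesystems, so readers never observe a half-written file.
        tmp_dest = dest.parent / f'.{dest.name}.tmp'
        with tmp_dest.open('w') as f:
            f.write(contents)
        tmp_dest.rename(dest)  # assumed final step, outside the hunk shown

    # Hypothetical usage:
    atomic_write_file(Path('demo/some-channel/git-revision'), 'abcdef\n')
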
@@ -93,156 +89,50 @@ def download(url, dest):
     download_dest.rename(dest)

-def get_links(url):
-    r = http_get(url)
-    r.raise_for_status()
-
-    node = pq(r.content)
-
-    links = []
-
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 5:
-            continue
-        link_target = td[1].find('a').get('href')
-        if link_target.startswith('/'):
-            # Link to parent directory
-            continue
-
-        last_updated = td[2].text.strip()
-        links.append((link_target, last_updated))
-
-    return links
-
-def get_channel(chan_location):
-    release_res = http_get(chan_location)
-    release_res.raise_for_status()
-
-    node = pq(release_res.text)
-
-    tagline = node('p').text()
-    tagline_res = re.match(r'^Released on (.+) from', tagline)
-    assert tagline_res is not None
-    released_time = tagline_res[1]
-
-    files = []
-
-    for row in node('tr'):
-        td = pq(row)('td')
-        if len(td) != 3:
-            continue
-        file_name, file_size, file_hash = (pq(x).text() for x in td)
-        files.append((file_name, file_size, file_hash))
-
-    return {
-        'released_time': released_time,
-        'files': files
-    }
+client = minio.Minio('s3.amazonaws.com')
+
+def get_url(name):
+    response = client.get_object('nix-channels', name)
+    return response.headers['x-amz-website-redirect-location']

 def clone_images():
-    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
-        if not channel.startswith('nixos-') \
-                or channel.endswith('-small') \
-                or channel == 'nixos-unstable':
+    DOWNLOAD_MATCH = r'nixos-\d\d.\d\d/latest-nixos-\w+-\w+-linux.\w+(.sha256)?'
+
+    object_names = [
+        x.object_name
+        for x in client.list_objects_v2('nix-channels', recursive=True)
+        if re.fullmatch(DOWNLOAD_MATCH, x.object_name)
+    ]
+
+    channels = defaultdict(lambda: [])
+
+    for name in object_names:
+        chan, file = name.split('/', 1)
+        channels[chan].append(file)
+
+    for channel, files in channels.items():
+        chan_dir = working_dir / channel
+
+        git_rev = http_get(get_url(f'{channel}/git-revision')).text
+        git_rev_path = chan_dir / 'git-revision'
+
+        if git_rev_path.exists() and git_rev == git_rev_path.read_text():
             continue

-        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
-            continue
-
-        chan_path = working_dir / channel
-        chan_path.mkdir(parents=True, exist_ok=True)
-
-        res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
-        res.raise_for_status()
-
-        chan_location = res.headers['Location']
-        chan_release_basename = chan_location.split('/')[-1]
-
-        try:
-            last_url = (chan_path / '.last-url').read_text()
-        except (IOError, OSError):
-            last_url = 'not available'
-
-        if chan_location == last_url:
-            continue
-
-        logging.info(f'- {channel} -> {chan_release_basename}')
-
-        # Matches nixos-19.03 -> nixos-19.03beta171840.23fd1394dc6
-        #                        ^-------------^
-        if chan_release_basename.startswith(channel + 'beta'):
-            logging.info(f'  - Beta channel, not updating')
-            continue
-
-        chan_info = get_channel(chan_location)
-
-        atomic_write_file(chan_path / '.released-time', chan_info['released_time'])
-
-        has_hash_fail = False
-
-        keep_files = { '.last-url', '.released-time' }
-        rename_files = []
-
-        logging.info(f'  - Downloading new files')
-
-        chan_version = channel.split('-', 1)[1]
-        chan_release_version = chan_release_basename.split('-', 1)[1]
-
-        simplify_name = lambda fname: fname.replace(f'-{chan_release_version}-', f'-{chan_version}-')
-
-        image_files = [
-            (simplify_name(file_name), file_name, file_hash)
-            for file_name, _file_size, file_hash in chan_info['files']
-            if file_name.endswith('.iso') or file_name.endswith('ova')
-        ]
-
-        for mirror_file_name, upstream_file_name, file_hash in image_files:
-            keep_files.add(mirror_file_name)
-
-            logging.info(f'    - {upstream_file_name} -> {mirror_file_name}')
-            tmp_dest = f'.update.{upstream_file_name}'
-
-            rename_files.append((tmp_dest, mirror_file_name))
-            download(f'{chan_location}/{upstream_file_name}', chan_path / tmp_dest)
-
-            actual_hash = file_sha256(chan_path / tmp_dest)
-            if file_hash != actual_hash:
-                has_hash_fail = True
-                logging.error(f'      - Incorrect hash')
-                logging.error(f'        actual   {actual_hash}')
-                logging.error(f'        expected {file_hash}')
-                logging.info(f'      - File saved as {tmp_dest}')
-
-        if has_hash_fail:
-            logging.warn(f'  - Found bad files. Will retry next time.')
-        else:
-            logging.info(f'  - Renaming files')
-            for tmp_dest, mirror_file_name in rename_files:
-                (chan_path / tmp_dest).rename(chan_path / mirror_file_name)
-
-            logging.info(f'  - Removing useless files')
-            for file_path in chan_path.iterdir():
-                file_name = file_path.name
-                if file_name not in keep_files:
-                    logging.info(f'    - {file_name}')
-                    file_path.unlink()
-
-            logging.info(f'  - Writing SHA256SUMS')
-            with (chan_path / 'SHA256SUMS').open('w') as f:
-                for mirror_file_name, _upstream_file_name, file_hash in image_files:
-                    f.write(f'{file_hash} *{mirror_file_name}\n')
-
-            logging.info(f'  - Update finished')
-            atomic_write_file(chan_path / '.last-url', chan_location)
+        logging.info(f'- {channel} -> {git_rev}')
+
+        for file in files:
+            logging.info(f'  - {file}')
+            url = get_url(f'{channel}/{file}')
+
+            try:
+                download(url, chan_dir / file)
+            except requests.HTTPError as e:
+                if e.response.status_code == 404:
+                    logging.info(f'    - 404, skipped')
+                else:
+                    raise
+
+        atomic_write_file(git_rev_path, git_rev)

 if __name__ == "__main__":
     clone_images()
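
For readers unfamiliar with the bucket layout: the new code never scrapes HTML. It lists the public nix-channels bucket directly and resolves each channel object through its S3 redirect metadata. A minimal standalone sketch of that lookup, assuming the minio-py 5.x API used above (list_objects_v2, get_object) and anonymous read access; the channel name in the usage lines is a hypothetical example, not taken from the commit:

    import minio
    import requests

    # Anonymous client for the public bucket used by the script.
    client = minio.Minio('s3.amazonaws.com')

    def get_url(name):
        # Channel entries are S3 website redirect objects; the actual
        # release URL travels in the x-amz-website-redirect-location
        # response header rather than in the object body.
        response = client.get_object('nix-channels', name)
        return response.headers['x-amz-website-redirect-location']

    # Hypothetical usage: resolve and fetch one channel's pinned git revision.
    url = get_url('nixos-20.03/git-revision')
    print(requests.get(url, timeout=60).text)
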