#!/usr/bin/env python3
import logging
import os
import re
import subprocess
from datetime import datetime
from pathlib import Path

import requests
from pyquery import PyQuery as pq
from urllib3.util.retry import Retry

UPSTREAM_URL = os.getenv('TUNASYNC_UPSTREAM_URL', 'https://nixos.org/channels')
WORKING_DIR = os.getenv('TUNASYNC_WORKING_DIR', 'working-images')
CLONE_SINCE = datetime(2018, 12, 1)  # ignore channels last updated before this date
TIMEOUT = 60  # per-request timeout in seconds

working_dir = Path(WORKING_DIR)

# Shared session that retries transient upstream errors with backoff.
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
retry_adapter = requests.adapters.HTTPAdapter(max_retries=retries)
session.mount('http://', retry_adapter)
session.mount('https://', retry_adapter)

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)-8s %(message)s'
)


def http_head(*args, **kwargs):
    return session.head(*args, timeout=TIMEOUT, **kwargs)


def http_get(*args, **kwargs):
    return session.get(*args, timeout=TIMEOUT, **kwargs)


def file_sha256(dest):
    # Shell out to sha256sum instead of hashing in Python; the images are
    # large and the external tool streams them efficiently.
    sha = subprocess.check_output(
        ['sha256sum', str(dest)],
        universal_newlines=True
    )
    return sha.split(' ')[0]


def atomic_write_file(dest, contents):
    # Write to a hidden temporary file, then rename it into place, so readers
    # never see a partially written file.
    tmp_dest = dest.parent / f'.{dest.name}.tmp'
    with tmp_dest.open('w') as f:
        f.write(contents)
    tmp_dest.rename(dest)
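
# Illustrative only (not part of the original flow): atomic_write_file is what
# keeps the bookkeeping files consistent if the process dies mid-write, e.g.
#
#     atomic_write_file(working_dir / 'nixos-19.03' / '.released-time',
#                       '2019-04-01 12:34:56')  # values hypothetical
#
# A crash before the rename() leaves only a hidden '.*.tmp' file behind; the
# visible file is either absent or complete.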


class WrongSize(RuntimeError):
    def __init__(self, expected, actual):
        super().__init__(f'Wrong file size: expected {expected}, actual {actual}')
        self.actual = actual
        self.expected = expected


def download(url, dest):
    # Stream url into a hidden '.<name>.tmp' file in 64 MiB chunks, verify the
    # size against Content-Length when present, and only then rename to dest.
    dest.parent.mkdir(parents=True, exist_ok=True)
    download_dest = dest.parent / f'.{dest.name}.tmp'

    retry = retries

    while True:
        with http_get(url, stream=True) as res:
            res.raise_for_status()
            try:
                with download_dest.open('wb') as f:
                    for chunk in res.iter_content(chunk_size=64 * 1024 * 1024):
                        if chunk:
                            f.write(chunk)

                actual_size = download_dest.stat().st_size
                if 'Content-Length' in res.headers:
                    expected_size = int(res.headers['Content-Length'])
                    if actual_size != expected_size:
                        raise WrongSize(expected=expected_size, actual=actual_size)

                break
            except (requests.exceptions.ConnectionError, WrongSize) as e:
                logging.warning(e)
                next_retry = retry.increment(
                    method='GET',
                    url=url,
                    error=e
                )
                if next_retry is None:
                    raise e
                retry = next_retry
                logging.warning(f'Retrying download: {retry}')

    download_dest.rename(dest)
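
# Sketch of a standalone call (URL and paths hypothetical; clone_images builds
# the real ones from the channel listing):
#
#     download(f'{chan_location}/nixos-graphical-19.03beta1234.abcdef-x86_64-linux.iso',
#              chan_path / '.update.nixos-graphical-19.03beta1234.abcdef-x86_64-linux.iso')
#
# The '.update.*' file is renamed to its final name by the caller only after
# its SHA-256 hash has been verified.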


def get_links(url):
    r = http_get(url)
    r.raise_for_status()

    node = pq(r.content)

    links = []
    for row in node('tr'):
        td = pq(row)('td')
        # Listing rows have exactly five cells; skip header and filler rows.
        if len(td) != 5:
            continue

        link_target = td[1].find('a').get('href')
        if link_target.startswith('/'):
            # Link to parent directory
            continue

        last_updated = td[2].text.strip()

        links.append((link_target, last_updated))

    return links
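
# get_links scrapes an autoindex-style HTML listing into (href, timestamp)
# pairs, e.g. (values illustrative):
#
#     [('nixos-18.09', '2018-12-24 00:00'),
#      ('nixos-19.03', '2019-04-01 12:34')]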


def get_channel(chan_location):
    release_res = http_get(chan_location)
    release_res.raise_for_status()

    node = pq(release_res.text)

    # The release page opens with a tagline such as
    # 'Released on <time> from <commit>'; pull out the timestamp.
    tagline = node('p').text()
    tagline_res = re.match(r'^Released on (.+) from', tagline)
    assert tagline_res is not None
    released_time = tagline_res[1]

    files = []
    for row in node('tr'):
        td = pq(row)('td')
        # File rows have exactly three cells: name, size and hash.
        if len(td) != 3:
            continue
        file_name, file_size, file_hash = (pq(x).text() for x in td)
        files.append((file_name, file_size, file_hash))

    return {
        'released_time': released_time,
        'files': files
    }
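
# Shape of the returned dict (values illustrative):
#
#     {
#         'released_time': '2019-04-01 12:34:56',
#         'files': [('nixos-graphical-19.03beta1234.abcdef-x86_64-linux.iso',
#                    '1.2G', '<sha256 hex digest>')],
#     }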


def clone_images():
    for channel, chan_updated in get_links(f'{UPSTREAM_URL}/'):
        # Mirror only full (non-small) stable NixOS channels.
        if not channel.startswith('nixos-') \
                or channel.endswith('-small') \
                or channel == 'nixos-unstable':
            continue

        # Skip channels that predate the mirror cutoff.
        if datetime.strptime(chan_updated, '%Y-%m-%d %H:%M') < CLONE_SINCE:
            continue

        chan_path = working_dir / channel
        chan_path.mkdir(parents=True, exist_ok=True)

        # The channel URL redirects to the concrete release it points at.
        res = http_head(f'{UPSTREAM_URL}/{channel}', allow_redirects=False)
        res.raise_for_status()

        chan_location = res.headers['Location']
        chan_release_basename = chan_location.split('/')[-1]

        try:
            last_url = (chan_path / '.last-url').read_text()
        except (IOError, OSError):
            last_url = 'not available'

        # This release is already mirrored; nothing to do.
        if chan_location == last_url:
            continue

        logging.info(f'- {channel} -> {chan_release_basename}')

        # Matches nixos-19.03 -> nixos-19.03beta171840.23fd1394dc6
        #                        ^-------------^
        if chan_release_basename.startswith(channel + 'beta'):
            logging.info(' - Beta channel, not updating')
            continue

        chan_info = get_channel(chan_location)

        atomic_write_file(chan_path / '.released-time', chan_info['released_time'])

        has_hash_fail = False

        keep_files = {'.last-url', '.released-time'}
        rename_files = []

        logging.info(' - Downloading new files')

        # e.g. 'nixos-19.03' -> '19.03' and
        # 'nixos-19.03beta171840.23fd1394dc6' -> '19.03beta171840.23fd1394dc6'
        chan_version = channel.split('-', 1)[1]
        chan_release_version = chan_release_basename.split('-', 1)[1]

        def simplify_name(fname):
            # Drop the beta/commit suffix from mirrored file names.
            return fname.replace(f'-{chan_release_version}-', f'-{chan_version}-')

        image_files = [
            (simplify_name(file_name), file_name, file_hash)
            for file_name, _file_size, file_hash in chan_info['files']
            if file_name.endswith('.iso') or file_name.endswith('.ova')
        ]

        for mirror_file_name, upstream_file_name, file_hash in image_files:
            keep_files.add(mirror_file_name)
            logging.info(f' - {upstream_file_name} -> {mirror_file_name}')
            tmp_dest = f'.update.{upstream_file_name}'
            rename_files.append((tmp_dest, mirror_file_name))

            download(f'{chan_location}/{upstream_file_name}', chan_path / tmp_dest)
            actual_hash = file_sha256(chan_path / tmp_dest)

            if file_hash != actual_hash:
                has_hash_fail = True
                logging.error(' - Incorrect hash')
                logging.error(f'   actual   {actual_hash}')
                logging.error(f'   expected {file_hash}')
                logging.info(f' - File saved as {tmp_dest}')

        if has_hash_fail:
            logging.warning(' - Found bad files. Will retry next time.')
        else:
            logging.info(' - Renaming files')
            for tmp_dest, mirror_file_name in rename_files:
                (chan_path / tmp_dest).rename(chan_path / mirror_file_name)

            logging.info(' - Removing useless files')
            for file_path in chan_path.iterdir():
                file_name = file_path.name
                if file_name not in keep_files:
                    logging.info(f' - {file_name}')
                    file_path.unlink()

            logging.info(' - Writing SHA256SUMS')
            # The '*' prefix marks binary mode, as expected by 'sha256sum -c'.
            with (chan_path / 'SHA256SUMS').open('w') as f:
                for mirror_file_name, _upstream_file_name, file_hash in image_files:
                    f.write(f'{file_hash} *{mirror_file_name}\n')

            logging.info(' - Update finished')
            # Record the release URL last so a failed run is retried next time.
            atomic_write_file(chan_path / '.last-url', chan_location)
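
# After a successful pass, each channel directory holds only the renamed
# images plus the entries in keep_files, e.g. (file names illustrative):
#
#     working-images/nixos-19.03/
#         nixos-graphical-19.03-x86_64-linux.iso
#         nixos-minimal-19.03-x86_64-linux.iso
#         SHA256SUMS
#         .last-url
#         .released-time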


if __name__ == "__main__":
    clone_images()
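
# Typical invocation under tunasync (script path and values hypothetical;
# the environment variables are the ones read at the top of this file):
#
#     TUNASYNC_UPSTREAM_URL=https://nixos.org/channels \
#     TUNASYNC_WORKING_DIR=/data/mirrors/nixos-images \
#     ./nixos-images.py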