#!/usr/bin/env python3
"""Mirror the latest NixOS image files (and their .sha256 digests) for each
nixos-YY.MM channel from the public nix-channels S3 bucket into WORKING_DIR."""
import logging
import minio
import os
import re
import requests

from datetime import datetime
from pathlib import Path
from collections import defaultdict
from minio.credentials import Credentials, Static
from urllib3.exceptions import MaxRetryError
from urllib3.util.retry import Retry

UPSTREAM_URL = os.getenv('TUNASYNC_UPSTREAM_URL', 'https://nixos.org/channels')
WORKING_DIR = os.getenv('TUNASYNC_WORKING_DIR', 'working-images')
CLONE_SINCE = datetime(2018, 12, 1)
TIMEOUT = 60

working_dir = Path(WORKING_DIR)

# Shared session that retries transient gateway errors with exponential backoff.
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
retry_adapter = requests.adapters.HTTPAdapter(max_retries=retries)
session.mount('http://', retry_adapter)
session.mount('https://', retry_adapter)

logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)-8s %(message)s'
)

def http_head(*args, **kwargs):
    return session.head(*args, timeout=TIMEOUT, **kwargs)

def http_get(*args, **kwargs):
    return session.get(*args, timeout=TIMEOUT, **kwargs)

def atomic_write_file(dest, contents):
    # Write to a hidden temp file, then rename: readers never see a partial file.
    dest.parent.mkdir(parents=True, exist_ok=True)
    tmp_dest = dest.parent / f'.{dest.name}.tmp'
    with tmp_dest.open('w') as f:
        f.write(contents)
    tmp_dest.rename(dest)

class WrongSize(RuntimeError):
    def __init__(self, expected, actual):
        super().__init__(f'Wrong file size: expected {expected}, actual {actual}')
        self.actual = actual
        self.expected = expected

def download(url, dest):
    dest.parent.mkdir(parents=True, exist_ok=True)
    download_dest = dest.parent / f'.{dest.name}.tmp'

    retry = retries

    while True:
        with http_get(url, stream=True) as res:
            res.raise_for_status()
            try:
                with download_dest.open('wb') as f:
                    for chunk in res.iter_content(chunk_size=64 * 1024 * 1024):
                        if chunk:
                            f.write(chunk)

                # Guard against silently truncated downloads.
                actual_size = download_dest.stat().st_size
                if 'Content-Length' in res.headers:
                    expected_size = int(res.headers['Content-Length'])
                    if actual_size != expected_size:
                        raise WrongSize(expected=expected_size, actual=actual_size)

                break
            except (requests.exceptions.ConnectionError, WrongSize) as e:
                logging.warning(e)
                try:
                    # Retry.increment() raises MaxRetryError once the retry
                    # budget is exhausted; it never returns None.
                    retry = retry.increment(method='GET', url=url, error=e)
                except MaxRetryError:
                    raise e
                logging.warning(f'Retrying download: {retry}')

    download_dest.rename(dest)

# Anonymous (unsigned) access is enough for the public nix-channels bucket.
credentials = Credentials(provider=Static())
client = minio.Minio('s3.amazonaws.com', credentials=credentials)

def get_url(name):
    # Objects in the bucket are website redirects; resolve one to its real URL.
    response = client.get_object('nix-channels', name)
    return response.headers['x-amz-website-redirect-location']

def clone_images():
    DOWNLOAD_MATCH = r'nixos-\d\d\.\d\d/latest-nixos-\w+-\w+-linux\.\w+(\.sha256)?'

    object_names = [
        x.object_name
        for x in client.list_objects_v2('nix-channels', recursive=True)
        if re.fullmatch(DOWNLOAD_MATCH, x.object_name)
    ]

    # Group image files by channel (the path component before the slash).
    channels = defaultdict(list)

    for name in object_names:
        chan, file = name.split('/', 1)
        channels[chan].append(file)

    for channel, files in channels.items():
        chan_dir = working_dir / channel
        git_rev = http_get(get_url(f'{channel}/git-revision')).text
        git_rev_path = chan_dir / 'git-revision'

        # Skip channels whose git revision has not moved since the last run.
        if git_rev_path.exists() and git_rev == git_rev_path.read_text():
            continue

        logging.info(f'- {channel} -> {git_rev}')

        for file in files:
            logging.info(f'  - {file}')
            url = get_url(f'{channel}/{file}')

            try:
                download(url, chan_dir / file)
            except requests.HTTPError as e:
                if e.response.status_code == 404:
                    logging.info('    - 404, skipped')
                else:
                    raise

        # Record the revision only after every file has downloaded, so an
        # interrupted run is retried in full next time.
        atomic_write_file(git_rev_path, git_rev)

if __name__ == "__main__":
    clone_images()