Run formatter on some Python scripts

Signed-off-by: Harry Chen <i@harrychen.xyz>
Harry Chen 2024-08-17 13:33:31 +08:00
parent f8afa1f57c
commit 5f4bc1c260
6 changed files with 449 additions and 258 deletions
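
The commit message does not name the formatter, but the changes below match Black's conventions: strings normalized to double quotes, long calls broken out one argument per line, and hand-aligned tuples wrapped in new "# fmt: off" / "# fmt: on" guards. The following is only a sketch of how such a pass might be reproduced; the choice of Black and the script names are assumptions, not recorded in this commit.

# Hypothetical reproduction of the formatting pass (assumes Black;
# the script names are placeholders, not taken from this diff).
import subprocess

scripts = ["anaconda.py", "apt-sync.py"]  # placeholder file names

# Preview what would change without rewriting anything.
subprocess.run(["black", "--check", "--diff", *scripts])

# Rewrite the files in place; regions between "# fmt: off" and
# "# fmt: on" are left untouched by Black.
subprocess.run(["black", *scripts], check=True)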

View File

@@ -1,17 +1,11 @@
#!/usr/bin/env python3
import hashlib
import traceback
import json
import os
import re
import shutil
import subprocess as sp
import tempfile
import argparse
import time
from email.utils import parsedate_to_datetime
from pathlib import Path
from typing import List, Set, Tuple, IO
from typing import Set
import requests
DOWNLOAD_TIMEOUT = int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))

View File

@@ -25,6 +25,7 @@ CONDA_CLOUD_BASE_URL = os.getenv("CONDA_COULD_URL", "https://conda.anaconda.org"
WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR")
# fmt: off
CONDA_REPOS = ("main", "free", "r", "msys2")
CONDA_ARCHES = (
"noarch", "linux-64", "linux-32", "linux-aarch64", "linux-armv6l", "linux-armv7l",
@@ -72,6 +73,7 @@ CONDA_CLOUD_REPOS = (
EXCLUDED_PACKAGES = (
"pytorch-nightly", "pytorch-nightly-cpu", "ignite-nightly",
)
# fmt: on
# connect and read timeout value
TIMEOUT_OPTION = (7, 10)
@@ -84,28 +86,31 @@ logging.basicConfig(
format="[%(asctime)s] [%(levelname)s] %(message)s",
)
def sizeof_fmt(num, suffix='iB'):
for unit in ['','K','M','G','T','P','E','Z']:
def sizeof_fmt(num, suffix="iB"):
for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
if abs(num) < 1024.0:
return "%3.2f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.2f%s%s" % (num, 'Y', suffix)
return "%.2f%s%s" % (num, "Y", suffix)
def md5_check(file: Path, md5: str = None):
m = hashlib.md5()
with file.open('rb') as f:
with file.open("rb") as f:
while True:
buf = f.read(1*1024*1024)
buf = f.read(1 * 1024 * 1024)
if not buf:
break
m.update(buf)
return m.hexdigest() == md5
def sha256_check(file: Path, sha256: str = None):
m = hashlib.sha256()
with file.open('rb') as f:
with file.open("rb") as f:
while True:
buf = f.read(1*1024*1024)
buf = f.read(1 * 1024 * 1024)
if not buf:
break
m.update(buf)
@@ -113,34 +118,42 @@ def sha256_check(file: Path, sha256: str = None):
def curl_download(remote_url: str, dst_file: Path, sha256: str = None, md5: str = None):
sp.check_call([
"curl", "-o", str(dst_file),
"-sL", "--remote-time", "--show-error",
"--fail", "--retry", "10", "--speed-time", "15",
"--speed-limit", "5000", remote_url,
])
# fmt: off
sp.check_call(
[
"curl", "-o", str(dst_file),
"-sL", "--remote-time", "--show-error",
"--fail", "--retry", "10",
"--speed-time", "15",
"--speed-limit", "5000",
remote_url,
]
)
# fmt: on
if sha256 and (not sha256_check(dst_file, sha256)):
return "SHA256 mismatch"
if md5 and (not md5_check(dst_file, md5)):
return "MD5 mismatch"
def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove_legacy: bool):
def sync_repo(
repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove_legacy: bool
):
logging.info("Start syncing {}".format(repo_url))
local_dir.mkdir(parents=True, exist_ok=True)
repodata_url = repo_url + '/repodata.json'
bz2_repodata_url = repo_url + '/repodata.json.bz2'
repodata_url = repo_url + "/repodata.json"
bz2_repodata_url = repo_url + "/repodata.json.bz2"
# https://github.com/conda/conda/issues/13256, from conda 24.1.x
zst_repodata_url = repo_url + '/repodata.json.zst'
zst_repodata_url = repo_url + "/repodata.json.zst"
# https://docs.conda.io/projects/conda-build/en/latest/release-notes.html
# "current_repodata.json" - like repodata.json, but only has the newest version of each file
current_repodata_url = repo_url + '/current_repodata.json'
current_repodata_url = repo_url + "/current_repodata.json"
tmp_repodata = tmpdir / "repodata.json"
tmp_bz2_repodata = tmpdir / "repodata.json.bz2"
tmp_zst_repodata = tmpdir / "repodata.json.zst"
tmp_current_repodata = tmpdir / 'current_repodata.json'
tmp_current_repodata = tmpdir / "current_repodata.json"
curl_download(repodata_url, tmp_repodata)
curl_download(bz2_repodata_url, tmp_bz2_repodata)
@@ -158,31 +171,33 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
remote_filelist = []
total_size = 0
legacy_packages = repodata['packages']
legacy_packages = repodata["packages"]
conda_packages = repodata.get("packages.conda", {})
if remove_legacy:
# https://github.com/anaconda/conda/blob/0dbf85e0546e0b0dc060c8265ec936591ccbe980/conda/core/subdir_data.py#L440-L442
use_legacy_packages = set(legacy_packages.keys()) - set(k[:-6] + ".tar.bz2" for k in conda_packages.keys())
use_legacy_packages = set(legacy_packages.keys()) - set(
k[:-6] + ".tar.bz2" for k in conda_packages.keys()
)
legacy_packages = {k: legacy_packages[k] for k in use_legacy_packages}
packages = {**legacy_packages, **conda_packages}
for filename, meta in packages.items():
if meta['name'] in EXCLUDED_PACKAGES:
if meta["name"] in EXCLUDED_PACKAGES:
continue
file_size = meta['size']
file_size = meta["size"]
# prefer sha256 over md5
sha256 = None
md5 = None
if 'sha256' in meta:
sha256 = meta['sha256']
elif 'md5' in meta:
md5 = meta['md5']
if "sha256" in meta:
sha256 = meta["sha256"]
elif "md5" in meta:
md5 = meta["md5"]
total_size += file_size
pkg_url = '/'.join([repo_url, filename])
pkg_url = "/".join([repo_url, filename])
dst_file = local_dir / filename
dst_file_wip = local_dir / ('.downloading.' + filename)
dst_file_wip = local_dir / (".downloading." + filename)
remote_filelist.append(dst_file)
if dst_file.is_file():
@@ -202,7 +217,7 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
if err is None:
dst_file_wip.rename(dst_file)
except sp.CalledProcessError:
err = 'CalledProcessError'
err = "CalledProcessError"
if err is None:
break
logging.error("Failed to download {}: {}".format(filename, err))
@@ -223,11 +238,15 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
tmp_current_repodata_gz_gened = False
if tmp_current_repodata.is_file():
if os.path.getsize(tmp_current_repodata) > GEN_METADATA_JSON_GZIP_THRESHOLD:
sp.check_call(["gzip", "--no-name", "--keep", "--", str(tmp_current_repodata)])
shutil.move(str(tmp_current_repodata) + ".gz", str(local_dir / "current_repodata.json.gz"))
sp.check_call(
["gzip", "--no-name", "--keep", "--", str(tmp_current_repodata)]
)
shutil.move(
str(tmp_current_repodata) + ".gz",
str(local_dir / "current_repodata.json.gz"),
)
tmp_current_repodata_gz_gened = True
shutil.move(str(tmp_current_repodata), str(
local_dir / "current_repodata.json"))
shutil.move(str(tmp_current_repodata), str(local_dir / "current_repodata.json"))
if not tmp_current_repodata_gz_gened:
# If the gzip file is not generated, remove the dangling gzip archive
Path(local_dir / "current_repodata.json.gz").unlink(missing_ok=True)
@@ -235,9 +254,9 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
if delete:
local_filelist = []
delete_count = 0
for i in local_dir.glob('*.tar.bz2'):
for i in local_dir.glob("*.tar.bz2"):
local_filelist.append(i)
for i in local_dir.glob('*.conda'):
for i in local_dir.glob("*.conda"):
local_filelist.append(i)
for i in set(local_filelist) - set(remote_filelist):
logging.info("Deleting {}".format(i))
@@ -245,46 +264,53 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
delete_count += 1
logging.info("{} files deleted".format(delete_count))
logging.info("{}: {} files, {} in total".format(
repodata_url, len(remote_filelist), sizeof_fmt(total_size)))
logging.info(
"{}: {} files, {} in total".format(
repodata_url, len(remote_filelist), sizeof_fmt(total_size)
)
)
return total_size
def sync_installer(repo_url, local_dir: Path):
logging.info("Start syncing {}".format(repo_url))
local_dir.mkdir(parents=True, exist_ok=True)
full_scan = random.random() < 0.1 # Do full version check less frequently
full_scan = random.random() < 0.1 # Do full version check less frequently
def remote_list():
r = requests.get(repo_url, timeout=TIMEOUT_OPTION)
d = pq(r.content)
for tr in d('table').find('tr'):
tds = pq(tr).find('td')
for tr in d("table").find("tr"):
tds = pq(tr).find("td")
if len(tds) != 4:
continue
fname = tds[0].find('a').text
fname = tds[0].find("a").text
sha256 = tds[3].text
if sha256 == '<directory>' or len(sha256) != 64:
if sha256 == "<directory>" or len(sha256) != 64:
continue
yield (fname, sha256)
for filename, sha256 in remote_list():
pkg_url = "/".join([repo_url, filename])
dst_file = local_dir / filename
dst_file_wip = local_dir / ('.downloading.' + filename)
dst_file_wip = local_dir / (".downloading." + filename)
if dst_file.is_file():
r = requests.head(pkg_url, allow_redirects=True, timeout=TIMEOUT_OPTION)
len_avail = 'content-length' in r.headers
len_avail = "content-length" in r.headers
if len_avail:
remote_filesize = int(r.headers['content-length'])
remote_date = parsedate_to_datetime(r.headers['last-modified'])
remote_filesize = int(r.headers["content-length"])
remote_date = parsedate_to_datetime(r.headers["last-modified"])
stat = dst_file.stat()
local_filesize = stat.st_size
local_mtime = stat.st_mtime
# Do content verification on ~5% of files (see issue #25)
if (not len_avail or remote_filesize == local_filesize) and remote_date.timestamp() == local_mtime and \
(random.random() < 0.95 or sha256_check(dst_file, sha256)):
if (
(not len_avail or remote_filesize == local_filesize)
and remote_date.timestamp() == local_mtime
and (random.random() < 0.95 or sha256_check(dst_file, sha256))
):
logging.info("Skipping {}".format(filename))
# Stop the scanning if the most recent version is present
@@ -299,25 +325,31 @@ def sync_installer(repo_url, local_dir: Path):
for retry in range(3):
logging.info("Downloading {}".format(filename))
err = ''
err = ""
try:
err = curl_download(pkg_url, dst_file_wip, sha256=sha256)
if err is None:
dst_file_wip.rename(dst_file)
except sp.CalledProcessError:
err = 'CalledProcessError'
err = "CalledProcessError"
if err is None:
break
logging.error("Failed to download {}: {}".format(filename, err))
def main():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--working-dir", default=WORKING_DIR)
parser.add_argument("--delete", action='store_true',
help='delete unreferenced package files')
parser.add_argument("--remove-legacy", action='store_true',
help='delete legacy packages which have conda counterpart. Requires client conda >= 4.7.0')
parser.add_argument(
"--delete", action="store_true", help="delete unreferenced package files"
)
parser.add_argument(
"--remove-legacy",
action="store_true",
help="delete legacy packages which have conda counterpart. Requires client conda >= 4.7.0",
)
args = parser.parse_args()
if args.working_dir is None:
@@ -336,7 +368,8 @@ def main():
try:
sync_installer(remote_url, local_dir)
size_statistics += sum(
f.stat().st_size for f in local_dir.glob('*') if f.is_file())
f.stat().st_size for f in local_dir.glob("*") if f.is_file()
)
except Exception:
logging.exception("Failed to sync installers of {}".format(dist))
success = False
@@ -348,8 +381,9 @@ def main():
tmpdir = tempfile.mkdtemp()
try:
size_statistics += sync_repo(remote_url,
local_dir, Path(tmpdir), args.delete, args.remove_legacy)
size_statistics += sync_repo(
remote_url, local_dir, Path(tmpdir), args.delete, args.remove_legacy
)
except Exception:
logging.exception("Failed to sync repo: {}/{}".format(repo, arch))
success = False
@@ -362,8 +396,9 @@ def main():
tmpdir = tempfile.mkdtemp()
try:
size_statistics += sync_repo(remote_url,
local_dir, Path(tmpdir), args.delete, args.remove_legacy)
size_statistics += sync_repo(
remote_url, local_dir, Path(tmpdir), args.delete, args.remove_legacy
)
except Exception:
logging.exception("Failed to sync repo: {}".format(repo))
success = False
@@ -374,6 +409,7 @@ def main():
if not success:
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -4,7 +4,6 @@ import traceback
import os
import re
import shutil
import subprocess as sp
import argparse
import bz2
import gzip
@@ -23,21 +22,27 @@ requests.utils.default_user_agent = lambda: APT_SYNC_USER_AGENT
# set preferred address family
import requests.packages.urllib3.util.connection as urllib3_cn
USE_ADDR_FAMILY = os.getenv('USE_ADDR_FAMILY', '').strip().lower()
if USE_ADDR_FAMILY != '':
assert USE_ADDR_FAMILY in ['ipv4', 'ipv6'], "USE_ADDR_FAMILY must be either ipv4 or ipv6"
urllib3_cn.allowed_gai_family = lambda: socket.AF_INET if USE_ADDR_FAMILY == 'ipv4' else socket.AF_INET6
USE_ADDR_FAMILY = os.getenv("USE_ADDR_FAMILY", "").strip().lower()
if USE_ADDR_FAMILY != "":
assert USE_ADDR_FAMILY in [
"ipv4",
"ipv6",
], "USE_ADDR_FAMILY must be either ipv4 or ipv6"
urllib3_cn.allowed_gai_family = lambda: (
socket.AF_INET if USE_ADDR_FAMILY == "ipv4" else socket.AF_INET6
)
OS_TEMPLATE = {
'ubuntu-lts': ["focal", "jammy", "noble"],
'debian-current': ["bullseye", "bookworm"],
'debian-latest2': ["bullseye", "bookworm"],
'debian-latest': ["bookworm"],
"ubuntu-lts": ["focal", "jammy", "noble"],
"debian-current": ["bullseye", "bookworm"],
"debian-latest2": ["bullseye", "bookworm"],
"debian-latest": ["bookworm"],
}
ARCH_NO_PKGIDX = ['dep11', 'i18n', 'cnf']
MAX_RETRY=int(os.getenv('MAX_RETRY', '3'))
DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '')
ARCH_NO_PKGIDX = ["dep11", "i18n", "cnf"]
MAX_RETRY = int(os.getenv("MAX_RETRY", "3"))
DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "")
pattern_os_template = re.compile(r"@\{(.+)\}")
pattern_package_name = re.compile(r"^Filename: (.+)$", re.MULTILINE)
@@ -45,11 +50,13 @@ pattern_package_size = re.compile(r"^Size: (\d+)$", re.MULTILINE)
pattern_package_sha256 = re.compile(r"^SHA256: (\w{64})$", re.MULTILINE)
download_cache = dict()
def check_args(prop: str, lst: List[str]):
for s in lst:
if len(s)==0 or ' ' in s:
if len(s) == 0 or " " in s:
raise ValueError(f"Invalid item in {prop}: {repr(s)}")
def replace_os_template(os_list: List[str]) -> List[str]:
ret = []
for i in os_list:
@@ -57,103 +64,137 @@ def replace_os_template(os_list: List[str]) -> List[str]:
if matched:
for os in OS_TEMPLATE[matched.group(1)]:
ret.append(pattern_os_template.sub(os, i))
elif i.startswith('@'):
elif i.startswith("@"):
ret.extend(OS_TEMPLATE[i[1:]])
else:
ret.append(i)
return ret
def check_and_download(url: str, dst_file: Path, caching = False)->int:
def check_and_download(url: str, dst_file: Path, caching=False) -> int:
try:
if caching:
if url in download_cache:
print(f"Using cached content: {url}", flush=True)
with dst_file.open('wb') as f:
with dst_file.open("wb") as f:
f.write(download_cache[url])
return 0
download_cache[url] = bytes()
start = time.time()
with requests.get(url, stream=True, timeout=(5, 10)) as r:
r.raise_for_status()
if 'last-modified' in r.headers:
if "last-modified" in r.headers:
remote_ts = parsedate_to_datetime(
r.headers['last-modified']).timestamp()
else: remote_ts = None
r.headers["last-modified"]
).timestamp()
else:
remote_ts = None
with dst_file.open('wb') as f:
with dst_file.open("wb") as f:
for chunk in r.iter_content(chunk_size=1024**2):
if time.time() - start > DOWNLOAD_TIMEOUT:
raise TimeoutError("Download timeout")
if not chunk: continue # filter out keep-alive new chunks
if not chunk:
continue # filter out keep-alive new chunks
f.write(chunk)
if caching: download_cache[url] += chunk
if caching:
download_cache[url] += chunk
if remote_ts is not None:
os.utime(dst_file, (remote_ts, remote_ts))
return 0
except BaseException as e:
print(e, flush=True)
if dst_file.is_file(): dst_file.unlink()
if url in download_cache: del download_cache[url]
if dst_file.is_file():
dst_file.unlink()
if url in download_cache:
del download_cache[url]
return 1
def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]:
def mkdir_with_dot_tmp(folder: Path) -> Tuple[Path, Path]:
tmpdir = folder / ".tmp"
if tmpdir.is_dir():
shutil.rmtree(str(tmpdir))
tmpdir.mkdir(parents=True, exist_ok=True)
return (folder, tmpdir)
def move_files_in(src: Path, dst: Path):
empty = True
for file in src.glob('*'):
for file in src.glob("*"):
empty = False
print(f"moving {file} to {dst}")
# shutil.move(str(file), str(dst))
if file.is_dir():
(dst / file.name).mkdir(parents=True, exist_ok=True)
move_files_in(file, dst / file.name)
file.rmdir() # rmdir wont fail as all files in it have been moved
file.rmdir() # rmdir wont fail as all files in it have been moved
else:
file.rename(dst / file.name) # Overwrite files
file.rename(dst / file.name) # Overwrite files
if empty:
print(f"{src} is empty")
def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Path, deb_set: Dict[str, int])->int:
def apt_mirror(
base_url: str,
dist: str,
repo: str,
arch: str,
dest_base_dir: Path,
deb_set: Dict[str, int],
) -> int:
if not dest_base_dir.is_dir():
print("Destination directory is empty, cannot continue")
return 1
print(f"Started mirroring {base_url} {dist}, {repo}, {arch}!", flush=True)
# download Release files
dist_dir,dist_tmp_dir = mkdir_with_dot_tmp(dest_base_dir / "dists" / dist)
check_and_download(f"{base_url}/dists/{dist}/InRelease",dist_tmp_dir / "InRelease", caching=True)
if check_and_download(f"{base_url}/dists/{dist}/Release",dist_tmp_dir / "Release", caching=True) != 0:
# download Release files
dist_dir, dist_tmp_dir = mkdir_with_dot_tmp(dest_base_dir / "dists" / dist)
check_and_download(
f"{base_url}/dists/{dist}/InRelease", dist_tmp_dir / "InRelease", caching=True
)
if (
check_and_download(
f"{base_url}/dists/{dist}/Release", dist_tmp_dir / "Release", caching=True
)
!= 0
):
print("Invalid Repository")
if not (dist_dir/"Release").is_file():
print(f"{dist_dir/'Release'} never existed, upstream may not provide packages for {dist}, ignore this error")
if not (dist_dir / "Release").is_file():
print(
f"{dist_dir/'Release'} never existed, upstream may not provide packages for {dist}, ignore this error"
)
return 0
return 1
check_and_download(f"{base_url}/dists/{dist}/Release.gpg",dist_tmp_dir / "Release.gpg", caching=True)
check_and_download(
f"{base_url}/dists/{dist}/Release.gpg",
dist_tmp_dir / "Release.gpg",
caching=True,
)
comp_dir,comp_tmp_dir = mkdir_with_dot_tmp(dist_dir / repo)
comp_dir, comp_tmp_dir = mkdir_with_dot_tmp(dist_dir / repo)
# load Package Index URLs from the Release file
# load Package Index URLs from the Release file
release_file = dist_tmp_dir / "Release"
arch_dir = arch if arch in ARCH_NO_PKGIDX else f"binary-{arch}"
pkgidx_dir,pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir)
pkgidx_dir, pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir)
with open(release_file, "r") as fd:
pkgidx_content=None
cnt_start=False
pkgidx_content = None
cnt_start = False
for line in fd:
if cnt_start:
fields = line.split()
if len(fields) != 3 or len(fields[0]) != 64: # 64 is SHA-256 checksum length
if (
len(fields) != 3 or len(fields[0]) != 64
): # 64 is SHA-256 checksum length
break
checksum, filesize, filename = tuple(fields)
if filename.startswith(f"{repo}/{arch_dir}/") or \
filename.startswith(f"{repo}/Contents-{arch}") or \
filename.startswith(f"Contents-{arch}"):
if (
filename.startswith(f"{repo}/{arch_dir}/")
or filename.startswith(f"{repo}/Contents-{arch}")
or filename.startswith(f"Contents-{arch}")
):
fn = Path(filename)
if len(fn.parts) <= 3:
# Contents-amd64.gz
@@ -163,7 +204,13 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
else:
# main/dep11/by-hash/MD5Sum/0af5c69679a24671cfd7579095a9cb5e
# deep_tmp_dir is in pkgidx_tmp_dir hence no extra garbage collection needed
deep_tmp_dir = dist_dir / Path(fn.parts[0]) / Path(fn.parts[1]) / ".tmp" / Path('/'.join(fn.parts[2:-1]))
deep_tmp_dir = (
dist_dir
/ Path(fn.parts[0])
/ Path(fn.parts[1])
/ ".tmp"
/ Path("/".join(fn.parts[2:-1]))
)
deep_tmp_dir.mkdir(parents=True, exist_ok=True)
pkgidx_file = deep_tmp_dir / fn.name
else:
@@ -174,33 +221,41 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
print("Failed to download:", pkglist_url)
continue
with pkgidx_file.open('rb') as t: content = t.read()
with pkgidx_file.open("rb") as t:
content = t.read()
if len(content) != int(filesize):
print(f"Invalid size of {pkgidx_file}, expected {filesize}, skipped")
print(
f"Invalid size of {pkgidx_file}, expected {filesize}, skipped"
)
pkgidx_file.unlink()
continue
if hashlib.sha256(content).hexdigest() != checksum:
print(f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped")
print(
f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped"
)
pkgidx_file.unlink()
continue
if pkgidx_content is None and pkgidx_file.stem == 'Packages':
print(f"getting packages index content from {pkgidx_file.name}", flush=True)
if pkgidx_content is None and pkgidx_file.stem == "Packages":
print(
f"getting packages index content from {pkgidx_file.name}",
flush=True,
)
suffix = pkgidx_file.suffix
if suffix == '.xz':
pkgidx_content = lzma.decompress(content).decode('utf-8')
elif suffix == '.bz2':
pkgidx_content = bz2.decompress(content).decode('utf-8')
elif suffix == '.gz':
pkgidx_content = gzip.decompress(content).decode('utf-8')
elif suffix == '':
pkgidx_content = content.decode('utf-8')
if suffix == ".xz":
pkgidx_content = lzma.decompress(content).decode("utf-8")
elif suffix == ".bz2":
pkgidx_content = bz2.decompress(content).decode("utf-8")
elif suffix == ".gz":
pkgidx_content = gzip.decompress(content).decode("utf-8")
elif suffix == "":
pkgidx_content = content.decode("utf-8")
else:
print("unsupported format")
# Currently only support SHA-256 checksum, because
# "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field."
# from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files
if line.startswith('SHA256:'):
if line.startswith("SHA256:"):
cnt_start = True
if not cnt_start:
print("Cannot find SHA-256 checksum")
@@ -219,6 +274,7 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
except:
traceback.print_exc()
return 1
if arch in ARCH_NO_PKGIDX:
if collect_tmp_dir() == 1:
return 1
@@ -227,8 +283,10 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
if pkgidx_content is None:
print("index is empty, failed")
if len(list(pkgidx_dir.glob('Packages*'))) == 0:
print(f"{pkgidx_dir/'Packages'} never existed, upstream may not provide {dist}/{repo}/{arch}, ignore this error")
if len(list(pkgidx_dir.glob("Packages*"))) == 0:
print(
f"{pkgidx_dir/'Packages'} never existed, upstream may not provide {dist}/{repo}/{arch}, ignore this error"
)
return 0
return 1
@@ -236,8 +294,8 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
err = 0
deb_count = 0
deb_size = 0
for pkg in pkgidx_content.split('\n\n'):
if len(pkg) < 10: # ignore blanks
for pkg in pkgidx_content.split("\n\n"):
if len(pkg) < 10: # ignore blanks
continue
try:
pkg_filename = pattern_package_name.search(pkg).group(1)
@@ -255,14 +313,14 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
dest_dir = dest_filename.parent
if not dest_dir.is_dir():
dest_dir.mkdir(parents=True, exist_ok=True)
if dest_filename.suffix == '.deb':
if dest_filename.suffix == ".deb":
deb_set[str(dest_filename.relative_to(dest_base_dir))] = pkg_size
if dest_filename.is_file() and dest_filename.stat().st_size == pkg_size:
print(f"Skipping {pkg_filename}, size {pkg_size}")
continue
pkg_url=f"{base_url}/{pkg_filename}"
dest_tmp_filename = dest_filename.with_name('._syncing_.' + dest_filename.name)
pkg_url = f"{base_url}/{pkg_filename}"
dest_tmp_filename = dest_filename.with_name("._syncing_." + dest_filename.name)
for retry in range(MAX_RETRY):
print(f"downloading {pkg_url} to {dest_filename}", flush=True)
# break # dry run
@@ -289,19 +347,25 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
print(f"{deb_count} packages, {deb_size} bytes in total", flush=True)
return err
def apt_delete_old_debs(dest_base_dir: Path, remote_set: Dict[str, int], dry_run: bool):
on_disk = set([
str(i.relative_to(dest_base_dir)) for i in dest_base_dir.glob('**/*.deb')])
on_disk = set(
[str(i.relative_to(dest_base_dir)) for i in dest_base_dir.glob("**/*.deb")]
)
deleting = on_disk - remote_set.keys()
# print(on_disk)
# print(remote_set)
print(f"Deleting {len(deleting)} packages not in the index{' (dry run)' if dry_run else ''}", flush=True)
print(
f"Deleting {len(deleting)} packages not in the index{' (dry run)' if dry_run else ''}",
flush=True,
)
for i in deleting:
if dry_run:
print("Will delete", i)
else:
print("Deleting", i)
(dest_base_dir/i).unlink()
(dest_base_dir / i).unlink()
def main():
@@ -311,31 +375,35 @@ def main():
parser.add_argument("component", type=str, help="e.g. multiverse,contrib")
parser.add_argument("arch", type=str, help="e.g. i386,amd64")
parser.add_argument("working_dir", type=Path, help="working directory")
parser.add_argument("--delete", action='store_true',
help='delete unreferenced package files')
parser.add_argument("--delete-dry-run", action='store_true',
help='print package files to be deleted only')
parser.add_argument(
"--delete", action="store_true", help="delete unreferenced package files"
)
parser.add_argument(
"--delete-dry-run",
action="store_true",
help="print package files to be deleted only",
)
args = parser.parse_args()
# generate lists of os codenames
os_list = args.os_version.split(',')
os_list = args.os_version.split(",")
check_args("os_version", os_list)
os_list = replace_os_template(os_list)
# generate a list of components and archs for each os codename
def generate_list_for_oses(raw: str, name: str) -> List[List[str]]:
n_os = len(os_list)
if ':' in raw:
if ":" in raw:
# specify os codenames for each component
lists = []
for l in raw.split(':'):
list_for_os = l.split(',')
for l in raw.split(":"):
list_for_os = l.split(",")
check_args(name, list_for_os)
lists.append(list_for_os)
assert len(lists) == n_os, f"{name} must be specified for each component"
else:
# use same os codenames for all components
l = raw.split(',')
l = raw.split(",")
check_args(name, l)
lists = [l] * n_os
return lists
@@ -350,7 +418,12 @@ def main():
for os, arch_list, comp_list in zip(os_list, arch_lists, component_lists):
for comp in comp_list:
for arch in arch_list:
if apt_mirror(args.base_url, os, comp, arch, args.working_dir, deb_set=deb_set) != 0:
if (
apt_mirror(
args.base_url, os, comp, arch, args.working_dir, deb_set=deb_set
)
!= 0
):
failed.append((os, comp, arch))
if len(failed) > 0:
print(f"Failed APT repos of {args.base_url}: ", failed)
@@ -363,5 +436,6 @@ def main():
total_size = sum(deb_set.values())
fd.write(f"+{total_size}")
if __name__ == "__main__":
main()

View File

@@ -1,36 +1,46 @@
#!/usr/bin/env python3
import os
import sys
import threading
import traceback
import queue
from pathlib import Path
from datetime import datetime
import tempfile
import hashlib
import requests
BASE_URL = os.getenv("TUNASYNC_UPSTREAM_URL", "https://api.github.com/repos/")
WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR")
MIRROR_BASE_URL = os.getenv("MIRROR_BASE_URL", 'https://mirrors.tuna.tsinghua.edu.cn/github-raw/')
MIRROR_BASE_URL = os.getenv(
"MIRROR_BASE_URL", "https://mirrors.tuna.tsinghua.edu.cn/github-raw/"
)
def raw_to_mirror(s: str) -> str:
return s.replace("https://raw.githubusercontent.com/",
MIRROR_BASE_URL)
return s.replace("https://raw.githubusercontent.com/", MIRROR_BASE_URL)
def delete_line_with(w: str, s: str) -> str:
return "\n".join(list(filter(lambda x: x.count(w) == 0, s.splitlines())))
def delete_line_with_gbpdistro(s: str) -> str:
return delete_line_with("gbpdistro", s)
REPOS = [
# owner/repo, tree, tree, tree, blob
## for stackage
["fpco/stackage-content", "master", "stack", "global-hints.yaml"],
## for rosdep
{ "path": ["ros/rosdistro", "master", "rosdep", "sources.list.d", "20-default.list"], "filter": [ raw_to_mirror, delete_line_with_gbpdistro ] },
{
"path": [
"ros/rosdistro",
"master",
"rosdep",
"sources.list.d",
"20-default.list",
],
"filter": [raw_to_mirror, delete_line_with_gbpdistro],
},
["ros/rosdistro", "master", "rosdep", "osx-homebrew.yaml"],
["ros/rosdistro", "master", "rosdep", "base.yaml"],
["ros/rosdistro", "master", "rosdep", "python.yaml"],
@@ -44,36 +54,46 @@ REPOS = [
TIMEOUT_OPTION = (7, 10)
total_size = 0
# wrap around requests.get to use token if available
def github_get(*args, **kwargs):
headers = kwargs['headers'] if 'headers' in kwargs else {}
if 'GITHUB_TOKEN' in os.environ:
headers['Authorization'] = 'token {}'.format(
os.environ['GITHUB_TOKEN'])
kwargs['headers'] = headers
headers = kwargs["headers"] if "headers" in kwargs else {}
if "GITHUB_TOKEN" in os.environ:
headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"])
kwargs["headers"] = headers
return requests.get(*args, **kwargs)
def github_tree(*args, **kwargs):
headers = kwargs['headers'] if 'headers' in kwargs else {}
headers = kwargs["headers"] if "headers" in kwargs else {}
headers["Accept"] = "application/vnd.github.v3+json"
kwargs['headers'] = headers
kwargs["headers"] = headers
return github_get(*args, **kwargs)
# NOTE blob API supports file up to 100MB
# To get larger one, we need raw.githubcontent, which is not implemented now
def github_blob(*args, **kwargs):
headers = kwargs['headers'] if 'headers' in kwargs else {}
headers = kwargs["headers"] if "headers" in kwargs else {}
headers["Accept"] = "application/vnd.github.v3.raw"
kwargs['headers'] = headers
kwargs["headers"] = headers
return github_get(*args, **kwargs)
def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, filter=None):
def do_download(
remote_url: str, dst_file: Path, remote_size: int, sha: str, filter=None
):
# NOTE the stream=True parameter below
with github_blob(remote_url, stream=True) as r:
r.raise_for_status()
tmp_dst_file = None
try:
with tempfile.NamedTemporaryFile(prefix="." + dst_file.name + ".", suffix=".tmp", dir=dst_file.parent, delete=False) as f:
with tempfile.NamedTemporaryFile(
prefix="." + dst_file.name + ".",
suffix=".tmp",
dir=dst_file.parent,
delete=False,
) as f:
tmp_dst_file = Path(f.name)
for chunk in r.iter_content(chunk_size=1024**2):
if chunk: # filter out keep-alive new chunks
@@ -82,7 +102,9 @@ def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, fil
# check for downloaded size
downloaded_size = tmp_dst_file.stat().st_size
if remote_size != -1 and downloaded_size != remote_size:
raise Exception(f'File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes')
raise Exception(
f"File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes"
)
if filter != None:
with open(tmp_dst_file, "r+") as f:
s = f.read()
@@ -108,25 +130,26 @@ def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, fil
if tmp_dst_file.is_file():
tmp_dst_file.unlink()
def downloading_worker(q):
while True:
item = q.get()
if item is None:
break
filter = item.pop(0) # remove filter
filter = item.pop(0) # remove filter
dst_file = Path('/'.join(item))
dst_file = Path("/".join(item))
dst_file.parent.mkdir(parents=True, exist_ok=True)
item.pop(0) # remove working dir
item.pop(0) # remove working dir
owner_repo = item.pop(0)
try:
tree = item.pop(0)
tree_child = item.pop(0)
child_is_leaf = False
url = ''
sha = ''
url = ""
sha = ""
size = 0
while not child_is_leaf:
with github_tree(f"{BASE_URL}{owner_repo}/git/trees/{tree}") as r:
@@ -147,8 +170,7 @@ def downloading_worker(q):
break
else:
raise Exception
if not dst_file.is_symlink() or \
Path(os.readlink(dst_file)).name != sha:
if not dst_file.is_symlink() or Path(os.readlink(dst_file)).name != sha:
do_download(url, dst_file, size, sha, filter)
else:
print("Skip", dst_file)
@@ -164,16 +186,19 @@ def downloading_worker(q):
def create_workers(n):
task_queue = queue.Queue()
for i in range(n):
t = threading.Thread(target=downloading_worker, args=(task_queue, ))
t = threading.Thread(target=downloading_worker, args=(task_queue,))
t.start()
return task_queue
def main():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--working-dir", default=WORKING_DIR)
parser.add_argument("--workers", default=1, type=int,
help='number of concurrent downloading jobs')
parser.add_argument(
"--workers", default=1, type=int, help="number of concurrent downloading jobs"
)
args = parser.parse_args()
if args.working_dir is None:
@@ -198,6 +223,7 @@ def main():
for i in range(args.workers):
task_queue.put(None)
if __name__ == "__main__":
main()

View File

@@ -10,25 +10,30 @@ from pathlib import Path
# mainly from apt-sync.py
FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API = os.getenv("TUNASYNC_UPSTREAM_URL", "https://api.github.com/repos/Homebrew/formulae.brew.sh/actions/artifacts?name=github-pages")
FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API = os.getenv(
"TUNASYNC_UPSTREAM_URL",
"https://api.github.com/repos/Homebrew/formulae.brew.sh/actions/artifacts?name=github-pages",
)
WORKING_DIR = Path(os.getenv("TUNASYNC_WORKING_DIR", "/data"))
DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
github_api_headers = {
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
}
if 'GITHUB_TOKEN' in os.environ:
github_api_headers['Authorization'] = 'token {}'.format(
os.environ['GITHUB_TOKEN'])
if "GITHUB_TOKEN" in os.environ:
github_api_headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"])
else:
# https://github.com/actions/upload-artifact/issues/51
# the token should have 'public_repo' access
raise Exception("GITHUB_TOKEN is required")
def formulae_github_pages(zip_file: Path, unzip_directory: Path, tar_directory: Path):
artifacts = requests.get(FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API, headers=github_api_headers)
artifacts = requests.get(
FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API, headers=github_api_headers
)
artifacts.raise_for_status()
artifacts = artifacts.json()
latest = None
@@ -40,7 +45,10 @@ def formulae_github_pages(zip_file: Path, unzip_directory: Path, tar_directory:
check_and_download(zip_url, zip_file, zip_file, github_api_headers)
sp.run(["unzip", str(zip_file), "-d", str(unzip_directory)])
sp.run(["tar", "-C", str(tar_directory), "-xf", str(unzip_directory / "artifact.tar")])
sp.run(
["tar", "-C", str(tar_directory), "-xf", str(unzip_directory / "artifact.tar")]
)
def bottles(formula_file: Path):
b = {}
@@ -49,7 +57,7 @@ def bottles(formula_file: Path):
for formula in formulae:
if formula["versions"]["bottle"] and "stable" in formula["bottle"]:
bs = formula["bottle"]["stable"]
for (platform, v) in bs["files"].items():
for platform, v in bs["files"].items():
sha256 = v["sha256"]
url = v["url"]
name = formula["name"]
@@ -63,28 +71,36 @@ def bottles(formula_file: Path):
}
return b
ghcr_headers = {
"Accept": "application/vnd.oci.image.index.v1+json",
"Authorization": "Bearer QQ=="
"Authorization": "Bearer QQ==",
}
# borrowed from apt-sync.py
def check_and_download(url: str, dst_file: Path, dst_tmp_file: Path, headers=ghcr_headers):
if dst_file.is_file(): return 2 # old file
def check_and_download(
url: str, dst_file: Path, dst_tmp_file: Path, headers=ghcr_headers
):
if dst_file.is_file():
return 2 # old file
try:
start = time.time()
with requests.get(url, stream=True, timeout=(5, 10), headers=headers) as r:
r.raise_for_status()
if 'last-modified' in r.headers:
if "last-modified" in r.headers:
remote_ts = parsedate_to_datetime(
r.headers['last-modified']).timestamp()
else: remote_ts = None
r.headers["last-modified"]
).timestamp()
else:
remote_ts = None
with dst_tmp_file.open('wb') as f:
with dst_tmp_file.open("wb") as f:
for chunk in r.iter_content(chunk_size=1024**2):
if time.time() - start > DOWNLOAD_TIMEOUT:
raise TimeoutError("Download timeout")
if not chunk: continue # filter out keep-alive new chunks
if not chunk:
continue # filter out keep-alive new chunks
f.write(chunk)
if remote_ts is not None:
@@ -92,9 +108,11 @@ def check_and_download(url: str, dst_file: Path, dst_tmp_file: Path, headers=ghc
return 0
except BaseException as e:
print(e, flush=True)
if dst_tmp_file.is_file(): dst_tmp_file.unlink()
if dst_tmp_file.is_file():
dst_tmp_file.unlink()
return 1
if __name__ == "__main__":
# clean tmp file from previous sync
TMP_DIR = WORKING_DIR / ".tmp"

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python3
import traceback
import os
import sys
import subprocess as sp
import tempfile
import argparse
@@ -16,47 +15,50 @@ from pathlib import Path
from typing import List, Dict
import requests
REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '')
DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "")
DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
REPO_STAT = {}
def calc_repo_size(path: Path):
dbfiles = path.glob('repodata/*primary.*')
dbfiles = path.glob("repodata/*primary.*")
with tempfile.NamedTemporaryFile() as tmp:
dec = None
dbfile = None
for db in dbfiles:
dbfile = db
suffixes = db.suffixes
if suffixes[-1] == '.bz2':
if suffixes[-1] == ".bz2":
dec = bz2.decompress
suffixes = suffixes[:-1]
elif suffixes[-1] == '.gz':
elif suffixes[-1] == ".gz":
dec = gzip.decompress
suffixes = suffixes[:-1]
elif suffixes[-1] in ('.sqlite', '.xml'):
elif suffixes[-1] in (".sqlite", ".xml"):
dec = lambda x: x
if dec is None:
print(f"Failed to read from {path}: {list(dbfiles)}", flush=True)
return
with db.open('rb') as f:
with db.open("rb") as f:
tmp.write(dec(f.read()))
tmp.flush()
if suffixes[-1] == '.sqlite':
if suffixes[-1] == ".sqlite":
conn = sqlite3.connect(tmp.name)
c = conn.cursor()
c.execute("select sum(size_package),count(1) from packages")
size, cnt = c.fetchone()
conn.close()
elif suffixes[-1] == '.xml':
elif suffixes[-1] == ".xml":
try:
tree = ET.parse(tmp.name)
root = tree.getroot()
assert root.tag.endswith('metadata')
assert root.tag.endswith("metadata")
cnt, size = 0, 0
for location in root.findall('./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size'):
size += int(location.attrib['package'])
for location in root.findall(
"./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size"
):
size += int(location.attrib["package"])
cnt += 1
except:
traceback.print_exc()
@@ -69,23 +71,27 @@ def calc_repo_size(path: Path):
print(f" {cnt} packages, {size} bytes in total", flush=True)
global REPO_STAT
REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0) # size can be None
REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0) # size can be None
def check_and_download(url: str, dst_file: Path)->int:
def check_and_download(url: str, dst_file: Path) -> int:
try:
start = time.time()
with requests.get(url, stream=True, timeout=(5, 10)) as r:
r.raise_for_status()
if 'last-modified' in r.headers:
if "last-modified" in r.headers:
remote_ts = parsedate_to_datetime(
r.headers['last-modified']).timestamp()
else: remote_ts = None
r.headers["last-modified"]
).timestamp()
else:
remote_ts = None
with dst_file.open('wb') as f:
with dst_file.open("wb") as f:
for chunk in r.iter_content(chunk_size=1024**2):
if time.time() - start > DOWNLOAD_TIMEOUT:
raise TimeoutError("Download timeout")
if not chunk: continue # filter out keep-alive new chunks
if not chunk:
continue # filter out keep-alive new chunks
f.write(chunk)
if remote_ts is not None:
@@ -93,13 +99,15 @@ def check_and_download(url: str, dst_file: Path)->int:
return 0
except BaseException as e:
print(e, flush=True)
if dst_file.is_file(): dst_file.unlink()
if dst_file.is_file():
dst_file.unlink()
return 1
def download_repodata(url: str, path: Path) -> int:
path = path / "repodata"
path.mkdir(exist_ok=True)
oldfiles = set(path.glob('*.*'))
oldfiles = set(path.glob("*.*"))
newfiles = set()
if check_and_download(url + "/repodata/repomd.xml", path / ".repomd.xml") != 0:
print(f"Failed to download the repomd.xml of {url}")
@@ -107,64 +115,78 @@ def download_repodata(url: str, path: Path) -> int:
try:
tree = ET.parse(path / ".repomd.xml")
root = tree.getroot()
assert root.tag.endswith('repomd')
for location in root.findall('./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location'):
href = location.attrib['href']
assert len(href) > 9 and href[:9] == 'repodata/'
fn = path / href[9:]
newfiles.add(fn)
if check_and_download(url + '/' + href, fn) != 0:
print(f"Failed to download the {href}")
return 1
assert root.tag.endswith("repomd")
for location in root.findall(
"./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location"
):
href = location.attrib["href"]
assert len(href) > 9 and href[:9] == "repodata/"
fn = path / href[9:]
newfiles.add(fn)
if check_and_download(url + "/" + href, fn) != 0:
print(f"Failed to download the {href}")
return 1
except BaseException as e:
traceback.print_exc()
return 1
(path / ".repomd.xml").rename(path / "repomd.xml") # update the repomd.xml
(path / ".repomd.xml").rename(path / "repomd.xml") # update the repomd.xml
newfiles.add(path / "repomd.xml")
for i in (oldfiles - newfiles):
for i in oldfiles - newfiles:
print(f"Deleting old files: {i}")
i.unlink()
def check_args(prop: str, lst: List[str]):
for s in lst:
if len(s)==0 or ' ' in s:
if len(s) == 0 or " " in s:
raise ValueError(f"Invalid item in {prop}: {repr(s)}")
def substitute_vars(s: str, vardict: Dict[str, str]) -> str:
for key, val in vardict.items():
tpl = "@{"+key+"}"
tpl = "@{" + key + "}"
s = s.replace(tpl, val)
return s
def main():
parser = argparse.ArgumentParser()
parser.add_argument("base_url", type=str, help="base URL")
parser.add_argument("os_version", type=str, help="e.g. 7-8,9")
parser.add_argument("component", type=str, help="e.g. mysql56-community,mysql57-community")
parser.add_argument(
"component", type=str, help="e.g. mysql56-community,mysql57-community"
)
parser.add_argument("arch", type=str, help="e.g. x86_64,aarch64")
parser.add_argument("repo_name", type=str, help="e.g. @{comp}-el@{os_ver}")
parser.add_argument("working_dir", type=Path, help="working directory")
parser.add_argument("--download-repodata", action='store_true',
help='download repodata files instead of generating them')
parser.add_argument("--pass-arch-to-reposync", action='store_true',
help='''pass --arch to reposync to further filter packages by 'arch' field in metadata (NOT recommended, prone to missing packages in some repositories, e.g. mysql)''')
parser.add_argument(
"--download-repodata",
action="store_true",
help="download repodata files instead of generating them",
)
parser.add_argument(
"--pass-arch-to-reposync",
action="store_true",
help="""pass --arch to reposync to further filter packages by 'arch' field in metadata (NOT recommended, prone to missing packages in some repositories, e.g. mysql)""",
)
args = parser.parse_args()
os_list = []
for os_version in args.os_version.split(','):
if '-' in os_version and '-stream' not in os_version:
dash = os_version.index('-')
os_list = os_list + [ str(i) for i in range(
int(os_version[:dash]),
1+int(os_version[dash+1:])) ]
for os_version in args.os_version.split(","):
if "-" in os_version and "-stream" not in os_version:
dash = os_version.index("-")
os_list = os_list + [
str(i)
for i in range(int(os_version[:dash]), 1 + int(os_version[dash + 1 :]))
]
else:
os_list.append(os_version)
check_args("os_version", os_list)
component_list = args.component.split(',')
component_list = args.component.split(",")
check_args("component", component_list)
arch_list = args.arch.split(',')
arch_list = args.arch.split(",")
check_args("arch", arch_list)
failed = []
@@ -175,16 +197,18 @@ def main():
for os in os_list:
for comp in component_list:
vardict = {
'arch': arch,
'os_ver': os,
'comp': comp,
"arch": arch,
"os_ver": os,
"comp": comp,
}
name = substitute_vars(args.repo_name, vardict)
url = substitute_vars(args.base_url, vardict)
try:
probe_url = url + ('' if url.endswith('/') else '/') + "repodata/repomd.xml"
r = requests.head(probe_url, timeout=(7,7))
probe_url = (
url + ("" if url.endswith("/") else "/") + "repodata/repomd.xml"
)
r = requests.head(probe_url, timeout=(7, 7))
if r.status_code < 400 or r.status_code == 403:
yield (name, url)
else:
@@ -195,19 +219,23 @@ def main():
for arch in arch_list:
dest_dirs = []
conf = tempfile.NamedTemporaryFile("w", suffix=".conf")
conf.write('''
conf.write(
"""
[main]
keepcache=0
''')
"""
)
for name, url in combination_os_comp(arch):
conf.write(f'''
conf.write(
f"""
[{name}]
name={name}
baseurl={url}
repo_gpgcheck=0
gpgcheck=0
enabled=1
''')
"""
)
dst = (args.working_dir / name).absolute()
dst.mkdir(parents=True, exist_ok=True)
dest_dirs.append(dst)
@@ -217,13 +245,18 @@ enabled=1
if len(dest_dirs) == 0:
print("Nothing to sync", flush=True)
failed.append(('', arch))
failed.append(("", arch))
continue
cmd_args = [
"dnf", "reposync",
"-c", conf.name,
"--delete", "-p", str(args.working_dir.absolute())]
"dnf",
"reposync",
"-c",
conf.name,
"--delete",
"-p",
str(args.working_dir.absolute()),
]
if args.pass_arch_to_reposync:
cmd_args += ["--arch", arch]
print(f"Launching dnf reposync with command: {cmd_args}", flush=True)
@@ -237,7 +270,16 @@ enabled=1
if args.download_repodata:
download_repodata(url, path)
else:
cmd_args = ["createrepo_c", "--update", "-v", "-c", cache_dir, "-o", str(path), str(path)]
cmd_args = [
"createrepo_c",
"--update",
"-v",
"-c",
cache_dir,
"-o",
str(path),
str(path),
]
print(f"Launching createrepo with command: {cmd_args}", flush=True)
ret = sp.run(cmd_args)
calc_repo_size(path)
@@ -250,5 +292,6 @@ enabled=1
total_size = sum([r[0] for r in REPO_STAT.values()])
fd.write(f"+{total_size}")
if __name__ == "__main__":
main()