#!/usr/bin/env python3
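"""Mirror YUM/DNF repositories with `dnf reposync`.

For every (os_version, component, arch) combination, probe
<base_url>/repodata/repomd.xml, sync all reachable repos into the
working directory, then either regenerate the metadata locally with
createrepo_c or mirror the upstream repodata (--download-repodata).
On full success, the total size of all repos is appended to the file
named by the REPO_SIZE_FILE environment variable.

Usage sketch (URL and values are illustrative only):
    ./yum-sync.py https://example.com/yum/@{comp}/el/@{os_ver}/@{arch} \\
        7-8 mysql57-community,mysql80-community x86_64 \\
        "@{comp}-el@{os_ver}" /data/mysql
"""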

import argparse
import bz2
import gzip
import os
import sqlite3
import subprocess as sp
import tempfile
import time
import traceback
import xml.etree.ElementTree as ET
from email.utils import parsedate_to_datetime
from pathlib import Path
from typing import Dict, List

import requests

REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "")
DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
REPO_STAT = {}


def calc_repo_size(path: Path):
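    """Read package count and total size from a repo's primary metadata.

    Looks for repodata/*primary.* under *path* (sqlite or xml, optionally
    bz2/gz compressed) and records (size, count) in the global REPO_STAT.
    """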
    dbfiles = list(path.glob("repodata/*primary.*"))
    with tempfile.NamedTemporaryFile() as tmp:
        dec = None
        dbfile = None
        for db in dbfiles:
            dbfile = db
            suffixes = db.suffixes
            if suffixes[-1] == ".bz2":
                dec = bz2.decompress
                suffixes = suffixes[:-1]
            elif suffixes[-1] == ".gz":
                dec = gzip.decompress
                suffixes = suffixes[:-1]
            elif suffixes[-1] in (".sqlite", ".xml"):
                dec = lambda x: x
        if dec is None:
            print(f"Failed to read from {path}: {dbfiles}", flush=True)
            return
        with dbfile.open("rb") as f:
            tmp.write(dec(f.read()))
            tmp.flush()

        if suffixes[-1] == ".sqlite":
            conn = sqlite3.connect(tmp.name)
            c = conn.cursor()
            c.execute("select sum(size_package),count(1) from packages")
            size, cnt = c.fetchone()
            conn.close()
        elif suffixes[-1] == ".xml":
            try:
                tree = ET.parse(tmp.name)
                root = tree.getroot()
                assert root.tag.endswith("metadata")
                cnt, size = 0, 0
                for location in root.findall(
                    "./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size"
                ):
                    size += int(location.attrib["package"])
                    cnt += 1
            except Exception:
                traceback.print_exc()
                return
        else:
            print(f"Unknown suffix {suffixes}")
            return

    print(f"Repository {path}:")
    print(f"  {cnt} packages, {size} bytes in total", flush=True)

    global REPO_STAT
    REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0)  # size can be None


def check_and_download(url: str, dst_file: Path) -> int:
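    """Download *url* to *dst_file*, preserving the upstream mtime.

    Aborts if the whole transfer exceeds DOWNLOAD_TIMEOUT seconds.
    Returns 0 on success, 1 on failure; a partial file is deleted.
    """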
    try:
        start = time.time()
        with requests.get(url, stream=True, timeout=(5, 10)) as r:
            r.raise_for_status()
            if "last-modified" in r.headers:
                remote_ts = parsedate_to_datetime(
                    r.headers["last-modified"]
                ).timestamp()
            else:
                remote_ts = None

            with dst_file.open("wb") as f:
                for chunk in r.iter_content(chunk_size=1024**2):
                    if time.time() - start > DOWNLOAD_TIMEOUT:
                        raise TimeoutError("Download timeout")
                    if not chunk:
                        continue  # filter out keep-alive new chunks

                    f.write(chunk)
            if remote_ts is not None:
                os.utime(dst_file, (remote_ts, remote_ts))
        return 0
    except BaseException as e:
        print(e, flush=True)
        if dst_file.is_file():
            dst_file.unlink()
        return 1


def download_repodata(url: str, path: Path) -> int:
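    """Mirror the upstream repodata/ directory into path/repodata.

    repomd.xml is fetched under a temporary name and only renamed into
    place after every file it references has been downloaded, so an
    interrupted sync never publishes an index pointing at missing files.
    Returns 0 on success, 1 on failure.
    """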
    path = path / "repodata"
    path.mkdir(exist_ok=True)
    oldfiles = set(path.glob("*.*"))
    newfiles = set()
    if check_and_download(url + "/repodata/repomd.xml", path / ".repomd.xml") != 0:
        print(f"Failed to download the repomd.xml of {url}")
        return 1
    try:
        tree = ET.parse(path / ".repomd.xml")
        root = tree.getroot()
        assert root.tag.endswith("repomd")
        for location in root.findall(
            "./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location"
        ):
            href = location.attrib["href"]
            assert len(href) > 9 and href[:9] == "repodata/"
            fn = path / href[9:]
            newfiles.add(fn)
            if check_and_download(url + "/" + href, fn) != 0:
                print(f"Failed to download {href}")
                return 1
    except BaseException:
        traceback.print_exc()
        return 1

    (path / ".repomd.xml").rename(path / "repomd.xml")  # update the repomd.xml
    newfiles.add(path / "repomd.xml")
    for i in oldfiles - newfiles:
        print(f"Deleting old files: {i}")
        i.unlink()
    return 0


def check_args(prop: str, lst: List[str]):
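    """Raise ValueError if any item in *lst* is empty or contains a space."""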
    for s in lst:
        if len(s) == 0 or " " in s:
            raise ValueError(f"Invalid item in {prop}: {repr(s)}")


def substitute_vars(s: str, vardict: Dict[str, str]) -> str:
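    """Expand ``@{key}`` placeholders in *s* using *vardict*.

    >>> substitute_vars("@{comp}-el@{os_ver}", {"comp": "mysql57-community", "os_ver": "7"})
    'mysql57-community-el7'
    """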
    for key, val in vardict.items():
        tpl = "@{" + key + "}"
        s = s.replace(tpl, val)
    return s


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "base_url",
        type=str,
        help="base URL; may contain @{os_ver}, @{comp} and @{arch} placeholders",
    )
    parser.add_argument("os_version", type=str, help="e.g. 7-8,9")
    parser.add_argument(
        "component", type=str, help="e.g. mysql56-community,mysql57-community"
    )
    parser.add_argument("arch", type=str, help="e.g. x86_64,aarch64")
    parser.add_argument("repo_name", type=str, help="e.g. @{comp}-el@{os_ver}")
    parser.add_argument("working_dir", type=Path, help="working directory")
    parser.add_argument(
        "--download-repodata",
        action="store_true",
        help="download repodata files instead of generating them",
    )
    parser.add_argument(
        "--pass-arch-to-reposync",
        action="store_true",
        help="pass --arch to reposync to further filter packages by the 'arch' "
        "field in metadata (NOT recommended; prone to missing packages in "
        "some repositories, e.g. mysql)",
    )
    args = parser.parse_args()
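
    # Expand numeric ranges in os_version, e.g. "7-8,9" -> ["7", "8", "9"].
    # A dash inside names like "8-stream" does not denote a range.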
    os_list = []
    for os_version in args.os_version.split(","):
        if "-" in os_version and "-stream" not in os_version:
            dash = os_version.index("-")
            os_list = os_list + [
                str(i)
                for i in range(int(os_version[:dash]), 1 + int(os_version[dash + 1 :]))
            ]
        else:
            os_list.append(os_version)
    check_args("os_version", os_list)
    component_list = args.component.split(",")
    check_args("component", component_list)
    arch_list = args.arch.split(",")
    check_args("arch", arch_list)

    failed = []
    args.working_dir.mkdir(parents=True, exist_ok=True)
    cache_dir = tempfile.mkdtemp()

    def combination_os_comp(arch: str):
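        """Yield (repo_name, base_url) for each reachable (os, comp) pair.

        Reachability is probed with a HEAD request for repodata/repomd.xml;
        403 is accepted too, presumably for servers that reject HEAD probes.
        """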
        for os in os_list:
            for comp in component_list:
                vardict = {
                    "arch": arch,
                    "os_ver": os,
                    "comp": comp,
                }

                name = substitute_vars(args.repo_name, vardict)
                url = substitute_vars(args.base_url, vardict)
                try:
                    probe_url = (
                        url + ("" if url.endswith("/") else "/") + "repodata/repomd.xml"
                    )
                    r = requests.head(probe_url, timeout=(7, 7))
                    if r.status_code < 400 or r.status_code == 403:
                        yield (name, url)
                    else:
                        print(probe_url, "->", r.status_code)
                except Exception:
                    traceback.print_exc()
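
    # One pass per architecture: write a throwaway dnf .conf with one
    # [section] per reachable repo, then let reposync handle the batch.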
    for arch in arch_list:
        dest_dirs = []
        conf = tempfile.NamedTemporaryFile("w", suffix=".conf")
        conf.write(
            """
[main]
keepcache=0
"""
        )
        for name, url in combination_os_comp(arch):
            conf.write(
                f"""
[{name}]
name={name}
baseurl={url}
repo_gpgcheck=0
gpgcheck=0
enabled=1
"""
            )
            dst = (args.working_dir / name).absolute()
            dst.mkdir(parents=True, exist_ok=True)
            # Keep the repo URL next to its directory so --download-repodata
            # later fetches each repo's metadata from the matching upstream.
            dest_dirs.append((dst, url))
        conf.flush()
        # sp.run(["cat", conf.name])
        # sp.run(["ls", "-la", cache_dir])

        if len(dest_dirs) == 0:
            print("Nothing to sync", flush=True)
            failed.append(("", arch))
            continue

        cmd_args = [
            "dnf",
            "reposync",
            "-c",
            conf.name,
            "--delete",
            "-p",
            str(args.working_dir.absolute()),
        ]
        if args.pass_arch_to_reposync:
            cmd_args += ["--arch", arch]
        print(f"Launching dnf reposync with command: {cmd_args}", flush=True)
        ret = sp.run(cmd_args)
        if ret.returncode != 0:
            failed.append((name, arch))
            continue

        # Refresh metadata for every synced repo: mirror the upstream
        # repodata verbatim, or regenerate it with createrepo_c.
        for path, repo_url in dest_dirs:
            path.mkdir(exist_ok=True)
            if args.download_repodata:
                download_repodata(repo_url, path)
            else:
                cmd_args = [
                    "createrepo_c",
                    "--update",
                    "-v",
                    "-c",
                    cache_dir,
                    "-o",
                    str(path),
                    str(path),
                ]
                print(f"Launching createrepo with command: {cmd_args}", flush=True)
                ret = sp.run(cmd_args)
            calc_repo_size(path)

    if len(failed) > 0:
        print(f"Failed YUM repos: {failed}", flush=True)
    else:
        if len(REPO_SIZE_FILE) > 0:
            # Append the grand total in "+<bytes>" form for the caller to consume.
            with open(REPO_SIZE_FILE, "a") as fd:
                total_size = sum(r[0] for r in REPO_STAT.values())
                fd.write(f"+{total_size}")


if __name__ == "__main__":
    main()
|