From 3a1b20541c93e80ae539bc309cf2bbec4a032a43 Mon Sep 17 00:00:00 2001
From: taoky
Date: Wed, 4 Sep 2024 17:52:59 +0800
Subject: [PATCH] Optimize utils.create_package_stubs

---
 utils/create_package_stubs.py | 43 ++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/utils/create_package_stubs.py b/utils/create_package_stubs.py
index e0e6621..e4e3608 100644
--- a/utils/create_package_stubs.py
+++ b/utils/create_package_stubs.py
@@ -3,18 +3,21 @@
 # It requires a full simple/ and db (genlocal-ed)
 # Call like: python -m utils.create_package_stubs /path/to/pypi/
 
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from urllib.parse import unquote
-from shadowmire import LocalVersionKV, get_package_urls_size_from_index_json
 from pathlib import Path
 import sys
-from tqdm import tqdm
+import os
 from os.path import (
     normpath,
 )  # fast path computation, instead of accessing real files like pathlib
+from tqdm import tqdm
+from shadowmire import LocalVersionKV, get_package_urls_size_from_index_json
+
+IOWORKERS = int(os.environ.get("SHADOWMIRE_IOWORKERS", "2"))
 
 
 if __name__ == "__main__":
-    pass
     try:
         repo = sys.argv[1]
     except IndexError:
@@ -24,13 +27,27 @@ if __name__ == "__main__":
     local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
     local_names = set(local_db.keys())
     simple_dir = basedir / "simple"
-    for package_name in tqdm(local_names, desc="Creating stub"):
-        package_simple_path = simple_dir / package_name
-        json_simple = package_simple_path / "index.v1_json"
-        hrefsize_json = get_package_urls_size_from_index_json(json_simple)
-        for href, _ in hrefsize_json:
-            relative = unquote(href)
-            dest = Path(normpath(package_simple_path / relative))
-            dest.parent.mkdir(parents=True, exist_ok=True)
-            if not dest.exists():
-                dest.touch()
+    with ThreadPoolExecutor(max_workers=IOWORKERS) as executor:
+
+        def handle(package_name: str) -> None:
+            package_simple_path = simple_dir / package_name
+            json_simple = package_simple_path / "index.v1_json"
+            hrefsize_json = get_package_urls_size_from_index_json(json_simple)
+            for href, _ in hrefsize_json:
+                relative = unquote(href)
+                dest = Path(normpath(package_simple_path / relative))
+                if not dest.exists():
+                    dest.parent.mkdir(parents=True, exist_ok=True)
+                    fd = os.open(dest, os.O_CREAT, 0o664)
+                    os.close(fd)
+
+        futures = {
+            executor.submit(handle, package_name): package_name
+            for package_name in local_names
+        }
+        for future in tqdm(
+            as_completed(futures),
+            total=len(local_names),
+            desc="Creating stub",
+        ):
+            future.result()