Optimize utils.create_package_stubs

This commit is contained in:
taoky 2024-09-04 17:52:59 +08:00
parent 47529107ae
commit 3a1b20541c

View File

@ -3,18 +3,21 @@
# It requires a full simple/ and db (genlocal-ed)
# Call like: python -m utils.create_package_stubs /path/to/pypi/
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import unquote
from shadowmire import LocalVersionKV, get_package_urls_size_from_index_json
from pathlib import Path
import sys
from tqdm import tqdm
import os
from os.path import (
normpath,
) # fast path computation, instead of accessing real files like pathlib
from tqdm import tqdm
from shadowmire import LocalVersionKV, get_package_urls_size_from_index_json
IOWORKERS = int(os.environ.get("SHADOWMIRE_IOWORKERS", "2"))
if __name__ == "__main__":
pass
try:
repo = sys.argv[1]
except IndexError:
@ -24,13 +27,27 @@ if __name__ == "__main__":
local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
local_names = set(local_db.keys())
simple_dir = basedir / "simple"
for package_name in tqdm(local_names, desc="Creating stub"):
package_simple_path = simple_dir / package_name
json_simple = package_simple_path / "index.v1_json"
hrefsize_json = get_package_urls_size_from_index_json(json_simple)
for href, _ in hrefsize_json:
relative = unquote(href)
dest = Path(normpath(package_simple_path / relative))
dest.parent.mkdir(parents=True, exist_ok=True)
if not dest.exists():
dest.touch()
with ThreadPoolExecutor(max_workers=IOWORKERS) as executor:
def handle(package_name: str) -> None:
package_simple_path = simple_dir / package_name
json_simple = package_simple_path / "index.v1_json"
hrefsize_json = get_package_urls_size_from_index_json(json_simple)
for href, _ in hrefsize_json:
relative = unquote(href)
dest = Path(normpath(package_simple_path / relative))
if not dest.exists():
dest.parent.mkdir(parents=True, exist_ok=True)
fd = os.open(dest, os.O_CREAT, 0o664)
os.close(fd)
futures = {
executor.submit(handle, package_name): package_name
for package_name in local_names
}
for future in tqdm(
as_completed(futures),
total=len(local_names),
desc="Creating stub",
):
future.result()