mirror of
https://github.com/taoky/shadowmire.git
synced 2025-07-08 09:12:43 +00:00
parent
aa6ece7e8b
commit
bc6159807d
@ -2,7 +2,7 @@
|
||||
|
||||
import sys
|
||||
from types import FrameType
|
||||
from typing import IO, Any, Callable, Generator, Optional
|
||||
from typing import IO, Any, Callable, Generator, Literal, Optional
|
||||
import xmlrpc.client
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
@ -168,6 +168,22 @@ def remove_dir_with_files(directory: Path) -> None:
|
||||
logger.info("Removed dir %s", directory)
|
||||
|
||||
|
||||
def fast_iterdir(
    directory: Path, filter_type: Literal["dir", "file"]
) -> Generator[os.DirEntry[str], Any, None]:
    """
    Lazily yield the entries of ``directory`` that match ``filter_type``.

    iterdir() in pathlib would ignore file type information from getdents64(),
    which is not acceptable when you have millions of files in one directory,
    and you need to filter out all files/directories.

    Args:
        directory: Directory to scan (not recursive).
        filter_type: "dir" to yield only entries for which ``is_dir()`` is
            true, "file" to yield only entries for which ``is_file()`` is true.

    Yields:
        The matching ``os.DirEntry`` objects, in the order os.scandir()
        produces them.

    Raises:
        ValueError: If ``filter_type`` is neither "dir" nor "file". Because
            this is a generator, the error surfaces on first iteration.
    """
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would turn a bad argument into a silent empty result.
    if filter_type not in ("dir", "file"):
        raise ValueError(
            f"filter_type must be 'dir' or 'file', got {filter_type!r}"
        )
    want_dirs = filter_type == "dir"
    # The context manager closes the scandir iterator (and its directory
    # handle) even when the consumer stops iterating early or an error occurs.
    with os.scandir(directory) as entries:
        for item in entries:
            matches = item.is_dir() if want_dirs else item.is_file()
            if matches:
                yield item
|
||||
|
||||
|
||||
def get_package_urls_from_index_html(html_path: Path) -> list[str]:
|
||||
"""
|
||||
Get all <a> href (fragments removed) from given simple/<package>/index.html contents
|
||||
@ -1069,11 +1085,11 @@ def genlocal(ctx: click.Context) -> None:
|
||||
local = {}
|
||||
json_dir = basedir / "json"
|
||||
logger.info("Iterating all items under %s", json_dir)
|
||||
dir_items = [d for d in json_dir.iterdir() if d.is_file()]
|
||||
dir_items = [d for d in fast_iterdir(json_dir, "file")]
|
||||
logger.info("Detected %s packages in %s in total", len(dir_items), json_dir)
|
||||
for package_metapath in tqdm(dir_items, desc="Reading packages from json/"):
|
||||
package_name = package_metapath.name
|
||||
serial = get_local_serial(package_metapath)
|
||||
serial = get_local_serial(Path(package_metapath.path))
|
||||
if serial:
|
||||
local[package_name] = serial
|
||||
logger.info(
|
||||
@ -1117,8 +1133,8 @@ def verify(
|
||||
|
||||
logger.info("====== Step 1. Remove packages NOT in local db ======")
|
||||
local_names = set(local_db.keys())
|
||||
simple_dirs = {i.name for i in (basedir / "simple").iterdir() if i.is_dir()}
|
||||
json_files = {i.name for i in (basedir / "json").iterdir() if i.is_file()}
|
||||
simple_dirs = {i.name for i in fast_iterdir((basedir / "simple"), "dir")}
|
||||
json_files = {i.name for i in fast_iterdir((basedir / "json"), "file")}
|
||||
not_in_local = (simple_dirs | json_files) - local_names
|
||||
logger.info(
|
||||
"%d out of %d local packages NOT in local db",
|
||||
|
Loading…
x
Reference in New Issue
Block a user