mirror of
https://github.com/taoky/shadowmire.git
synced 2025-07-08 09:12:43 +00:00
parent
aa6ece7e8b
commit
bc6159807d
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
from types import FrameType
|
from types import FrameType
|
||||||
from typing import IO, Any, Callable, Generator, Optional
|
from typing import IO, Any, Callable, Generator, Literal, Optional
|
||||||
import xmlrpc.client
|
import xmlrpc.client
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import re
|
import re
|
||||||
@ -168,6 +168,22 @@ def remove_dir_with_files(directory: Path) -> None:
|
|||||||
logger.info("Removed dir %s", directory)
|
logger.info("Removed dir %s", directory)
|
||||||
|
|
||||||
|
|
||||||
|
def fast_iterdir(
    directory: Path, filter_type: Literal["dir", "file"]
) -> Generator[os.DirEntry[str], Any, None]:
    """
    Lazily yield the entries of ``directory`` that match ``filter_type``.

    iterdir() in pathlib would ignore file type information from getdents64(),
    which is not acceptable when you have millions of files in one directory,
    and you need to filter out all files/directories.

    Args:
        directory: Directory to scan (non-recursive).
        filter_type: ``"dir"`` to yield only entries for which
            ``os.DirEntry.is_dir()`` is true, ``"file"`` to yield only entries
            for which ``os.DirEntry.is_file()`` is true.

    Yields:
        The matching ``os.DirEntry`` objects, in the order ``os.scandir()``
        returns them. Use ``entry.name`` / ``entry.path`` to get at the name
        or the full path.

    Note:
        This is a generator, so nothing (including the ``filter_type`` check
        and opening the directory) happens until the first item is requested.
    """
    assert filter_type in ["dir", "file"]
    # Use scandir() as a context manager so the directory handle is closed
    # deterministically, even if the caller stops iterating early (generator
    # close) or an entry check raises, instead of waiting for garbage
    # collection to release it.
    with os.scandir(directory) as entries:
        for item in entries:
            if filter_type == "dir" and item.is_dir():
                yield item
            elif filter_type == "file" and item.is_file():
                yield item
|
||||||
|
|
||||||
|
|
||||||
def get_package_urls_from_index_html(html_path: Path) -> list[str]:
|
def get_package_urls_from_index_html(html_path: Path) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Get all <a> href (fragments removed) from given simple/<package>/index.html contents
|
Get all <a> href (fragments removed) from given simple/<package>/index.html contents
|
||||||
@ -1069,11 +1085,11 @@ def genlocal(ctx: click.Context) -> None:
|
|||||||
local = {}
|
local = {}
|
||||||
json_dir = basedir / "json"
|
json_dir = basedir / "json"
|
||||||
logger.info("Iterating all items under %s", json_dir)
|
logger.info("Iterating all items under %s", json_dir)
|
||||||
dir_items = [d for d in json_dir.iterdir() if d.is_file()]
|
dir_items = [d for d in fast_iterdir(json_dir, "file")]
|
||||||
logger.info("Detected %s packages in %s in total", len(dir_items), json_dir)
|
logger.info("Detected %s packages in %s in total", len(dir_items), json_dir)
|
||||||
for package_metapath in tqdm(dir_items, desc="Reading packages from json/"):
|
for package_metapath in tqdm(dir_items, desc="Reading packages from json/"):
|
||||||
package_name = package_metapath.name
|
package_name = package_metapath.name
|
||||||
serial = get_local_serial(package_metapath)
|
serial = get_local_serial(Path(package_metapath.path))
|
||||||
if serial:
|
if serial:
|
||||||
local[package_name] = serial
|
local[package_name] = serial
|
||||||
logger.info(
|
logger.info(
|
||||||
@ -1117,8 +1133,8 @@ def verify(
|
|||||||
|
|
||||||
logger.info("====== Step 1. Remove packages NOT in local db ======")
|
logger.info("====== Step 1. Remove packages NOT in local db ======")
|
||||||
local_names = set(local_db.keys())
|
local_names = set(local_db.keys())
|
||||||
simple_dirs = {i.name for i in (basedir / "simple").iterdir() if i.is_dir()}
|
simple_dirs = {i.name for i in fast_iterdir((basedir / "simple"), "dir")}
|
||||||
json_files = {i.name for i in (basedir / "json").iterdir() if i.is_file()}
|
json_files = {i.name for i in fast_iterdir((basedir / "json"), "file")}
|
||||||
not_in_local = (simple_dirs | json_files) - local_names
|
not_in_local = (simple_dirs | json_files) - local_names
|
||||||
logger.info(
|
logger.info(
|
||||||
"%d out of %d local packages NOT in local db",
|
"%d out of %d local packages NOT in local db",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user