mirror of
https://github.com/taoky/shadowmire.git
synced 2025-07-08 09:12:43 +00:00
Replace glob by fast_iterdir in verification step 4
To avoid stat() every files in package/. Related: #4
This commit is contained in:
parent
6e4aa61119
commit
a5dbd61e95
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
from types import FrameType
|
from types import FrameType
|
||||||
from typing import IO, Any, Callable, Generator, Literal, NoReturn, Optional
|
from typing import IO, Any, Callable, Generator, Literal, NoReturn, Optional, Set
|
||||||
import xmlrpc.client
|
import xmlrpc.client
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import re
|
import re
|
||||||
@ -192,7 +192,7 @@ def remove_dir_with_files(directory: Path) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def fast_iterdir(
|
def fast_iterdir(
|
||||||
directory: Path, filter_type: Literal["dir", "file"]
|
directory: Path | str, filter_type: Literal["dir", "file"]
|
||||||
) -> Generator[os.DirEntry[str], Any, None]:
|
) -> Generator[os.DirEntry[str], Any, None]:
|
||||||
"""
|
"""
|
||||||
iterdir() in pathlib would ignore file type information from getdents64(),
|
iterdir() in pathlib would ignore file type information from getdents64(),
|
||||||
@ -1217,7 +1217,7 @@ def verify(
|
|||||||
logger.info(
|
logger.info(
|
||||||
"====== Step 4. Remove any unreferenced files in `packages` folder ======"
|
"====== Step 4. Remove any unreferenced files in `packages` folder ======"
|
||||||
)
|
)
|
||||||
ref_set = set()
|
ref_set: Set[str] = set()
|
||||||
with ThreadPoolExecutor(max_workers=IOWORKERS) as executor:
|
with ThreadPoolExecutor(max_workers=IOWORKERS) as executor:
|
||||||
# Part 1: iterate simple/
|
# Part 1: iterate simple/
|
||||||
def iterate_simple(sname: str) -> list[str]:
|
def iterate_simple(sname: str) -> list[str]:
|
||||||
@ -1258,15 +1258,15 @@ def verify(
|
|||||||
|
|
||||||
# Part 2: iterate packages
|
# Part 2: iterate packages
|
||||||
def unlink_not_in_set(first_dirname: str, position: int) -> None:
|
def unlink_not_in_set(first_dirname: str, position: int) -> None:
|
||||||
for file in tqdm(
|
with tqdm(desc=f"Iterating packages/{first_dirname}/*/*/*", position=position) as pb:
|
||||||
(basedir / "packages" / first_dirname).glob("*/*/*"),
|
for d1 in fast_iterdir(basedir / "packages" / first_dirname, "dir"):
|
||||||
desc=f"Iterating packages/{first_dirname}/*/*/*",
|
for d2 in fast_iterdir(d1.path, "dir"):
|
||||||
position=position,
|
for file in fast_iterdir(d2.path, "file"):
|
||||||
):
|
pb.update(1)
|
||||||
logger.debug("find file %s", file)
|
logger.debug("find file %s", file)
|
||||||
if str(file) not in ref_set:
|
if file.path not in ref_set:
|
||||||
logger.info("removing unreferenced file %s", file)
|
logger.info("removing unreferenced file %s", file.path)
|
||||||
file.unlink()
|
Path(file.path).unlink()
|
||||||
|
|
||||||
# MyPy does not enjoy same variable name with different types, even when --allow-redefinition
|
# MyPy does not enjoy same variable name with different types, even when --allow-redefinition
|
||||||
# Ignore here to make mypy happy
|
# Ignore here to make mypy happy
|
||||||
|
Loading…
x
Reference in New Issue
Block a user