Add --excluded-wheel-filename

Closes: #7
This commit is contained in:
taoky 2025-06-17 23:32:06 +08:00
parent 5942907954
commit 5a4f28a3d3
2 changed files with 63 additions and 12 deletions

View File

@ -63,6 +63,12 @@ Also it supports prerelease filtering like [this](https://bandersnatch.readthedo
./shadowmire.py sync --sync-packages --prerelease-exclude '^duckdb$' ./shadowmire.py sync --sync-packages --prerelease-exclude '^duckdb$'
``` ```
Wheel filename filtering is also supported, if you need to exclude wheels by platform, Python version, etc.:
```shell
./shadowmire.py sync --sync-packages --excluded-wheel-filename '-macosx-10.6-ppc.whl$'
```
And `--shadowmire-upstream`, if you don't want to sync from PyPI directly. And `--shadowmire-upstream`, if you don't want to sync from PyPI directly.
```shell ```shell

View File

@ -577,6 +577,7 @@ class SyncBase:
self, self,
package_names: list[str], package_names: list[str],
prerelease_excludes: list[re.Pattern[str]], prerelease_excludes: list[re.Pattern[str]],
excluded_wheel_filenames: list[re.Pattern[str]],
json_files: set[str], json_files: set[str],
packages_pathcache: set[str], packages_pathcache: set[str],
compare_size: bool, compare_size: bool,
@ -667,16 +668,23 @@ class SyncBase:
exit_with_futures(futures) exit_with_futures(futures)
logger.info("%s packages to update in check_and_update()", len(to_update)) logger.info("%s packages to update in check_and_update()", len(to_update))
return self.parallel_update(to_update, prerelease_excludes) return self.parallel_update(to_update, prerelease_excludes, excluded_wheel_filenames)
def parallel_update( def parallel_update(
self, package_names: list[str], prerelease_excludes: list[re.Pattern[str]] self,
package_names: list[str],
prerelease_excludes: list[re.Pattern[str]],
excluded_wheel_filenames: list[re.Pattern[str]],
) -> bool: ) -> bool:
success = True success = True
with ThreadPoolExecutor(max_workers=WORKERS) as executor: with ThreadPoolExecutor(max_workers=WORKERS) as executor:
futures = { futures = {
executor.submit( executor.submit(
self.do_update, package_name, prerelease_excludes, False self.do_update,
package_name,
prerelease_excludes,
excluded_wheel_filenames,
False,
): ( ): (
idx, idx,
package_name, package_name,
@ -707,7 +715,10 @@ class SyncBase:
return success return success
def do_sync_plan( def do_sync_plan(
self, plan: Plan, prerelease_excludes: list[re.Pattern[str]] self,
plan: Plan,
prerelease_excludes: list[re.Pattern[str]],
excluded_wheel_filenames: list[re.Pattern[str]],
) -> bool: ) -> bool:
to_remove = plan.remove to_remove = plan.remove
to_update = plan.update to_update = plan.update
@ -715,7 +726,7 @@ class SyncBase:
for package_name in to_remove: for package_name in to_remove:
self.do_remove(package_name) self.do_remove(package_name)
return self.parallel_update(to_update, prerelease_excludes) return self.parallel_update(to_update, prerelease_excludes, excluded_wheel_filenames)
def do_remove( def do_remove(
self, package_name: str, use_db: bool = True, remove_packages: bool = True self, package_name: str, use_db: bool = True, remove_packages: bool = True
@ -744,6 +755,7 @@ class SyncBase:
self, self,
package_name: str, package_name: str,
prerelease_excludes: list[re.Pattern[str]], prerelease_excludes: list[re.Pattern[str]],
excluded_wheel_filenames: list[re.Pattern[str]],
use_db: bool = True, use_db: bool = True,
) -> Optional[int]: ) -> Optional[int]:
raise NotImplementedError raise NotImplementedError
@ -836,6 +848,17 @@ def filter_release_from_meta(
del meta["releases"][release] del meta["releases"][release]
def filter_wheel_file_from_meta(
    meta: dict, patterns: list[re.Pattern[str]] | tuple[re.Pattern[str], ...]
) -> None:
    """Remove release file entries whose filename is matched by ``patterns``.

    Each list stored under ``meta["releases"]`` is edited in place: an entry
    is dropped when ``match_patterns(entry["filename"], patterns)`` is truthy.
    Nothing is returned.
    """
    for files in meta["releases"].values():
        # Slice assignment mutates the existing list object, so the filtered
        # result stays visible through ``meta`` without rebinding any key.
        files[:] = [
            entry
            for entry in files
            if not match_patterns(entry["filename"], patterns)
        ]
class SyncPyPI(SyncBase): class SyncPyPI(SyncBase):
def __init__( def __init__(
self, basedir: Path, local_db: LocalVersionKV, sync_packages: bool = False self, basedir: Path, local_db: LocalVersionKV, sync_packages: bool = False
@ -859,6 +882,7 @@ class SyncPyPI(SyncBase):
self, self,
package_name: str, package_name: str,
prerelease_excludes: list[re.Pattern[str]], prerelease_excludes: list[re.Pattern[str]],
excluded_wheel_filenames: list[re.Pattern[str]],
use_db: bool = True, use_db: bool = True,
) -> Optional[int]: ) -> Optional[int]:
logger.info("updating %s", package_name) logger.info("updating %s", package_name)
@ -902,9 +926,11 @@ class SyncPyPI(SyncBase):
self.local_db.set(package_name, -1) self.local_db.set(package_name, -1)
return None return None
# filter prerelease, if necessary # filter prerelease and wheel files, if necessary
if match_patterns(package_name, prerelease_excludes): if match_patterns(package_name, prerelease_excludes):
filter_release_from_meta(meta, PRERELEASE_PATTERNS) filter_release_from_meta(meta, PRERELEASE_PATTERNS)
if excluded_wheel_filenames:
filter_wheel_file_from_meta(meta, excluded_wheel_filenames)
if self.sync_packages: if self.sync_packages:
# sync packages first, then sync index # sync packages first, then sync index
@ -989,6 +1015,7 @@ class SyncPlainHTTP(SyncBase):
self, self,
package_name: str, package_name: str,
prerelease_excludes: list[re.Pattern[str]], prerelease_excludes: list[re.Pattern[str]],
excluded_wheel_filenames: list[re.Pattern[str]],
use_db: bool = True, use_db: bool = True,
) -> Optional[int]: ) -> Optional[int]:
logger.info("updating %s", package_name) logger.info("updating %s", package_name)
@ -1011,9 +1038,11 @@ class SyncPlainHTTP(SyncBase):
return None return None
assert resp assert resp
meta = resp.json() meta = resp.json()
# filter prerelease, if necessary # filter prerelease and wheel files, if necessary
if match_patterns(package_name, prerelease_excludes): if match_patterns(package_name, prerelease_excludes):
filter_release_from_meta(meta, PRERELEASE_PATTERNS) filter_release_from_meta(meta, PRERELEASE_PATTERNS)
if excluded_wheel_filenames:
filter_wheel_file_from_meta(meta, excluded_wheel_filenames)
if self.sync_packages: if self.sync_packages:
release_files = PyPI.get_release_files_from_meta(meta) release_files = PyPI.get_release_files_from_meta(meta)
@ -1097,6 +1126,11 @@ def sync_shared_args(func: Callable[..., Any]) -> Callable[..., Any]:
multiple=True, multiple=True,
help="Package names of which prereleases will be excluded. Regex.", help="Package names of which prereleases will be excluded. Regex.",
), ),
click.option(
"--excluded-wheel-filename",
multiple=True,
help="Specify patterns to exclude wheel files (applies to all packages). Regex.",
),
] ]
for option in shared_options[::-1]: for option in shared_options[::-1]:
func = option(func) func = option(func)
@ -1195,12 +1229,14 @@ def sync(
shadowmire_upstream: Optional[str], shadowmire_upstream: Optional[str],
exclude: tuple[str], exclude: tuple[str],
prerelease_exclude: tuple[str], prerelease_exclude: tuple[str],
excluded_wheel_filename: tuple[str],
use_pypi_index: bool, use_pypi_index: bool,
) -> None: ) -> None:
basedir: Path = ctx.obj["basedir"] basedir: Path = ctx.obj["basedir"]
local_db: LocalVersionKV = ctx.obj["local_db"] local_db: LocalVersionKV = ctx.obj["local_db"]
excludes = exclude_to_excludes(exclude) excludes = exclude_to_excludes(exclude)
prerelease_excludes = exclude_to_excludes(prerelease_exclude) prerelease_excludes = exclude_to_excludes(prerelease_exclude)
excluded_wheel_filenames = exclude_to_excludes(excluded_wheel_filename)
syncer = get_syncer( syncer = get_syncer(
basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index
) )
@ -1209,7 +1245,7 @@ def sync(
# save plan for debugging # save plan for debugging
with overwrite(basedir / "plan.json") as f: with overwrite(basedir / "plan.json") as f:
json.dump(plan, f, default=vars, indent=2) json.dump(plan, f, default=vars, indent=2)
success = syncer.do_sync_plan(plan, prerelease_excludes) success = syncer.do_sync_plan(plan, prerelease_excludes, excluded_wheel_filenames)
syncer.finalize() syncer.finalize()
logger.info("Synchronization finished. Success: %s", success) logger.info("Synchronization finished. Success: %s", success)
@ -1279,6 +1315,7 @@ def verify(
shadowmire_upstream: Optional[str], shadowmire_upstream: Optional[str],
exclude: tuple[str], exclude: tuple[str],
prerelease_exclude: tuple[str], prerelease_exclude: tuple[str],
excluded_wheel_filename: tuple[str],
remove_not_in_local: bool, remove_not_in_local: bool,
compare_size: bool, compare_size: bool,
use_pypi_index: bool, use_pypi_index: bool,
@ -1287,6 +1324,7 @@ def verify(
local_db: LocalVersionKV = ctx.obj["local_db"] local_db: LocalVersionKV = ctx.obj["local_db"]
excludes = exclude_to_excludes(exclude) excludes = exclude_to_excludes(exclude)
prerelease_excludes = exclude_to_excludes(prerelease_exclude) prerelease_excludes = exclude_to_excludes(prerelease_exclude)
excluded_wheel_filenames = exclude_to_excludes(excluded_wheel_filename)
syncer = get_syncer( syncer = get_syncer(
basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index
) )
@ -1343,7 +1381,9 @@ def verify(
return res return res
futures = { futures = {
executor.submit(packages_iterate, first_dir.name, idx % IOWORKERS): first_dir.name # type: ignore executor.submit(
packages_iterate, first_dir.name, idx % IOWORKERS
): first_dir.name # type: ignore
for idx, first_dir in enumerate(fast_iterdir((basedir / "packages"), "dir")) for idx, first_dir in enumerate(fast_iterdir((basedir / "packages"), "dir"))
} }
try: try:
@ -1366,6 +1406,7 @@ def verify(
success = syncer.check_and_update( success = syncer.check_and_update(
list(local_names), list(local_names),
prerelease_excludes, prerelease_excludes,
excluded_wheel_filenames,
json_files, json_files,
packages_pathcache, packages_pathcache,
compare_size, compare_size,
@ -1395,7 +1436,8 @@ def verify(
# MyPy does not enjoy same variable name with different types, even when --allow-redefinition # MyPy does not enjoy same variable name with different types, even when --allow-redefinition
# Ignore here to make mypy happy # Ignore here to make mypy happy
futures = { futures = {
executor.submit(iterate_simple, sname): sname for sname in simple_dirs # type: ignore executor.submit(iterate_simple, sname): sname
for sname in simple_dirs # type: ignore
} }
try: try:
for future in tqdm( for future in tqdm(
@ -1438,6 +1480,7 @@ def do_update(
shadowmire_upstream: Optional[str], shadowmire_upstream: Optional[str],
exclude: tuple[str], exclude: tuple[str],
prerelease_exclude: tuple[str], prerelease_exclude: tuple[str],
excluded_wheel_filename: tuple[str],
use_pypi_index: bool, use_pypi_index: bool,
package_name: str, package_name: str,
) -> None: ) -> None:
@ -1447,10 +1490,11 @@ def do_update(
if excludes: if excludes:
logger.warning("--exclude is ignored in do_update()") logger.warning("--exclude is ignored in do_update()")
prerelease_excludes = exclude_to_excludes(prerelease_exclude) prerelease_excludes = exclude_to_excludes(prerelease_exclude)
excluded_wheel_filenames = exclude_to_excludes(excluded_wheel_filename)
syncer = get_syncer( syncer = get_syncer(
basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index
) )
syncer.do_update(package_name, prerelease_excludes) syncer.do_update(package_name, prerelease_excludes, excluded_wheel_filenames)
@cli.command(help="Manual remove given package for debugging purpose") @cli.command(help="Manual remove given package for debugging purpose")
@ -1463,12 +1507,13 @@ def do_remove(
shadowmire_upstream: Optional[str], shadowmire_upstream: Optional[str],
exclude: tuple[str], exclude: tuple[str],
prerelease_exclude: tuple[str], prerelease_exclude: tuple[str],
excluded_wheel_filename: tuple[str],
use_pypi_index: bool, use_pypi_index: bool,
package_name: str, package_name: str,
) -> None: ) -> None:
basedir = ctx.obj["basedir"] basedir = ctx.obj["basedir"]
local_db = ctx.obj["local_db"] local_db = ctx.obj["local_db"]
if exclude or prerelease_exclude: if exclude or prerelease_exclude or excluded_wheel_filename:
logger.warning("exclusion rules are ignored in do_remove()") logger.warning("exclusion rules are ignored in do_remove()")
syncer = get_syncer( syncer = get_syncer(
basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index basedir, local_db, sync_packages, shadowmire_upstream, use_pypi_index