Mirror of https://github.com/taoky/shadowmire.git (synced 2025-07-08 17:32:43 +00:00)
Adjust verify behavior

Verifying is heavily network-bound if we need to update EVERY package, so just check whether the local state is consistent instead of making sure everything is up-to-date.
commit 2aa8c9ae97 (parent 804fba55cb)
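The gist of the change: the old verify re-synced every package in the local db, which costs one upstream round-trip per package; the new one first does a purely local consistency pass and only re-syncs packages whose index or package files are missing. A minimal sketch of that idea, assuming the `simple/<package>/` on-disk layout; the helper name and the use of `index.html` alone are illustrative, not shadowmire's actual API:

```python
from pathlib import Path

def find_inconsistent(basedir: Path, package_names: list[str]) -> list[str]:
    """Purely local pass: filesystem checks only, no network I/O per package."""
    broken = []
    for name in package_names:
        index = basedir / "simple" / name / "index.html"
        if not index.exists():  # a missing index means the package needs a re-sync
            broken.append(name)
    return broken
```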
README.md
@@ -71,7 +71,14 @@ If you already have a pypi repo, use `genlocal` first to generate a local db:
 ./shadowmire.py genlocal
 ```
 
-Verify command could be used if you believe that something is wrong. It would remove packages NOT in local db, update all local packages, and delete unreferenced files in `packages` folder:
+Verify command could be used if you believe that something is wrong (inconsistent). It would:
+
+1. remove packages NOT in local db
+2. remove packages NOT in remote (with consideration of `--exclude`)
+3. make sure all local indexes are valid, and (if --sync-packages) have valid local package files
+
+(`--prerelease-exclude` would be ignored)
+4. delete unreferenced files in `packages` folder
 
 ```shell
 ./shadowmire.py verify --sync-packages
shadowmire.py
@@ -418,6 +418,26 @@ class SyncBase:
     def fetch_remote_versions(self) -> dict[str, int]:
         raise NotImplementedError
 
+    def check_and_update(self, package_names: list[str]) -> None:
+        to_update = []
+        for package_name in tqdm(package_names, desc="Checking consistency"):
+            package_simple_path = self.basedir / "simple" / package_name
+            hrefs = get_existing_hrefs(package_simple_path)
+            if not hrefs:
+                to_update.append(package_name)
+                continue
+            # OK, check if all hrefs have corresponding files
+            if self.sync_packages:
+                should_update = False
+                for href in hrefs:
+                    dest = (package_simple_path / href).resolve()
+                    if not dest.exists():
+                        should_update = True
+                        break
+                if should_update:
+                    to_update.append(package_name)
+        self.parallel_update(to_update, [])
+
     def parallel_update(
         self, package_names: list, prerelease_excludes: list[re.Pattern[str]]
     ) -> None:
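`check_and_update` relies on `get_existing_hrefs()`, which is defined elsewhere in shadowmire.py and not shown in this diff. A plausible sketch of what such a helper does, assuming it collects the `href` attributes out of a package's local `index.html` (the actual implementation may differ):

```python
from html.parser import HTMLParser
from pathlib import Path

class _HrefCollector(HTMLParser):
    """Collects every href attribute from <a> tags in a simple index page."""

    def __init__(self) -> None:
        super().__init__()
        self.hrefs: list[str] = []

    def handle_starttag(self, tag: str, attrs: list) -> None:
        if tag == "a":
            for name, value in attrs:
                if name == "href" and value:
                    self.hrefs.append(value)

def get_existing_hrefs_sketch(package_simple_path: Path) -> list[str]:
    index = package_simple_path / "index.html"
    if not index.exists():
        return []
    parser = _HrefCollector()
    parser.feed(index.read_text())
    return parser.hrefs
```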
@@ -432,7 +452,7 @@ class SyncBase:
             for idx, package_name in enumerate(package_names)
         }
         try:
-            for future in tqdm(as_completed(futures), total=len(package_names)):
+            for future in tqdm(as_completed(futures), total=len(package_names), desc="Updating"):
                 idx, package_name = futures[future]
                 try:
                     serial = future.result()
@@ -522,7 +542,11 @@ def download(session: requests.Session, url: str, dest: Path) -> tuple[bool, int]:
         logger.warning("download %s failed with exception", exc_info=True)
         return False, -1
     if resp.status_code >= 400:
-        logger.warning("download %s failed with status %s, skipping this package", url, resp.status_code)
+        logger.warning(
+            "download %s failed with status %s, skipping this package",
+            url,
+            resp.status_code,
+        )
         return False, resp.status_code
     with overwrite(dest, "wb") as f:
         f.write(resp.content)
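For context, `download()` returns a `(success, status_code)` pair: `(False, -1)` when the request raises an exception and `(False, resp.status_code)` on an HTTP error, which is what lets callers special-case 404 further down. A hedged usage sketch; `session`, `url`, and `dest` are placeholders:

```python
success, code = download(session, url, dest)
if not success:
    if code == 404:
        # permanent "gone upstream": safe to remove locally, as SyncPlainHTTP does
        ...
    else:
        ...  # transient failure (code == -1 for exceptions, else the HTTP status)
```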
@@ -557,7 +581,9 @@ class SyncPyPI(SyncBase):
             meta = self.pypi.get_package_metadata(package_name)
             logger.debug("%s meta: %s", package_name, meta)
         except PackageNotFoundError:
-            logger.warning("%s missing from upstream, skip and ignore in the future.", package_name)
+            logger.warning(
+                "%s missing from upstream, skip and ignore in the future.", package_name
+            )
             # try remove it locally, if it does not exist upstream
             self.do_remove(package_name, write_db=False)
             if not write_db:
@@ -655,13 +681,17 @@ class SyncPlainHTTP(SyncBase):
         for filename in ("index.html", "index.v1_html", "index.v1_json"):
             file_url = urljoin(self.upstream, f"/simple/{package_name}/{filename}")
             # Don't overwrite existing index first!
-            success, code = download(self.session, file_url, package_simple_path / (filename + ".new"))
+            success, code = download(
+                self.session, file_url, package_simple_path / (filename + ".new")
+            )
             if not success:
                 if filename != "index.html":
                     logger.warning("index file %s fails", file_url)
                     continue
                 else:
-                    logger.error("critical index file %s fails. Stop with this.", file_url)
+                    logger.error(
+                        "critical index file %s fails. Stop with this.", file_url
+                    )
                     if code == 404:
                         self.do_remove(package_name, write_db=False)
                     # We don't return -1 here, as shadowmire upstream would fix this inconsistency next time syncing.
@@ -686,7 +716,7 @@ class SyncPlainHTTP(SyncBase):
         if not success:
             logger.warning("skipping %s as it fails downloading", package_name)
             return None
 
         # OK, now it's safe to rename
         for filename in ("index.html", "index.v1_html", "index.v1_json"):
             file_from = package_simple_path / (filename + ".new")
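The `.new` suffix plus the later rename is a write-then-swap pattern: the fresh index is downloaded next to the live one ("Don't overwrite existing index first!") and only moved into place once every file in the set has downloaded, so a half-failed fetch never clobbers a previously good index. A minimal sketch of the same idea; the paths are illustrative:

```python
import os
from pathlib import Path

def swap_in(tmp: Path, live: Path) -> None:
    # os.replace() is atomic on POSIX when both paths are on the same filesystem,
    # so readers see either the old index or the complete new one, never a partial file
    os.replace(tmp, live)

# e.g. swap_in(package_simple_path / "index.html.new", package_simple_path / "index.html")
```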
@@ -851,15 +881,23 @@ def verify(
     basedir: Path = ctx.obj["basedir"]
     local_db: LocalVersionKV = ctx.obj["local_db"]
     excludes = exclude_to_excludes(exclude)
-    prerelease_excludes = exclude_to_excludes(prerelease_exclude)
+    # prerelease_excludes = exclude_to_excludes(prerelease_exclude)
     syncer = get_syncer(basedir, local_db, sync_packages, shadowmire_upstream)
+    # 1. remove packages NOT in local db
     local_names = set(local_db.keys())
     simple_dirs = set([i.name for i in (basedir / "simple").iterdir() if i.is_dir()])
     for package_name in simple_dirs - local_names:
         syncer.do_remove(package_name)
-    syncer.parallel_update(list(local_names), prerelease_excludes)
+    # 2. remove packages NOT in remote
+    local = local_db.dump(skip_invalid=False)
+    plan = syncer.determine_sync_plan(local, excludes)
+    for package_name in plan.remove:
+        # We only take the plan.remove part here
+        syncer.do_remove(package_name)
+    # 3. make sure all local indexes are valid, and (if --sync-packages) have valid local package files
+    syncer.check_and_update(list(local_names))
     syncer.finalize()
-    # clean up unreferenced package files
+    # 4. delete unreferenced files in `packages` folder
     ref_set = set()
     for sname in simple_dirs:
         sd = basedir / "simple" / sname
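The hunk is cut off here, but step 4's shape is visible: collect every file href referenced by any local simple index into `ref_set`, then walk the `packages` folder and delete anything not in the set. A hedged sketch of that shape, reusing `get_existing_hrefs` from the diff above; the real loop bodies may differ:

```python
# sketch only: continues the ref_set loop shown above
ref_set: set[str] = set()
for sname in simple_dirs:
    sd = basedir / "simple" / sname
    for href in get_existing_hrefs(sd):
        # normalize to an absolute path so it can be compared during the walk
        ref_set.add(str((sd / href).resolve()))

for file in (basedir / "packages").rglob("*"):
    if file.is_file() and str(file.resolve()) not in ref_set:
        file.unlink()  # not referenced by any simple index
```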