mirror of
https://github.com/taoky/shadowmire.git
synced 2025-07-08 17:32:43 +00:00
Add SHADOWMIRE_IGNORE_THRESHOLD option
Related: https://github.com/tuna/issues/issues/2061
This commit is contained in:
parent
1ca3e23bcf
commit
354bfaf28e
@ -42,6 +42,8 @@ WORKERS = int(os.environ.get("SHADOWMIRE_WORKERS", "3"))
|
|||||||
IOWORKERS = int(os.environ.get("SHADOWMIRE_IOWORKERS", "2"))
|
IOWORKERS = int(os.environ.get("SHADOWMIRE_IOWORKERS", "2"))
|
||||||
# A safety net -- to avoid upstream issues causing too many packages to be removed when determining the sync plan.
|
# A safety net -- to avoid upstream issues causing too many packages to be removed when determining the sync plan.
|
||||||
MAX_DELETION = int(os.environ.get("SHADOWMIRE_MAX_DELETION", "50000"))
|
MAX_DELETION = int(os.environ.get("SHADOWMIRE_MAX_DELETION", "50000"))
|
||||||
|
# Sometimes PyPI is not consistent -- new packages could not be fetched. This option tries to avoid permanently marking that kind of package as nonexistent.
|
||||||
|
IGNORE_THRESHOLD = int(os.environ.get("SHADOWMIRE_IGNORE_THRESHOLD", "1024"))
|
||||||
|
|
||||||
# https://github.com/pypa/bandersnatch/blob/a05af547f8d1958217ef0dc0028890b1839e6116/src/bandersnatch_filter_plugins/prerelease_name.py#L18C1-L23C6
|
# https://github.com/pypa/bandersnatch/blob/a05af547f8d1958217ef0dc0028890b1839e6116/src/bandersnatch_filter_plugins/prerelease_name.py#L18C1-L23C6
|
||||||
PRERELEASE_PATTERNS = (
|
PRERELEASE_PATTERNS = (
|
||||||
@ -337,6 +339,9 @@ class PyPI:
|
|||||||
del ret[key]
|
del ret[key]
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def changelog_last_serial(self) -> int:
|
||||||
|
return self.xmlrpc_client.changelog_last_serial() # type: ignore
|
||||||
|
|
||||||
def get_package_metadata(self, package_name: str) -> dict:
|
def get_package_metadata(self, package_name: str) -> dict:
|
||||||
req = self.session.get(urljoin(self.host, f"pypi/{package_name}/json"))
|
req = self.session.get(urljoin(self.host, f"pypi/{package_name}/json"))
|
||||||
if req.status_code == 404:
|
if req.status_code == 404:
|
||||||
@ -828,15 +833,18 @@ class SyncPyPI(SyncBase):
|
|||||||
) -> None:
|
) -> None:
|
||||||
self.pypi = PyPI()
|
self.pypi = PyPI()
|
||||||
self.session = create_requests_session()
|
self.session = create_requests_session()
|
||||||
|
self.last_serial: Optional[int] = None
|
||||||
|
self.remote_packages: Optional[dict[str, int]] = None
|
||||||
super().__init__(basedir, local_db, sync_packages)
|
super().__init__(basedir, local_db, sync_packages)
|
||||||
|
|
||||||
def fetch_remote_versions(self) -> dict[str, int]:
|
def fetch_remote_versions(self) -> dict[str, int]:
|
||||||
ret = self.pypi.list_packages_with_serial()
|
self.last_serial = self.pypi.changelog_last_serial()
|
||||||
logger.info("Remote has %s packages", len(ret))
|
self.remote_packages = self.pypi.list_packages_with_serial()
|
||||||
|
logger.info("Remote has %s packages", len(self.remote_packages))
|
||||||
with overwrite(self.basedir / "remote.json") as f:
|
with overwrite(self.basedir / "remote.json") as f:
|
||||||
json.dump(ret, f)
|
json.dump(self.remote_packages, f)
|
||||||
logger.info("File saved to remote.json.")
|
logger.info("File saved to remote.json.")
|
||||||
return ret
|
return self.remote_packages
|
||||||
|
|
||||||
def do_update(
|
def do_update(
|
||||||
self,
|
self,
|
||||||
@ -852,9 +860,31 @@ class SyncPyPI(SyncBase):
|
|||||||
meta_original = deepcopy(meta)
|
meta_original = deepcopy(meta)
|
||||||
logger.debug("%s meta: %s", package_name, meta)
|
logger.debug("%s meta: %s", package_name, meta)
|
||||||
except PackageNotFoundError:
|
except PackageNotFoundError:
|
||||||
|
if (
|
||||||
|
self.remote_packages is not None
|
||||||
|
and package_name in self.remote_packages
|
||||||
|
):
|
||||||
|
recorded_serial = self.remote_packages[package_name]
|
||||||
|
else:
|
||||||
|
recorded_serial = None
|
||||||
|
if (
|
||||||
|
recorded_serial is not None
|
||||||
|
and self.last_serial is not None
|
||||||
|
and abs(recorded_serial - self.last_serial) < IGNORE_THRESHOLD
|
||||||
|
):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"%s missing from upstream, remove and ignore in the future.",
|
"%s missing from upstream (its serial %s, remote last serial %s), try next time...",
|
||||||
package_name,
|
package_name,
|
||||||
|
recorded_serial,
|
||||||
|
self.last_serial,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
"%s missing from upstream (its serial %s, remote last serial %s), remove and ignore in the future.",
|
||||||
|
package_name,
|
||||||
|
recorded_serial,
|
||||||
|
self.last_serial,
|
||||||
)
|
)
|
||||||
# try remove it locally, if it does not exist upstream
|
# try remove it locally, if it does not exist upstream
|
||||||
self.do_remove(package_name, use_db=False)
|
self.do_remove(package_name, use_db=False)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user