Mirror of https://github.com/taoky/shadowmire.git, synced 2025-07-08 17:32:43 +00:00
PyPI Packages downloading support

commit a4574ecaeb (parent 3fd37601d6)

shadowmire.py | 109 changed lines (additions + deletions)
@@ -215,9 +215,8 @@ class PyPI:
         return prefix + parsed.path
 
     # Func modified from bandersnatch
-    def generate_html_simple_page(
-        self, package_meta: dict, package_rawname: str
-    ) -> str:
+    def generate_html_simple_page(self, package_meta: dict) -> str:
+        package_rawname = package_meta["info"]["name"]
         simple_page_content = (
             "<!DOCTYPE html>\n"
             "<html>\n"
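
The raw (un-normalized) project name is now derived inside the method from the package metadata instead of being passed by every caller. A minimal sketch of the lookup it relies on (the `meta` value here is a hypothetical fragment of a PyPI JSON API response):

    # The JSON API keeps the raw project name under info.name,
    # e.g. "Django", while the normalized name would be "django".
    meta = {"info": {"name": "Django"}}
    package_rawname = meta["info"]["name"]
    assert package_rawname == "Django"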
@@ -355,6 +354,36 @@ class SyncBase:
     def fetch_remote_versions(self) -> dict[str, int]:
         raise NotImplementedError
 
+    def parallel_update(self, package_names: list) -> None:
+        with ThreadPoolExecutor(max_workers=self.workers) as executor:
+            futures = {
+                executor.submit(self.do_update, package_name, False): (
+                    idx,
+                    package_name,
+                )
+                for idx, package_name in enumerate(package_names)
+            }
+            try:
+                for future in tqdm(as_completed(futures), total=len(package_names)):
+                    idx, package_name = futures[future]
+                    try:
+                        serial = future.result()
+                        if serial:
+                            self.local_db.set(package_name, serial)
+                    except Exception as e:
+                        if e is ExitProgramException or e is KeyboardInterrupt:
+                            raise
+                        logger.warning(
+                            "%s generated an exception", package_name, exc_info=True
+                        )
+                    if idx % 1000 == 0:
+                        self.local_db.dump_json()
+            except (ExitProgramException, KeyboardInterrupt):
+                logger.info("Get ExitProgramException or KeyboardInterrupt, exiting...")
+                for future in futures:
+                    future.cancel()
+                sys.exit(1)
+
     def do_sync_plan(self, plan: Plan) -> None:
         assert self.remote
         to_remove = plan.remove
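
`parallel_update()` factors the fan-out/harvest loop out of `do_sync_plan()`: every `do_update()` call is submitted up front, each `Future` is mapped back to its `(idx, package_name)` pair, and results are consumed in completion order so the local DB can be updated (and periodically dumped) as packages finish. A self-contained sketch of the same futures-dict pattern, with a placeholder `work()` standing in for `do_update()`:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def work(name: str) -> int:
        return len(name)  # stand-in for a real per-package update

    names = ["flask", "django", "numpy"]
    with ThreadPoolExecutor(max_workers=2) as executor:
        # map each Future back to the inputs that produced it
        futures = {executor.submit(work, n): (i, n) for i, n in enumerate(names)}
        for future in as_completed(futures):
            idx, name = futures[future]
            print(idx, name, future.result())  # completion order, not submit order

One caveat worth noting: `e is ExitProgramException` compares an exception instance against a class and is always False, so the inner `raise` is unreachable; `isinstance(e, ExitProgramException)` would be the robust check. `KeyboardInterrupt` derives from `BaseException` and is never caught by `except Exception` anyway, which is why the outer `except (ExitProgramException, KeyboardInterrupt)` handler is the one that actually fires for Ctrl-C.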
@@ -363,35 +392,7 @@ class SyncBase:
         for package_name in to_remove:
             self.do_remove(package_name)
 
-        with ThreadPoolExecutor(max_workers=self.workers) as executor:
-            futures = {
-                executor.submit(self.do_update, package_name, False): (
-                    idx,
-                    package_name,
-                )
-                for idx, package_name in enumerate(to_update)
-            }
-
-            try:
-                for future in tqdm(as_completed(futures), total=len(to_update)):
-                    idx, package_name = futures[future]
-                    try:
-                        serial = future.result()
-                        if serial:
-                            self.local_db.set(package_name, serial)
-                    except Exception as e:
-                        if e is ExitProgramException:
-                            raise
-                        logger.warning(
-                            "%s generated an exception", package_name, exc_info=True
-                        )
-                    if idx % 1000 == 0:
-                        self.local_db.dump_json()
-            except ExitProgramException:
-                logger.info("Get ExitProgramException, exiting...")
-                for future in futures:
-                    future.cancel()
-                sys.exit(1)
+        self.parallel_update(to_update)
 
     def do_remove(self, package_name: str) -> None:
         logger.info("removing %s", package_name)
@@ -462,11 +463,40 @@ class SyncPyPI(SyncBase):
         except PackageNotFoundError:
             logger.warning("%s missing from upstream, skip.", package_name)
             return None
 
+        if self.sync_packages:
+            # sync packages first, then sync index
+            existing_hrefs = []
+            try:
+                with open(package_simple_path / "index.html") as f:
+                    contents = f.read()
+                    existing_hrefs = get_packages_from_index_html(contents)
+            except FileNotFoundError:
+                pass
+            release_files = self.pypi.get_release_files_from_meta(meta)
+            # remove packages that no longer exist remotely
+            remote_hrefs = [self.pypi._file_url_to_local_url(i["url"]) for i in release_files]
+            should_remove = list(set(existing_hrefs) - set(remote_hrefs))
+            for p in should_remove:
+                logger.info("removing file %s (if exists)", p)
+                package_path = (package_simple_path / p).resolve()
+                package_path.unlink(missing_ok=True)
+            for i in release_files:
+                url = i["url"]
+                dest = (package_simple_path / self.pypi._file_url_to_local_url(i["url"])).resolve()
+                logger.info("downloading file %s -> %s", url, dest)
+                if dest.exists():
+                    continue
+                dest.parent.mkdir(parents=True, exist_ok=True)
+                resp = self.session.get(url)
+                if resp.status_code >= 400:
+                    logger.warning("download %s failed, skipping this package", url)
+                    return None
+                with overwrite(dest, "wb") as f:
+                    f.write(resp.content)
+
         last_serial: int = meta["last_serial"]
-        # OK, here we don't bother store raw name
-        # Considering that JSON API even does not give package raw name, why bother we use it?
-        simple_html_contents = self.pypi.generate_html_simple_page(meta, package_name)
+        simple_html_contents = self.pypi.generate_html_simple_page(meta)
         simple_json_contents = self.pypi.generate_json_simple_page(meta)
 
         for html_filename in ("index.html", "index.v1_html"):
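
The package-sync branch reconciles on-disk files against the fresh release list before downloading: hrefs parsed out of the previously written `index.html` that no longer appear upstream are unlinked, and files whose destination already exists are skipped. A small worked example of the set arithmetic (all paths hypothetical):

    existing_hrefs = ["../../packages/ab/cd/pkg-1.0.tar.gz",
                      "../../packages/ef/gh/pkg-0.9.tar.gz"]
    remote_hrefs = ["../../packages/ab/cd/pkg-1.0.tar.gz",
                    "../../packages/ij/kl/pkg-1.1.tar.gz"]
    should_remove = list(set(existing_hrefs) - set(remote_hrefs))
    # -> ["../../packages/ef/gh/pkg-0.9.tar.gz"]
    # pkg-1.1 is new and gets downloaded; pkg-1.0 is skipped via dest.exists().

Note also that `resp.content` buffers each distribution file fully in memory; `self.session.get(url, stream=True)` with `resp.iter_content()` would keep memory flat for large sdists and wheels.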
@@ -478,9 +508,6 @@ class SyncPyPI(SyncBase):
         with overwrite(json_path) as f:
             f.write(simple_json_contents)
 
-        if self.sync_packages:
-            raise NotImplementedError
-
         if write_db:
             self.local_db.set(package_name, last_serial)
 
@@ -565,7 +592,7 @@ def main(args: argparse.Namespace) -> None:
     local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
 
     if args.command == "sync":
-        sync = SyncPyPI(basedir=basedir, local_db=local_db)
+        sync = SyncPyPI(basedir=basedir, local_db=local_db, sync_packages=args.sync_packages)
         local = local_db.dump()
         plan = sync.determine_sync_plan(local)
         # save plan for debugging
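
The call site now threads `args.sync_packages` into the `SyncPyPI` constructor. The constructor change itself is not shown in these hunks; a minimal sketch of what it presumably looks like (signature and `super().__init__` call are assumptions, only the `self.sync_packages` attribute is confirmed by its use in `do_update()` above):

    # Hypothetical sketch -- the real __init__ lives elsewhere in the commit.
    class SyncPyPI(SyncBase):
        def __init__(self, basedir, local_db, sync_packages: bool = False):
            super().__init__(basedir, local_db)  # assumed base signature
            self.sync_packages = sync_packages   # consulted by do_update()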
@@ -589,8 +616,7 @@ def main(args: argparse.Namespace) -> None:
         simple_dirs = set([i.name for i in (basedir / "simple").iterdir()])
         for package_name in simple_dirs - local_names:
             sync.do_remove(package_name)
-        for package_name in local_names:
-            sync.do_update(package_name)
+        sync.parallel_update(list(local_names))
         sync.finalize()
 
 
@@ -599,6 +625,7 @@ if __name__ == "__main__":
     subparsers = parser.add_subparsers(dest="command")
 
     parser_sync = subparsers.add_parser("sync", help="Sync from upstream")
+    parser_sync.add_argument("--sync-packages", help="Sync packages instead of just indexes", action='store_true')
     parser_genlocal = subparsers.add_parser(
         "genlocal", help="(Re)generate local db and json from simple/"
     )
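
With the flag registered on the `sync` subparser, downloading of release files is opt-in from the command line (invocation sketch):

    python shadowmire.py sync                  # indexes only, as before
    python shadowmire.py sync --sync-packages  # also download release files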