Mirror of https://github.com/taoky/shadowmire.git, synced 2025-07-08 17:32:43 +00:00
SyncPlainHTTP Packages downloading support
This commit is contained in:
parent a4574ecaeb
commit 52ceabcdd4

shadowmire.py | 100
@@ -163,6 +163,17 @@ def get_packages_from_index_html(contents: str) -> list[str]:
     return ret
 
 
+def get_existing_hrefs(package_simple_path: Path) -> list[str]:
+    existing_hrefs = []
+    try:
+        with open(package_simple_path / "index.html") as f:
+            contents = f.read()
+        existing_hrefs = get_packages_from_index_html(contents)
+    except FileNotFoundError:
+        pass
+    return existing_hrefs
+
+
 class CustomXMLRPCTransport(xmlrpc.client.Transport):
     """
     Set user-agent for xmlrpc.client
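Note: the new get_existing_hrefs() helper factors out the "read the package's
index.html and collect its file hrefs" logic that SyncPyPI.do_update() had
inlined (see the -463 hunk below), so SyncPlainHTTP can reuse it. A minimal
usage sketch, with an invented package directory and href:

    from pathlib import Path

    # Returns [] if index.html does not exist yet.
    hrefs = get_existing_hrefs(Path("simple/requests"))
    # Each entry is a local-relative link such as
    # "../../packages/ab/cd/requests-2.31.0.tar.gz".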
@@ -208,7 +219,7 @@ class PyPI:
         release_files.sort(key=lambda x: x["filename"])
         return release_files
 
-    def _file_url_to_local_url(self, url: str) -> str:
+    def file_url_to_local_url(self, url: str) -> str:
         parsed = urlparse(url)
         assert parsed.path.startswith("/packages")
         prefix = "../.."
@@ -253,7 +264,7 @@ class PyPI:
         simple_page_content += "\n".join(
             [
                 '    <a href="{}#{}={}"{}>{}</a><br/>'.format(
-                    self._file_url_to_local_url(r["url"]),
+                    self.file_url_to_local_url(r["url"]),
                     self.digest_name,
                     r["digests"][self.digest_name],
                     gen_html_file_tags(r),
@@ -295,7 +306,7 @@ class PyPI:
                     "requires-python": r.get("requires_python", ""),
                     "size": r["size"],
                     "upload-time": r.get("upload_time_iso_8601", ""),
-                    "url": self._file_url_to_local_url(r["url"]),
+                    "url": self.file_url_to_local_url(r["url"]),
                     "yanked": r.get("yanked", False),
                 }
             )
@@ -438,6 +449,16 @@ class SyncBase:
         f.write("  </body>\n</html>")
 
 
+def download(session: requests.Session, url: str, dest: Path) -> bool:
+    resp = session.get(url, allow_redirects=True)
+    if resp.status_code >= 400:
+        logger.warning("download %s failed, skipping this package", url)
+        return False
+    with overwrite(dest, "wb") as f:
+        f.write(resp.content)
+    return True
+
+
 class SyncPyPI(SyncBase):
     def __init__(
         self, basedir: Path, local_db: LocalVersionKV, sync_packages: bool = False
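Note: download() centralizes the fetch-and-write step both sync classes need.
It returns False on any HTTP status >= 400 instead of raising, leaving the
caller to decide whether a failed file is fatal; overwrite() is the existing
shadowmire.py helper the old inline code already used for writing the
destination. A hedged usage sketch (the session, URL, and path are
illustrative only):

    import requests
    from pathlib import Path

    session = requests.Session()
    ok = download(
        session,
        "https://example.com/packages/a/b/pkg-1.0.tar.gz",  # invented URL
        Path("packages/a/b/pkg-1.0.tar.gz"),
    )
    if not ok:
        pass  # a warning was already logged; the caller may skip or abort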
@@ -463,19 +484,15 @@ class SyncPyPI(SyncBase):
         except PackageNotFoundError:
             logger.warning("%s missing from upstream, skip.", package_name)
             return None
 
         if self.sync_packages:
             # sync packages first, then sync index
-            existing_hrefs = []
-            try:
-                with open(package_simple_path / "index.html") as f:
-                    contents = f.read()
-                existing_hrefs = get_packages_from_index_html(contents)
-            except FileNotFoundError:
-                pass
+            existing_hrefs = get_existing_hrefs(package_simple_path)
             release_files = self.pypi.get_release_files_from_meta(meta)
             # remove packages that no longer exist remotely
-            remote_hrefs = [self.pypi._file_url_to_local_url(i["url"]) for i in release_files]
+            remote_hrefs = [
+                self.pypi.file_url_to_local_url(i["url"]) for i in release_files
+            ]
             should_remove = list(set(existing_hrefs) - set(remote_hrefs))
             for p in should_remove:
                 logger.info("removing file %s (if exists)", p)
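Note: the stale-file cleanup is plain set arithmetic over local-relative URLs.
A toy example with invented filenames:

    existing_hrefs = [
        "../../packages/a/b/pkg-1.0.tar.gz",
        "../../packages/a/b/pkg-2.0.tar.gz",
    ]
    remote_hrefs = ["../../packages/a/b/pkg-2.0.tar.gz"]
    should_remove = list(set(existing_hrefs) - set(remote_hrefs))
    # -> ["../../packages/a/b/pkg-1.0.tar.gz"], i.e. files no longer upstream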
@@ -483,17 +500,14 @@ class SyncPyPI(SyncBase):
                 package_path.unlink(missing_ok=True)
             for i in release_files:
                 url = i["url"]
-                dest = (package_simple_path / self.pypi._file_url_to_local_url(i["url"])).resolve()
+                dest = (
+                    package_simple_path / self.pypi.file_url_to_local_url(i["url"])
+                ).resolve()
                 logger.info("downloading file %s -> %s", url, dest)
                 if dest.exists():
                     continue
                 dest.parent.mkdir(parents=True, exist_ok=True)
-                resp = self.session.get(url)
-                if resp.status_code >= 400:
-                    logger.warning("download %s failed, skipping this package", url)
-                    return None
-                with overwrite(dest, "wb") as f:
-                    f.write(resp.content)
+                download(self.session, url, dest)
 
         last_serial: int = meta["last_serial"]
         simple_html_contents = self.pypi.generate_html_simple_page(meta)
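Note: a failed file download now logs a warning and moves on rather than
returning None for the whole package, as the old inline code did. The dest
path is derived from the local URL produced by file_url_to_local_url(); with
illustrative values:

    # file_url_to_local_url("https://files.pythonhosted.org/packages/a/b/pkg-1.0.tar.gz")
    # returns "../../packages/a/b/pkg-1.0.tar.gz", so
    # (basedir / "simple" / "pkg" / "../../packages/a/b/pkg-1.0.tar.gz").resolve()
    # lands at basedir / "packages" / "a" / "b" / "pkg-1.0.tar.gz".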
@@ -537,25 +551,38 @@ class SyncPlainHTTP(SyncBase):
         logger.info("updating %s", package_name)
         package_simple_path = self.simple_dir / package_name
         package_simple_path.mkdir(exist_ok=True)
+        if self.sync_packages:
+            existing_hrefs = get_existing_hrefs(package_simple_path)
         # directly fetch remote files
         for filename in ("index.html", "index.v1_html", "index.v1_json"):
             file_url = urljoin(self.upstream, f"/simple/{package_name}/{filename}")
-            resp = self.session.get(file_url)
-            if resp.status_code == 404:
+            success = download(self.session, file_url, package_simple_path / filename)
+            if not success:
                 if filename != "index.html":
                     logger.warning("%s does not exist", file_url)
                     continue
                 else:
                     logger.error("%s does not exist. Stop with this.", file_url)
                     return None
-            else:
-                resp.raise_for_status()
-                content = resp.content
-                with open(package_simple_path / filename, "wb") as f:
-                    f.write(content)
 
         if self.sync_packages:
-            raise NotImplementedError
+            current_hrefs = get_existing_hrefs(package_simple_path)
+            should_remove = list(set(existing_hrefs) - set(current_hrefs))
+            for p in should_remove:
+                logger.info("removing file %s (if exists)", p)
+                package_path = (package_simple_path / p).resolve()
+                package_path.unlink(missing_ok=True)
+            package_simple_url = urljoin(self.upstream, f"/simple/{package_name}/")
+            for href in current_hrefs:
+                url = urljoin(package_simple_url, href)
+                dest = (
+                    package_simple_path / href
+                ).resolve()
+                logger.info("downloading file %s -> %s", url, dest)
+                if dest.exists():
+                    continue
+                dest.parent.mkdir(parents=True, exist_ok=True)
+                download(self.session, url, dest)
 
         last_serial = get_local_serial(package_simple_path)
         if not last_serial:
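Note: SyncPlainHTTP now mirrors package files in three steps: snapshot the
hrefs from the old index, refresh the index files from upstream, then diff the
old and new href sets to delete stale files and fetch new ones. Because hrefs
are relative, urljoin() against the package's simple URL yields the upstream
file URL; with an invented upstream:

    from urllib.parse import urljoin

    package_simple_url = "https://mirror.example.com/simple/requests/"
    href = "../../packages/a/b/requests-2.31.0.tar.gz"
    print(urljoin(package_simple_url, href))
    # https://mirror.example.com/packages/a/b/requests-2.31.0.tar.gz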
@@ -592,7 +619,9 @@ def main(args: argparse.Namespace) -> None:
     local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
 
     if args.command == "sync":
-        sync = SyncPyPI(basedir=basedir, local_db=local_db, sync_packages=args.sync_packages)
+        sync = SyncPyPI(
+            basedir=basedir, local_db=local_db, sync_packages=args.sync_packages
+        )
         local = local_db.dump()
         plan = sync.determine_sync_plan(local)
         # save plan for debugging
@@ -611,7 +640,9 @@ def main(args: argparse.Namespace) -> None:
         local_db.batch_set(local)
         local_db.dump_json()
     elif args.command == "verify":
-        sync = SyncPyPI(basedir=basedir, local_db=local_db)
+        sync = SyncPyPI(
+            basedir=basedir, local_db=local_db, sync_packages=args.sync_packages
+        )
         local_names = set(local_db.keys())
         simple_dirs = set([i.name for i in (basedir / "simple").iterdir()])
         for package_name in simple_dirs - local_names:
@@ -625,7 +656,11 @@ if __name__ == "__main__":
     subparsers = parser.add_subparsers(dest="command")
 
     parser_sync = subparsers.add_parser("sync", help="Sync from upstream")
-    parser_sync.add_argument("--sync-packages", help="Sync packages instead of just indexes", action='store_true')
+    parser_sync.add_argument(
+        "--sync-packages",
+        help="Sync packages instead of just indexes",
+        action="store_true",
+    )
     parser_genlocal = subparsers.add_parser(
         "genlocal", help="(Re)generate local db and json from simple/"
     )
@@ -633,6 +668,11 @@ if __name__ == "__main__":
         "verify",
         help="Verify existing sync from local db, download missing things, remove unreferenced packages",
     )
+    parser_verify.add_argument(
+        "--sync-packages",
+        help="Sync packages instead of just indexes",
+        action="store_true",
+    )
 
     args = parser.parse_args()
     if args.command is None:
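Note: with both subcommands accepting the flag, package mirroring is opt-in
from the command line (any other options this script takes are omitted here):

    python shadowmire.py sync --sync-packages
    python shadowmire.py verify --sync-packages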