mirror of
https://github.com/taoky/shadowmire.git
synced 2025-07-08 09:12:43 +00:00
Add some docs
This commit is contained in:
parent
af49fb183f
commit
9ecdfee147
59
README.md
59
README.md
@ -6,7 +6,64 @@ Shadowmire syncs PyPI (or plain HTTP(S) PyPI mirrors using Shadowmire) with a li
|
|||||||
|
|
||||||
### Background
|
### Background
|
||||||
|
|
||||||
PyPI's XML-RPC Mirroring Support: <https://warehouse.pypa.io/api-reference/xml-rpc.html#mirroring-support>
|
Bandersnatch is the recommended solution to sync from PyPI. However, it has these 2 issues that haven't been solved for a long time:
|
||||||
|
|
||||||
|
- Bandersnatch does not support removing packages that have been removed from upstream, which makes a mirror an easier target for supply chain attacks.
|
||||||
|
- The upstream must implement [XML-RPC APIs](https://warehouse.pypa.io/api-reference/xml-rpc.html#mirroring-support), which is not acceptable for most mirror sites.
|
||||||
|
|
||||||
|
Shadowmire is a light solution to these issues.
|
||||||
|
|
||||||
|
### Syncing Protocol
|
||||||
|
|
||||||
|
#### From PyPI
|
||||||
|
|
||||||
|
PyPI's XML-RPC APIs have a `list_packages_with_serial()` method that lists ALL packages with their "serial" (you can think of it as a version integer that increases every few moments). `changelog_last_serial()` and `changelog_since_serial()` are NOT used, as they cannot handle package deletion. Local packages that are not in the list result are removed.
|
||||||
|
|
||||||
|
Results from `list_packages_with_serial()` are stored in `remote.json`. `local.db` is a SQLite database which just stores every local package name and its local serial. `local.json` is dumped from `local.db` for downstream consumption.
|
||||||
|
|
||||||
|
#### From upstream using shadowmire
|
||||||
|
|
||||||
|
When syncing from an upstream that uses shadowmire, `local.json` takes the place of `list_packages_with_serial()`, and it can easily be served by any HTTP server. Don't use `local.db`, as it could have consistency issues while the upstream shadowmire is syncing.
|
||||||
|
|
||||||
|
### How to use
|
||||||
|
|
||||||
|
If you just need to fetch all indexes (and then use a cache solution for packages):
|
||||||
|
|
||||||
|
```shell
|
||||||
|
REPO=/path/to/pypi ./shadowmire.py sync
|
||||||
|
```
|
||||||
|
|
||||||
|
If the `REPO` environment variable is not set, it defaults to the current working directory.
|
||||||
|
|
||||||
|
If you need to download all packages, add `--sync-packages`.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./shadowmire.py sync --sync-packages
|
||||||
|
```
|
||||||
|
|
||||||
|
The sync command also supports `--exclude` -- you can give multiple regexes like this:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./shadowmire.py sync --exclude package1 --exclude ^0
|
||||||
|
```
|
||||||
|
|
||||||
|
And `--shadowmire-upstream`, if you don't want to sync from PyPI directly.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./shadowmire.py sync --shadowmire-upstream http://example.com/pypi/
|
||||||
|
```
|
||||||
|
|
||||||
|
If you already have a pypi repo, use `genlocal` first to generate a local db:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./shadowmire.py genlocal
|
||||||
|
```
|
||||||
|
|
||||||
|
The verify command can be used if you believe that something is wrong. It removes packages NOT in the local db, updates all local packages, and deletes unreferenced files in the `packages` folder:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./shadowmire.py verify
|
||||||
|
```
|
||||||
|
|
||||||
## Acknowledgements
|
## Acknowledgements
|
||||||
|
|
||||||
|
53
shadowmire.py
Normal file → Executable file
53
shadowmire.py
Normal file → Executable file
@ -345,7 +345,9 @@ class SyncBase:
|
|||||||
self.packages_dir.mkdir(parents=True, exist_ok=True)
|
self.packages_dir.mkdir(parents=True, exist_ok=True)
|
||||||
self.sync_packages = sync_packages
|
self.sync_packages = sync_packages
|
||||||
|
|
||||||
def filter_remote_with_excludes(self, remote: dict[str, int], excludes: list[re.Pattern]) -> dict[str, int]:
|
def filter_remote_with_excludes(
|
||||||
|
self, remote: dict[str, int], excludes: list[re.Pattern]
|
||||||
|
) -> dict[str, int]:
|
||||||
if not excludes:
|
if not excludes:
|
||||||
return remote
|
return remote
|
||||||
res = {}
|
res = {}
|
||||||
@ -359,7 +361,9 @@ class SyncBase:
|
|||||||
res[k] = v
|
res[k] = v
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def determine_sync_plan(self, local: dict[str, int], excludes: list[re.Pattern]) -> Plan:
|
def determine_sync_plan(
|
||||||
|
self, local: dict[str, int], excludes: list[re.Pattern]
|
||||||
|
) -> Plan:
|
||||||
remote = self.fetch_remote_versions()
|
remote = self.fetch_remote_versions()
|
||||||
remote = self.filter_remote_with_excludes(remote, excludes)
|
remote = self.filter_remote_with_excludes(remote, excludes)
|
||||||
# store remote to remote.json
|
# store remote to remote.json
|
||||||
@ -635,13 +639,22 @@ def main(args: argparse.Namespace) -> None:
|
|||||||
logging.basicConfig(level=log_level)
|
logging.basicConfig(level=log_level)
|
||||||
logger.debug(args)
|
logger.debug(args)
|
||||||
|
|
||||||
basedir = Path(".")
|
basedir = Path(os.environ.get("REPO", "."))
|
||||||
local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
|
local_db = LocalVersionKV(basedir / "local.db", basedir / "local.json")
|
||||||
|
|
||||||
|
sync: SyncBase
|
||||||
if args.command == "sync":
|
if args.command == "sync":
|
||||||
sync = SyncPyPI(
|
if args.shadowmire_upstream:
|
||||||
basedir=basedir, local_db=local_db, sync_packages=args.sync_packages
|
sync = SyncPlainHTTP(
|
||||||
)
|
upstream=args.shadowmire_upstream,
|
||||||
|
basedir=basedir,
|
||||||
|
local_db=local_db,
|
||||||
|
sync_packages=args.sync_packages,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
sync = SyncPyPI(
|
||||||
|
basedir=basedir, local_db=local_db, sync_packages=args.sync_packages
|
||||||
|
)
|
||||||
local = local_db.dump()
|
local = local_db.dump()
|
||||||
plan = sync.determine_sync_plan(local, args.excludes)
|
plan = sync.determine_sync_plan(local, args.excludes)
|
||||||
# save plan for debugging
|
# save plan for debugging
|
||||||
@ -660,11 +673,21 @@ def main(args: argparse.Namespace) -> None:
|
|||||||
local_db.batch_set(local)
|
local_db.batch_set(local)
|
||||||
local_db.dump_json()
|
local_db.dump_json()
|
||||||
elif args.command == "verify":
|
elif args.command == "verify":
|
||||||
sync = SyncPyPI(
|
if args.shadowmire_upstream:
|
||||||
basedir=basedir, local_db=local_db, sync_packages=args.sync_packages
|
sync = SyncPlainHTTP(
|
||||||
)
|
upstream=args.shadowmire_upstream,
|
||||||
|
basedir=basedir,
|
||||||
|
local_db=local_db,
|
||||||
|
sync_packages=args.sync_packages,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
sync = SyncPyPI(
|
||||||
|
basedir=basedir, local_db=local_db, sync_packages=args.sync_packages
|
||||||
|
)
|
||||||
local_names = set(local_db.keys())
|
local_names = set(local_db.keys())
|
||||||
simple_dirs = set([i.name for i in (basedir / "simple").iterdir() if i.is_dir()])
|
simple_dirs = set(
|
||||||
|
[i.name for i in (basedir / "simple").iterdir() if i.is_dir()]
|
||||||
|
)
|
||||||
for package_name in simple_dirs - local_names:
|
for package_name in simple_dirs - local_names:
|
||||||
sync.do_remove(package_name)
|
sync.do_remove(package_name)
|
||||||
sync.parallel_update(list(local_names))
|
sync.parallel_update(list(local_names))
|
||||||
@ -696,6 +719,11 @@ if __name__ == "__main__":
|
|||||||
parser_sync.add_argument(
|
parser_sync.add_argument(
|
||||||
"--exclude", help="Remote package names to exclude. Regex.", nargs="*"
|
"--exclude", help="Remote package names to exclude. Regex.", nargs="*"
|
||||||
)
|
)
|
||||||
|
parser_sync.add_argument(
|
||||||
|
"--shadowmire-upstream",
|
||||||
|
help="Use another upstream using shadowmire instead of PyPI",
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
parser_genlocal = subparsers.add_parser(
|
parser_genlocal = subparsers.add_parser(
|
||||||
"genlocal", help="(Re)generate local db and json from simple/"
|
"genlocal", help="(Re)generate local db and json from simple/"
|
||||||
)
|
)
|
||||||
@ -708,6 +736,11 @@ if __name__ == "__main__":
|
|||||||
help="Sync packages instead of just indexes",
|
help="Sync packages instead of just indexes",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
)
|
)
|
||||||
|
parser_verify.add_argument(
|
||||||
|
"--shadowmire-upstream",
|
||||||
|
help="Use another upstream using shadowmire instead of PyPI",
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.command is None:
|
if args.command is None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user