mirror of
https://github.com/taoky/shadowmire.git
synced 2025-07-08 17:32:43 +00:00
Use unquote to handle percent-encoded paths
In previous versions, shadowmire failed to handle filenames such as "logic gates.tar": the file would be saved as "logic%20gates.tar" instead. Existing instances using shadowmire might need to re-verify.
This commit is contained in:
parent
99bd4d932e
commit
47529107ae
@ -7,7 +7,7 @@ import xmlrpc.client
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
from urllib.parse import urljoin, urlparse, urlunparse
|
from urllib.parse import urljoin, urlparse, urlunparse, unquote
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
import logging
|
import logging
|
||||||
@ -340,11 +340,25 @@ class PyPI:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def file_url_to_local_url(url: str) -> str:
|
def file_url_to_local_url(url: str) -> str:
|
||||||
|
"""
|
||||||
|
This function should NOT be used to construct a local Path!
|
||||||
|
"""
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
assert parsed.path.startswith("/packages")
|
assert parsed.path.startswith("/packages")
|
||||||
prefix = "../.."
|
prefix = "../.."
|
||||||
return prefix + parsed.path
|
return prefix + parsed.path
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def file_url_to_local_path(url: str) -> Path:
|
||||||
|
"""
|
||||||
|
Unquote() and returns a Path
|
||||||
|
"""
|
||||||
|
path = urlparse(url).path
|
||||||
|
path = unquote(path)
|
||||||
|
assert path.startswith("/packages")
|
||||||
|
path = path[1:]
|
||||||
|
return Path("../..") / path
|
||||||
|
|
||||||
# Func modified from bandersnatch
|
# Func modified from bandersnatch
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate_html_simple_page(cls, package_meta: dict) -> str:
|
def generate_html_simple_page(cls, package_meta: dict) -> str:
|
||||||
@ -574,13 +588,14 @@ class SyncBase:
|
|||||||
# OK, check if all hrefs have corresponding files
|
# OK, check if all hrefs have corresponding files
|
||||||
if self.sync_packages:
|
if self.sync_packages:
|
||||||
for href, size in hrefsize_json:
|
for href, size in hrefsize_json:
|
||||||
dest_pathstr = normpath(package_simple_path / href)
|
relative_path = unquote(href)
|
||||||
|
dest_pathstr = normpath(package_simple_path / relative_path)
|
||||||
try:
|
try:
|
||||||
# Fast shortcut to avoid stat() it
|
# Fast shortcut to avoid stat() it
|
||||||
if dest_pathstr not in packages_pathcache:
|
if dest_pathstr not in packages_pathcache:
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
if compare_size and size != -1:
|
if compare_size and size != -1:
|
||||||
dest = Path(normpath(package_simple_path / href))
|
dest = Path(dest_pathstr)
|
||||||
# So, do stat() for real only when we need to do so,
|
# So, do stat() for real only when we need to do so,
|
||||||
# have a size, and it really exists in pathcache.
|
# have a size, and it really exists in pathcache.
|
||||||
dest_stat = dest.stat()
|
dest_stat = dest.stat()
|
||||||
@ -849,7 +864,8 @@ class SyncPyPI(SyncBase):
|
|||||||
self.pypi.file_url_to_local_url(i["url"]) for i in release_files
|
self.pypi.file_url_to_local_url(i["url"]) for i in release_files
|
||||||
]
|
]
|
||||||
should_remove = list(set(existing_hrefs) - set(remote_hrefs))
|
should_remove = list(set(existing_hrefs) - set(remote_hrefs))
|
||||||
for p in should_remove:
|
for href in should_remove:
|
||||||
|
p = unquote(href)
|
||||||
logger.info("removing file %s (if exists)", p)
|
logger.info("removing file %s (if exists)", p)
|
||||||
package_path = Path(normpath(package_simple_path / p))
|
package_path = Path(normpath(package_simple_path / p))
|
||||||
package_path.unlink(missing_ok=True)
|
package_path.unlink(missing_ok=True)
|
||||||
@ -857,7 +873,7 @@ class SyncPyPI(SyncBase):
|
|||||||
url = i["url"]
|
url = i["url"]
|
||||||
dest = Path(
|
dest = Path(
|
||||||
normpath(
|
normpath(
|
||||||
package_simple_path / self.pypi.file_url_to_local_url(i["url"])
|
package_simple_path / self.pypi.file_url_to_local_path(i["url"])
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
logger.info("downloading file %s -> %s", url, dest)
|
logger.info("downloading file %s -> %s", url, dest)
|
||||||
@ -951,15 +967,17 @@ class SyncPlainHTTP(SyncBase):
|
|||||||
release_files = PyPI.get_release_files_from_meta(meta)
|
release_files = PyPI.get_release_files_from_meta(meta)
|
||||||
remote_hrefs = [PyPI.file_url_to_local_url(i["url"]) for i in release_files]
|
remote_hrefs = [PyPI.file_url_to_local_url(i["url"]) for i in release_files]
|
||||||
should_remove = list(set(existing_hrefs) - set(remote_hrefs))
|
should_remove = list(set(existing_hrefs) - set(remote_hrefs))
|
||||||
for p in should_remove:
|
for href in should_remove:
|
||||||
|
p = unquote(href)
|
||||||
logger.info("removing file %s (if exists)", p)
|
logger.info("removing file %s (if exists)", p)
|
||||||
package_path = Path(normpath(package_simple_path / p))
|
package_path = Path(normpath(package_simple_path / p))
|
||||||
package_path.unlink(missing_ok=True)
|
package_path.unlink(missing_ok=True)
|
||||||
package_simple_url = urljoin(self.upstream, f"simple/{package_name}/")
|
package_simple_url = urljoin(self.upstream, f"simple/{package_name}/")
|
||||||
for i in release_files:
|
for i in release_files:
|
||||||
href = PyPI.file_url_to_local_url(i["url"])
|
href = PyPI.file_url_to_local_url(i["url"])
|
||||||
|
path = PyPI.file_url_to_local_path(i["url"])
|
||||||
url = urljoin(package_simple_url, href)
|
url = urljoin(package_simple_url, href)
|
||||||
dest = Path(normpath(package_simple_path / href))
|
dest = Path(normpath(package_simple_path / path))
|
||||||
logger.info("downloading file %s -> %s", url, dest)
|
logger.info("downloading file %s -> %s", url, dest)
|
||||||
if self.skip_this_package(i, dest):
|
if self.skip_this_package(i, dest):
|
||||||
continue
|
continue
|
||||||
@ -1313,12 +1331,12 @@ def verify(
|
|||||||
hrefs = get_existing_hrefs(sd)
|
hrefs = get_existing_hrefs(sd)
|
||||||
hrefs = [] if hrefs is None else hrefs
|
hrefs = [] if hrefs is None else hrefs
|
||||||
nps = []
|
nps = []
|
||||||
for i in hrefs:
|
for href in hrefs:
|
||||||
|
i = unquote(href)
|
||||||
# use normpath, which is much faster than pathlib resolve(), as it does not need to access fs
|
# use normpath, which is much faster than pathlib resolve(), as it does not need to access fs
|
||||||
# we could make sure no symlinks could affect this here
|
# we could make sure no symlinks could affect this here
|
||||||
np = normpath(sd / i)
|
np = normpath(sd / i)
|
||||||
logger.debug("add to ref_set: %s", np)
|
logger.debug("add to ref_set: %s", np)
|
||||||
# ref_set.add(np)
|
|
||||||
nps.append(np)
|
nps.append(np)
|
||||||
return nps
|
return nps
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
# It requires a full simple/ and db (genlocal-ed)
|
# It requires a full simple/ and db (genlocal-ed)
|
||||||
# Call like: python -m utils.create_package_stubs /path/to/pypi/
|
# Call like: python -m utils.create_package_stubs /path/to/pypi/
|
||||||
|
|
||||||
|
from urllib.parse import unquote
|
||||||
from shadowmire import LocalVersionKV, get_package_urls_size_from_index_json
|
from shadowmire import LocalVersionKV, get_package_urls_size_from_index_json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import sys
|
import sys
|
||||||
@ -28,7 +29,8 @@ if __name__ == "__main__":
|
|||||||
json_simple = package_simple_path / "index.v1_json"
|
json_simple = package_simple_path / "index.v1_json"
|
||||||
hrefsize_json = get_package_urls_size_from_index_json(json_simple)
|
hrefsize_json = get_package_urls_size_from_index_json(json_simple)
|
||||||
for href, _ in hrefsize_json:
|
for href, _ in hrefsize_json:
|
||||||
dest = Path(normpath(package_simple_path / href))
|
relative = unquote(href)
|
||||||
|
dest = Path(normpath(package_simple_path / relative))
|
||||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||||
if not dest.exists():
|
if not dest.exists():
|
||||||
dest.touch()
|
dest.touch()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user