Add a GC (garbage-collection) function to anaconda.py;

This commit is contained in:
z4yx 2020-03-15 10:20:42 +08:00
parent 95f7469d64
commit 783ad4a6ff

View File

@ -68,6 +68,12 @@ logging.basicConfig(
format="[%(asctime)s] [%(levelname)s] %(message)s", format="[%(asctime)s] [%(levelname)s] %(message)s",
) )
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string, e.g. 1536 -> '1.5KiB'.

    Walks the binary (1024-based) IEC prefixes until the value drops
    below 1024; anything beyond Zi falls through to 'Yi'.
    """
    value = num
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value /= 1024.0
    # Value exceeded every listed prefix; report it in yobibytes.
    return "%.1f%s%s" % (value, 'Yi', suffix)
def md5_check(file: Path, md5: str = None): def md5_check(file: Path, md5: str = None):
m = hashlib.md5() m = hashlib.md5()
@ -91,7 +97,7 @@ def curl_download(remote_url: str, dst_file: Path, md5: str = None):
return "MD5 mismatch" return "MD5 mismatch"
def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path): def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool):
logging.info("Start syncing {}".format(repo_url)) logging.info("Start syncing {}".format(repo_url))
local_dir.mkdir(parents=True, exist_ok=True) local_dir.mkdir(parents=True, exist_ok=True)
@ -107,6 +113,8 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path):
with tmp_repodata.open() as f: with tmp_repodata.open() as f:
repodata = json.load(f) repodata = json.load(f)
remote_filelist = []
total_size = 0
packages = repodata['packages'] packages = repodata['packages']
if 'packages.conda' in repodata: if 'packages.conda' in repodata:
packages.update(repodata['packages.conda']) packages.update(repodata['packages.conda'])
@ -115,10 +123,12 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path):
continue continue
file_size, md5 = meta['size'], meta['md5'] file_size, md5 = meta['size'], meta['md5']
total_size += file_size
pkg_url = '/'.join([repo_url, filename]) pkg_url = '/'.join([repo_url, filename])
dst_file = local_dir / filename dst_file = local_dir / filename
dst_file_wip = local_dir / ('.downloading.' + filename) dst_file_wip = local_dir / ('.downloading.' + filename)
remote_filelist.append(dst_file)
if dst_file.is_file(): if dst_file.is_file():
stat = dst_file.stat() stat = dst_file.stat()
@ -142,13 +152,30 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path):
break break
logging.error("Failed to download {}: {}".format(filename, err)) logging.error("Failed to download {}: {}".format(filename, err))
shutil.move(str(tmp_repodata), str(local_dir / "repodata.json")) shutil.move(str(tmp_repodata), str(local_dir / "repodata.json"))
shutil.move(str(tmp_bz2_repodata), str(local_dir / "repodata.json.bz2")) shutil.move(str(tmp_bz2_repodata), str(local_dir / "repodata.json.bz2"))
if delete:
local_filelist = []
delete_count = 0
for i in local_dir.glob('*.tar.bz2'):
local_filelist.append(i)
for i in local_dir.glob('*.conda'):
local_filelist.append(i)
for i in set(local_filelist) - set(remote_filelist):
logging.info("Deleting {}".format(i))
i.unlink()
delete_count += 1
logging.info("{} files deleted".format(delete_count))
logging.info("{}: {} files, {} in total".format(
repodata_url, len(remote_filelist), sizeof_fmt(total_size)))
def sync_installer(repo_url, local_dir: Path): def sync_installer(repo_url, local_dir: Path):
logging.info("Start syncing {}".format(repo_url)) logging.info("Start syncing {}".format(repo_url))
local_dir.mkdir(parents=True, exist_ok=True) local_dir.mkdir(parents=True, exist_ok=True)
full_scan = random.random() < 0.1 # Do full version check less frequently
def remote_list(): def remote_list():
r = requests.get(repo_url, timeout=TIMEOUT_OPTION) r = requests.get(repo_url, timeout=TIMEOUT_OPTION)
@ -178,6 +205,12 @@ def sync_installer(repo_url, local_dir: Path):
if remote_filesize == local_filesize and remote_date.timestamp() == local_mtime and\ if remote_filesize == local_filesize and remote_date.timestamp() == local_mtime and\
(random.random() < 0.95 or md5_check(dst_file, md5)): (random.random() < 0.95 or md5_check(dst_file, md5)):
logging.info("Skipping {}".format(filename)) logging.info("Skipping {}".format(filename))
# Stop the scanning if the most recent version is present
if not full_scan:
logging.info("Stop the scanning")
break
continue continue
logging.info("Removing {}".format(filename)) logging.info("Removing {}".format(filename))
@ -201,6 +234,8 @@ def main():
import argparse import argparse
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--working-dir", default=WORKING_DIR) parser.add_argument("--working-dir", default=WORKING_DIR)
parser.add_argument("--delete", action='store_true',
help='delete unreferenced package files')
args = parser.parse_args() args = parser.parse_args()
if args.working_dir is None: if args.working_dir is None:
@ -209,7 +244,6 @@ def main():
working_dir = Path(args.working_dir) working_dir = Path(args.working_dir)
random.seed() random.seed()
if random.random() < 0.1: # Syncing installer less frequently
logging.info("Syncing installers...") logging.info("Syncing installers...")
for dist in ("archive", "miniconda"): for dist in ("archive", "miniconda"):
remote_url = "{}/{}".format(CONDA_REPO_BASE_URL, dist) remote_url = "{}/{}".format(CONDA_REPO_BASE_URL, dist)
@ -226,7 +260,7 @@ def main():
tmpdir = tempfile.mkdtemp() tmpdir = tempfile.mkdtemp()
try: try:
sync_repo(remote_url, local_dir, Path(tmpdir)) sync_repo(remote_url, local_dir, Path(tmpdir), args.delete)
except Exception: except Exception:
logging.exception("Failed to sync repo: {}/{}".format(repo, arch)) logging.exception("Failed to sync repo: {}/{}".format(repo, arch))
finally: finally:
@ -238,7 +272,7 @@ def main():
tmpdir = tempfile.mkdtemp() tmpdir = tempfile.mkdtemp()
try: try:
sync_repo(remote_url, local_dir, Path(tmpdir)) sync_repo(remote_url, local_dir, Path(tmpdir), args.delete)
except Exception: except Exception:
logging.exception("Failed to sync repo: {}".format(repo)) logging.exception("Failed to sync repo: {}".format(repo))
finally: finally: