tunasync-scripts/helpers/docker-ce-filelist.py
2017-08-21 14:38:21 +08:00

77 lines
1.7 KiB
Python
Executable File

#!/usr/bin/env python3
import requests
from pyquery import PyQuery as pq
# URLs of repository-metadata entries. recursive_get_filelist() appends to this
# list (instead of yielding or descending) when called with filter_meta=True,
# and get_meta_filelist() walks the collected entries afterwards.
meta_urls = []
def is_metafile_url(url):
    """Return True when *url* looks like package-repository metadata.

    Two families are recognised:

    * deb-style trees (``/debian/``, ``/ubuntu/``, ``/raspbian/`` in the URL)
      whose URL also contains ``/Contents-``, ``/binary-`` or ``Release``;
    * rpm-style trees (``/fedora/``, ``/centos/`` in the URL) whose URL also
      contains ``/repodata/``.

    Anything else is reported as a regular file (False).
    """
    deb_markers = ('/Contents-', '/binary-', 'Release')

    in_deb_tree = any(
        '/' + name + '/' in url for name in ('debian', 'ubuntu', 'raspbian')
    )
    if in_deb_tree and any(marker in url for marker in deb_markers):
        return True

    in_rpm_tree = any('/' + name + '/' in url for name in ('fedora', 'centos'))
    return in_rpm_tree and '/repodata/' in url
def recursive_get_filelist(base_url, filter_meta=False):
    """Yield the URL of every file reachable below *base_url*.

    A *base_url* that does not end in ``/`` is treated as a file and yielded
    unchanged.  Otherwise the page is fetched and the text of each ``<a>``
    element is appended to *base_url*: entries ending in ``/`` are descended
    into recursively, anything else is yielded.  A listing whose response is
    not OK is skipped silently.

    Args:
        base_url: File URL, or directory URL ending in ``/``.
        filter_meta: When true, entries for which ``is_metafile_url`` returns
            true are neither yielded nor descended into; they are appended to
            the module-level ``meta_urls`` list instead.

    Yields:
        File URLs as strings.
    """
    if not base_url.endswith('/'):
        yield base_url
        return

    # NOTE(review): no timeout is passed to requests.get, so a stalled server
    # can block the crawl; consider adding one together with error handling.
    r = requests.get(base_url)
    if not r.ok:
        return

    d = pq(r.text)
    for link in d('a'):
        name = link.text
        # An anchor with no leading text (e.g. one that only wraps an image)
        # has text None; calling .startswith on it would abort the whole
        # crawl.  An empty text would yield the directory URL itself as a
        # file.  Neither names an entry, so skip both.
        if not name or name.startswith('..'):
            continue
        href = base_url + name
        if filter_meta and is_metafile_url(href):
            meta_urls.append(href)
        elif name.endswith('/'):
            yield from recursive_get_filelist(href, filter_meta=filter_meta)
        else:
            yield href
def get_filelist(base_url):
    """Yield the non-metadata file URLs found below *base_url*.

    Runs recursive_get_filelist with filter_meta=True, so metadata entries
    are collected into ``meta_urls`` rather than yielded here.
    """
    for file_url in recursive_get_filelist(base_url, filter_meta=True):
        yield file_url
def get_meta_filelist():
    """Yield every file URL below the entries collected in ``meta_urls``.

    Each collected entry is expanded with filter_meta=False, so nothing is
    set aside during this pass.
    """
    listings = (
        recursive_get_filelist(meta_url, filter_meta=False)
        for meta_url in meta_urls
    )
    for listing in listings:
        yield from listing
def parse_args(argv=None):
    """Parse the command line of this script.

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    Returns:
        The argparse namespace, with ``base_url`` set to the given URL or to
        ``https://download.docker.com/`` when none was given.
    """
    import argparse

    parser = argparse.ArgumentParser()
    # nargs="?" makes the positional optional.  Without it argparse treats
    # base_url as required and the declared default is never applied.
    parser.add_argument(
        "base_url", nargs="?", default="https://download.docker.com/"
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_args()

    # Regular files are printed first.  Metadata entries are collected into
    # meta_urls while this loop runs, so they must be listed only afterwards.
    for file_url in get_filelist(args.base_url):
        print(file_url, flush=True)

    for file_url in get_meta_filelist():
        print(file_url, flush=True)
# vim: ts=4 sw=4 sts=4 expandtab