mirror of
https://github.com/tuna/tunasync-scripts.git
synced 2025-04-20 12:42:50 +00:00
77 lines
1.7 KiB
Python
Executable File
77 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import requests
|
|
from pyquery import PyQuery as pq
|
|
|
|
# Repository-metadata URLs set aside while crawling; they are listed only
# after every regular file has been printed.
meta_urls = list()
|
|
|
|
|
|
def is_metafile_url(url):
    """Return True when *url* looks like package-repository metadata.

    Debian-style trees (debian, ubuntu, raspbian) match on a Contents-*
    file, a binary-* directory or anything containing "Release".
    RPM-style trees (fedora, centos) match on a repodata directory.
    """
    apt_markers = ('/Contents-', '/binary-', 'Release')

    under_apt_tree = any(
        '/' + name + '/' in url for name in ('debian', 'ubuntu', 'raspbian')
    )
    if under_apt_tree and any(marker in url for marker in apt_markers):
        return True

    under_rpm_tree = any(
        '/' + name + '/' in url for name in ('fedora', 'centos')
    )
    return under_rpm_tree and '/repodata/' in url
|
|
|
|
|
|
def recursive_get_filelist(base_url, filter_meta=False, timeout=60):
    """Yield the URL of every file reachable from an HTML directory index.

    A URL that does not end with '/' is treated as a file and yielded
    unchanged. Otherwise the page is fetched and the text of every anchor
    is appended to ``base_url``: entries ending in '/' are descended into
    recursively, all others are yielded.

    Args:
        base_url: A file URL, or a directory URL ending in '/'.
        filter_meta: When true, URLs accepted by ``is_metafile_url`` are
            appended to the module-level ``meta_urls`` list instead of
            being yielded or descended into.
        timeout: Seconds handed to ``requests.get`` so that a stalled
            server cannot block the listing forever.

    Yields:
        File URLs as strings.
    """
    if not base_url.endswith('/'):
        yield base_url
        return

    r = requests.get(base_url, timeout=timeout)
    if not r.ok:
        # A directory that cannot be fetched is skipped, not fatal.
        return

    d = pq(r.text)
    for link in d('a'):
        name = link.text
        # An anchor with no leading text has ``text`` set to None, and an
        # empty one would resolve to the directory itself; neither names an
        # entry. Parent-directory links ('..') are skipped as well.
        if not name or name.startswith('..'):
            continue
        href = base_url + name
        if filter_meta and is_metafile_url(href):
            meta_urls.append(href)
        elif name.endswith('/'):
            yield from recursive_get_filelist(
                href, filter_meta=filter_meta, timeout=timeout)
        else:
            yield href
|
|
|
|
|
|
def get_filelist(base_url):
    """Yield the regular (non-metadata) file URLs found under *base_url*.

    Metadata URLs met along the way are collected by
    ``recursive_get_filelist`` rather than yielded here.
    """
    for file_url in recursive_get_filelist(base_url, filter_meta=True):
        yield file_url
|
|
|
|
|
|
def get_meta_filelist():
    """Yield every file URL under the entries recorded in ``meta_urls``.

    Each recorded entry is expanded without metadata filtering, so files
    and subdirectories beneath it are all listed.
    """
    for meta_root in meta_urls:
        for file_url in recursive_get_filelist(meta_root, filter_meta=False):
            yield file_url
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # nargs="?" makes the positional optional. Without it argparse treats
    # base_url as required and the default below is never applied.
    parser.add_argument("base_url", nargs="?",
                        default="https://download.docker.com/")
    args = parser.parse_args()

    # Regular files are printed first. The crawl appends metadata URLs to
    # meta_urls as it goes, so the metadata pass has to run afterwards.
    for file_url in get_filelist(args.base_url):
        print(file_url, flush=True)

    for file_url in get_meta_filelist():
        print(file_url, flush=True)
|
|
|
|
|
|
# vim: ts=4 sw=4 sts=4 expandtab
|