InterHand2.6M 数据集下载脚本(Python,带进度条)
官方提供的下载脚本在我这里运行会报错;修改后虽然能够下载,但没有进度条。对于约 80 GB 的数据集来说,下载时看不到进度,心里很没底。在网上找了一圈也没有找到合适的代码,于是自己参考着写了一个。
import requests
from tqdm import tqdm
def download(url: str, fname: str, *, chunk_size: int = 64 * 1024, timeout: float = 30.0):
    """Stream ``url`` into the local file ``fname`` while showing a tqdm progress bar.

    Args:
        url: Address of the remote file to fetch.
        fname: Path of the local file to write; also used as the progress-bar label.
        chunk_size: Number of bytes requested from the response per iteration.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection raises instead of hanging forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # Using the response as a context manager releases the connection even
    # when writing to disk or iterating the body fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        # Check the status before opening the file so that an error page is
        # never saved to disk as if it were the requested payload.
        resp.raise_for_status()

        # Falls back to 0 when the server sends no Content-Length header.
        total = int(resp.headers.get('content-length', 0))

        with open(fname, 'wb') as file, tqdm(
            desc=fname,
            total=total,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in resp.iter_content(chunk_size=chunk_size):
                # file.write returns the number of bytes written, which is
                # exactly how far the bar has to advance.
                size = file.write(data)
                bar.update(size)
# Remote directory that hosts every file of the 5-fps image release.
base_url = 'https://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15.s3.amazonaws.com/InterHand2.6M/InterHand2.6M.images.5.fps.v1.0/'

# All tar fragments and the checksum file share this stem.
archive_stem = 'InterHand2.6M.images.5.fps.v1.0.tar'

# Fragment suffixes run "aa".."az" and then "ba".."br"; "bs" is the first
# suffix that is NOT downloaded, so the list is cut right before it.
part_suffixes = [first + second for first in 'ab' for second in 'abcdefghijklmnopqrstuvwxyz']
part_suffixes = part_suffixes[:part_suffixes.index('bs')]

# Download order: every tar fragment first, then the helper files.
remote_names = [archive_stem + '.part' + suffix for suffix in part_suffixes]
remote_names.extend([archive_stem + '.CHECKSUM', 'unzip.sh', 'verify_download.py'])

# Each file is saved in the current directory under its remote name.
for remote_name in remote_names:
    download(base_url + remote_name, remote_name)