一、分块下载
特点:可以实现断点续传(从上次中断的位置继续下载)
import os
import requests
def download_file_with_resume(url, dest_path, chunk_size=1024 * 1024):
    """Download ``url`` to ``dest_path`` using HTTP Range requests, resuming
    from an existing partial file if one is present.

    Args:
        url: Direct download URL. The server must honor ``Range`` headers
            (respond 206) for resume to be correct.
        dest_path: Local file path; opened in append mode so a partial
            download is continued rather than restarted.
        chunk_size: Size in bytes of each ranged request (default 1 MB).

    Raises:
        Exception: If a chunk request returns a non-200/206 status.
    """
    # Total size comes from a HEAD request up front; it must NOT be
    # overwritten later — the per-chunk responses report only the chunk's
    # own content-length, which previously corrupted the range arithmetic.
    response = requests.head(url)
    total_size = int(response.headers.get('content-length', 0))

    # Resume point: size of whatever was already written.
    downloaded_size = os.path.getsize(dest_path) if os.path.exists(dest_path) else 0
    if total_size and downloaded_size >= total_size:
        # Nothing left to fetch.
        print(f"Download completed: {dest_path}")
        return

    with open(dest_path, 'ab') as f:
        for start in range(downloaded_size, total_size, chunk_size):
            end = min(start + chunk_size - 1, total_size - 1)
            headers = {'Range': f'bytes={start}-{end}'}
            part_response = requests.get(url, headers=headers, stream=True)
            # NOTE(review): a 200 means the server ignored the Range header
            # and returned the whole file; appending it would corrupt a
            # resumed download — TODO confirm the target server supports 206.
            if part_response.status_code in (206, 200):
                # Stream to disk instead of buffering the whole chunk in memory.
                for data in part_response.iter_content(chunk_size=64 * 1024):
                    f.write(data)
            else:
                raise Exception(f"Failed to download chunk: {part_response.status_code}")
    print(f"Download completed: {dest_path}")
# Example usage — guarded so importing this module does not start a download.
if __name__ == '__main__':
    url = 'https://xxxx.com//video.mp4'
    dest_path = 'abc.mp4'
    chunk_size = 1024 * 1024 * 1  # 1 MB per ranged request
    download_file_with_resume(url, dest_path, chunk_size)
二、线程池分块下载(大幅提高下载速度)
特点:下载速度快,1GB几十秒内下完
import math
import os
import shutil
import time
from concurrent.futures import ThreadPoolExecutor

import requests
import threadpool
class BlockDownload:
    """Download a file by fetching byte-range chunks in parallel threads,
    writing each chunk to a ``.partN`` temp file, then merging them."""

    def __init__(self):
        self.error_try = 5   # retries per chunk before giving up
        self.wait_time = 3   # seconds to sleep between retries
        # NOTE(review): proxy is hard-coded to a local forwarder — adjust or
        # remove for environments without a proxy on 7890.
        self.proxies = {'http': '127.0.0.1:7890', 'https': '127.0.0.1:7890'}
        self.timeout = 6     # per-request timeout in seconds

    def download_chunk(self, url, start_byte, end_byte, chunk_num, output_file_path):
        """Download bytes [start_byte, end_byte] of ``url`` into
        ``{output_file_path}.part{chunk_num}``.

        Retries up to ``self.error_try`` times on errors or bad status codes.

        Raises:
            RuntimeError: If every attempt fails. (The original called
                ``exit(0)`` here, which killed the process from a worker
                thread *with a success status*; raising lets the pool
                surface the failure instead.)
        """
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/547.34 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/547.34',
            'Range': f'bytes={start_byte}-{end_byte}'
        }
        for attempt in range(self.error_try):
            try:
                response = requests.get(url, headers=headers, timeout=self.timeout,
                                        proxies=self.proxies, stream=True)
                if response.status_code in (206, 200):
                    # Stream to disk rather than buffering the whole chunk.
                    with open(f'{output_file_path}.part{chunk_num}', 'wb') as file:
                        for data in response.iter_content(chunk_size=64 * 1024):
                            file.write(data)
                    print(f"块 {chunk_num} 下载完成。")
                    return
                print(f"块 {chunk_num} 下载失败,状态码: {response.status_code}")
            except Exception as e:
                print(f"块 {chunk_num} 下载错误,{attempt} 次重试!", e)
                time.sleep(self.wait_time)
        print(f"块 {chunk_num} 下载失败,程序结束!")
        raise RuntimeError(f"chunk {chunk_num} failed after {self.error_try} attempts")

    def merge_chunks(self, output_file_path, num_chunks):
        """Concatenate ``.part0 .. .part{num_chunks-1}`` into the final file
        and delete each part file after it is copied."""
        with open(output_file_path, 'wb') as output_file:
            for i in range(num_chunks):
                temp_file_path = f'{output_file_path}.part{i}'
                with open(temp_file_path, 'rb') as temp_file:
                    shutil.copyfileobj(temp_file, output_file)
                os.remove(temp_file_path)
        print(f"所有块已合并为: {output_file_path}")

    def get_optimal_chunk_size(self, total_size):
        """Return a chunk size (bytes) scaled to the total file size.

        (Fixes the original's ``slef`` parameter-name typo.)
        """
        if total_size < 64 * 1024 * 1024:        # < 64 MB
            return 1 * 1024 * 1024               # 1 MB
        elif total_size < 512 * 1024 * 1024:     # < 512 MB
            return 2 * 1024 * 1024               # 2 MB
        elif total_size < 1024 * 1024 * 1024:    # < 1024 MB
            return 4 * 1024 * 1024               # 4 MB
        else:
            return 8 * 1024 * 1024               # 8 MB

    def thread_start(self, url, file_path, thread_num):
        """Probe the file size, split it into chunks, download them with a
        thread pool, and merge the parts into ``file_path``.

        Raises:
            RuntimeError: If any chunk ultimately fails (re-raised from the
                worker via its future, instead of being swallowed).
        """
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/547.34 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/547.34'
        }
        response = requests.head(url, headers=headers, timeout=self.timeout, proxies=self.proxies)
        total_size = int(response.headers.get('content-length', 0))
        if not total_size:
            # Some servers answer HEAD with a redirect; follow Location once.
            response = requests.head(response.headers.get('Location', url), headers=headers,
                                     timeout=self.timeout, proxies=self.proxies)
            total_size = int(response.headers.get('content-length', 0))
        if not total_size:
            print(f"视频大小为:{total_size}")
            return
        print(f"文件总大小:{total_size},开始下载...")
        chunk_size = self.get_optimal_chunk_size(total_size)
        block_num = math.ceil(total_size / chunk_size)  # total number of chunks
        # stdlib ThreadPoolExecutor replaces the abandoned third-party
        # ``threadpool`` package; futures also propagate worker exceptions.
        with ThreadPoolExecutor(max_workers=thread_num) as pool:
            futures = []
            for i in range(block_num):
                start_byte = i * chunk_size
                # The last chunk covers whatever bytes remain.
                end_byte = min(start_byte + chunk_size, total_size) - 1
                futures.append(pool.submit(self.download_chunk, url, start_byte,
                                           end_byte, i, file_path))
            for future in futures:
                future.result()  # re-raise any chunk failure
        self.merge_chunks(file_path, block_num)

    def run(self):
        """Example entry point with hard-coded URL/output/thread count."""
        download_url = 'https://xxxx.com/video.mp4'
        file_path = r'D:\Video\xxx.mp4'
        thread_num = 32  # tune to file size and bandwidth
        self.thread_start(download_url, file_path, thread_num)
if __name__ == '__main__':
    # Kick off the example download defined in BlockDownload.run().
    BlockDownload().run()