- 1、从response的headers中获取文件大小。同时注意:get请求中stream=True参数的作用是仅下载响应头,并保持连接打开状态
- 2、以wb模式打开文件
- 3、调用iter_content,一块一块地遍历要下载的内容;搭配stream=True使用,此时才开始真正的下载
import requests
from tqdm import tqdm
import os
import warnings
# Install a blanket "ignore" filter so that no warnings are shown
# while the script runs.
warnings.filterwarnings('ignore')
def download_file(url, dest_dir=None):
    """Stream the file at *url* to disk while showing a tqdm progress bar.

    The request is issued with ``stream=True`` so only the response headers
    are fetched up front; the body is then pulled in 1 KiB chunks through
    ``iter_content`` and written to the output file as it arrives.

    Args:
        url: Address of the file to download. The text after the last ``/``
            is used as the local file name.
        dest_dir: Directory to save the file in. Defaults to the current
            working directory. Pass ``os.path.dirname(os.getcwd())`` to save
            one level up instead.

    Returns:
        None. The downloaded bytes are written to ``dest_dir/<file name>``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never saved as if it were the requested file.
    """
    # NOTE: the banner text mentions urllib, but the transfer is done with
    # requests; the message is left unchanged to keep the output stable.
    print("------", "Start download with urllib")
    name = url.split("/")[-1]

    if dest_dir is None:
        dest_dir = os.getcwd()
    # os.path.join picks the right separator for the platform; a hard-coded
    # backslash only yields a valid path on Windows.
    path = os.path.join(dest_dir, name)
    print("File path: ", path)

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()

        # Content-Length can be absent (e.g. chunked transfer encoding); in
        # that case tqdm runs without a known total instead of crashing.
        length = resp.headers.get('Content-Length')
        if length is None:
            content_size = None
            total_chunks = None
        else:
            content_size = int(length) / 1024  # size in KiB, for display
            # Whole number of 1 KiB chunks, rounded up.
            total_chunks = (int(length) + 1023) // 1024

        # Open the output file only after the request succeeded, so a failed
        # request does not leave an empty file behind.
        with open(path, "wb") as file:
            print("File total size is: ", content_size)
            for data in tqdm(iterable=resp.iter_content(1024), total=total_chunks, unit='k', desc=name):
                file.write(data)

    print("------", "finish download with urllib\n\n")
# Fetch the four competition CSV files one after another, in this order.
for dataset_url in (
    "http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531842/articles.csv",
    "http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531842/articles_emb.csv",
    "http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531842/testA_click_log.csv",
    "http://tianchi-competition.oss-cn-hangzhou.aliyuncs.com/531842/train_click_log.csv",
):
    download_file(dataset_url)