# Python 从 Excel 读取链接下载文件 (read download links from an Excel workbook and fetch each file)
import os
from urllib.parse import urljoin, quote, unquote
import requests
import xlrd
# URL prefix that relative links from the workbook are resolved against;
# replace it with your own.
base_url = "https://plugins.jetbrains.com"

# Path of the Excel workbook to read; set it to your own location.
file_path_name = "C:/Users/songk/Desktop/测试文件.xlsx"
# Work out the file name to save a download under.
def get_file_name(file_url, headers, default_name):
    """Build the local file name for a downloaded resource.

    The base name is taken from the ``filename=`` parameter of the
    ``Content-Disposition`` header when present, otherwise from the last
    path segment of ``file_url``. ``default_name`` is then inserted between
    the stem and the extension, e.g. ``song.mp3`` -> ``song_<default>.mp3``.

    Args:
        file_url: URL the file was requested from.
        headers: Mapping of response headers.
        default_name: Label appended to the stem; returned on its own when
            no file name can be determined.

    Returns:
        The file name (never containing a directory component), or
        ``default_name`` when neither the header nor the URL yields a name.
    """
    # Function-scope import: the module only imports urljoin/quote/unquote.
    from urllib.parse import urlsplit

    def _leaf(name):
        # Keep only the final path component so a server-supplied name such
        # as "../../x" cannot escape the target directory. Backslashes are
        # normalised first because they are separators on Windows.
        return os.path.basename(name.replace('\\', '/'))

    filename = ''
    disposition = ''
    if 'Content-Disposition' in headers and headers['Content-Disposition']:
        disposition = headers['Content-Disposition']

    # The first ';'-separated item is the disposition type ("attachment");
    # the filename parameter may appear at any later position.
    for param in disposition.split(';')[1:]:
        key, sep, value = param.partition('=')  # split on the first '=' only
        if sep and key.strip().lower() == 'filename':
            filename = _leaf(unquote(value.strip().strip('"')))
            break

    if not filename:
        # Use the URL path only, so a query string or fragment (which may
        # itself contain '/') never ends up in the name.
        filename = _leaf(unquote(urlsplit(file_url).path))

    if not filename:
        return default_name

    stem, ext = os.path.splitext(filename)
    return stem + "_" + default_name + ext
def download_simple_file(file_url, path, user_name):
    """Download ``file_url`` with a single request and save it under ``path``.

    The whole response body is held in memory before it is written out.

    Args:
        file_url: Absolute URL to fetch.
        path: Directory the file is written into.
        user_name: Label forwarded to ``get_file_name`` for the saved name.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the target file cannot be written.
    """
    # Without a timeout a stalled server would block the run indefinitely.
    with requests.get(file_url, timeout=30) as r:
        # Fail loudly instead of saving an error page under the target name.
        r.raise_for_status()
        # Drop any double quotes left around a quoted header file name.
        name = get_file_name(file_url, r.headers, user_name).strip('"')
        file_name = os.path.join(path, name)
        with open(file_name, 'wb') as f:
            f.write(r.content)
    print("%s保存成功" % file_name)
def download_stream_file(file_url, path, user_name):
    """Stream ``file_url`` to disk in chunks and save it under ``path``.

    The body is written chunk by chunk rather than loaded in one piece.

    Args:
        file_url: Absolute URL to fetch.
        path: Directory the file is written into.
        user_name: Label forwarded to ``get_file_name`` for the saved name.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the target file cannot be written.
    """
    chunk_size = 1024
    # Without a timeout a stalled server would block the run indefinitely.
    with requests.get(file_url, stream=True, timeout=30) as r:
        # Fail loudly instead of saving an error page under the target name.
        r.raise_for_status()
        # Drop any double quotes left around a quoted header file name.
        name = get_file_name(file_url, r.headers, user_name).strip('"')
        file_name = os.path.join(path, name)
        with open(file_name, "wb") as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    print("%s保存成功" % file_name)
def _cell_text(cell):
    """Return a worksheet cell's value as stripped text."""
    value = cell.value
    # Numeric cells come back as floats; render whole numbers without ".0".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def download_file(file_path):
    """Download every file linked from the workbook at ``file_path``.

    For each sheet, the first row is treated as a header and skipped. In
    every following row the first column is used as the label added to the
    saved file name and the second column as the link, which is resolved
    against ``base_url``. Files are saved in a "录音下载内容" directory
    created next to the workbook.

    Args:
        file_path: Path of the Excel workbook to read.

    Raises:
        Exception: Any error raised while processing a row is printed and
            then re-raised, which stops the run at that row.
    """
    base_dir = os.path.dirname(file_path)
    voice_file_path = os.path.join(base_dir, "录音下载内容")
    # Create the output directory; an existing one is not an error.
    os.makedirs(voice_file_path, exist_ok=True)

    # NOTE(review): xlrd 2.x reads only .xls workbooks - confirm the
    # installed xlrd version can open the workbook format in use.
    excel = xlrd.open_workbook(file_path)
    for sheet in excel.sheets():
        # Row 0 is the header, so iteration starts at 1.
        for rx in range(1, sheet.nrows):
            try:
                row = sheet.row(rx)
                if len(row) < 2:
                    continue  # no link column in this row
                user_name = _cell_text(row[0])
                temp_url = _cell_text(row[1])
                if not temp_url:
                    # A blank link would resolve to base_url itself and
                    # download the site's landing page.
                    continue
                url = urljoin(base_url, quote(temp_url, safe=";/?:@&=+$,"))
                print("开始下载%s - %s" % (user_name, url))
                # download_stream_file writes in chunks, which suits large
                # files; download_simple_file is the in-memory alternative.
                download_stream_file(url, voice_file_path, user_name)
            except Exception as e:
                print(e)
                # Re-raise so a failed row stops the run; remove this line
                # to carry on with the remaining rows instead.
                raise
# Script entry point: process the workbook configured in file_path_name.
if __name__ == "__main__":
    download_file(file_path_name)