Python 从 Excel 读取链接下载文件

Python 从 Excel 读取链接下载文件

import os
from urllib.parse import urljoin, quote, unquote

import requests
import xlrd

# URL prefix that links read from the spreadsheet are joined onto;
# change it to your own.
base_url = "https://plugins.jetbrains.com"

# Path of the Excel workbook to read; change it to your own.
file_path_name = "C:/Users/songk/Desktop/测试文件.xlsx"


def get_file_name(file_url, headers, default_name):
    """Work out the local file name for a downloaded resource.

    The name is taken from the ``filename`` parameter of the
    ``Content-Disposition`` header when present, otherwise from the last
    path segment of ``file_url``. ``default_name`` is then inserted between
    the stem and the extension (``report.pdf`` -> ``report_<default>.pdf``).

    Args:
        file_url: URL the resource was fetched from.
        headers: Mapping of response headers (may be empty or ``None``).
        default_name: Tag appended to the stem; returned on its own when no
            file name can be derived.

    Returns:
        A bare file name without any directory component.
    """
    filename = ''
    disposition = headers.get('Content-Disposition') if headers else None
    if disposition:
        # The first segment is the disposition type ("attachment"/"inline");
        # the filename parameter may appear at any later position.
        # NOTE: a ';' inside a quoted file name is not handled by this split.
        for param in disposition.split(';')[1:]:
            # partition() splits on the first '=' only, so names that
            # themselves contain '=' survive intact.
            key, sep, value = param.strip().partition('=')
            if sep and key.strip().lower() == 'filename':
                filename = unquote(value.strip().strip('"'))
                break
    if not filename:
        # Strip fragment and query BEFORE taking the basename; otherwise a
        # '/' inside the query string would be mistaken for a path separator.
        url_path = file_url.split('#', 1)[0].split('?', 1)[0]
        filename = os.path.basename(url_path)
    # Keep only the final component so a server-supplied name such as
    # "../../x" cannot place the file outside the target directory.
    filename = os.path.basename(filename.replace('\\', '/'))
    if not filename:
        return default_name
    stem, ext = os.path.splitext(filename)
    return stem + "_" + default_name + ext


def download_simple_file(file_url, path, user_name, timeout=30):
    """Download ``file_url`` with a single request and save it under ``path``.

    The whole response body is held in memory before it is written, so this
    variant is meant for small files.

    Args:
        file_url: Absolute URL to fetch.
        path: Directory the file is written into.
        user_name: Tag handed to ``get_file_name`` to build the saved name.
        timeout: Seconds to wait on the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
        OSError: If the target file cannot be written.
    """
    # The context manager releases the connection even when an error occurs.
    with requests.get(file_url, timeout=timeout) as r:
        # Without this check an HTML error page would be saved and reported
        # as a successful download.
        r.raise_for_status()
        name = get_file_name(file_url, r.headers, user_name).strip('"')
        file_name = os.path.join(path, name)  # full path of the saved file
        with open(file_name, 'wb') as f:
            f.write(r.content)
    print("%s保存成功" % file_name)


def download_stream_file(file_url, path, user_name, timeout=30,
                         chunk_size=64 * 1024):
    """Stream ``file_url`` to disk chunk by chunk and save it under ``path``.

    The body is never held in memory as a whole, which makes this variant
    suitable for large files.

    Args:
        file_url: Absolute URL to fetch.
        path: Directory the file is written into.
        user_name: Tag handed to ``get_file_name`` to build the saved name.
        timeout: Seconds to wait on the server before giving up.
        chunk_size: Number of bytes requested per read from the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
        OSError: If the target file cannot be written.
    """
    with requests.get(file_url, stream=True, timeout=timeout) as r:
        # Without this check an HTML error page would be saved and reported
        # as a successful download.
        r.raise_for_status()
        name = get_file_name(file_url, r.headers, user_name).strip('"')
        file_name = os.path.join(path, name)  # full path of the saved file
        with open(file_name, "wb") as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    print("%s保存成功" % file_name)


def _cell_text(cell):
    """Return a worksheet cell's value as stripped text, whatever its type."""
    value = cell.value
    if isinstance(value, float) and value.is_integer():
        # Numeric cells are read as floats; avoid turning 1001 into "1001.0".
        value = int(value)
    return str(value).strip()


def download_file(file_path):
    """Download every link listed in the workbook at ``file_path``.

    Each sheet is expected to have a header row followed by data rows whose
    first column is a user name and whose second column is a URL (absolute,
    or relative to ``base_url``). Files are saved into a "录音下载内容"
    directory created next to the workbook.

    NOTE: xlrd 2.0 and later only read the legacy ``.xls`` format; opening an
    ``.xlsx`` workbook requires xlrd < 2.0.

    Args:
        file_path: Path of the Excel workbook.

    Raises:
        Exception: Whatever a failing row raised; the error is printed and
            then re-raised, which stops the batch.
    """
    base_dir = os.path.dirname(file_path)
    voice_file_path = os.path.join(base_dir, "录音下载内容")
    # Create the output directory; exist_ok avoids the check-then-create race.
    os.makedirs(voice_file_path, exist_ok=True)
    excel = xlrd.open_workbook(file_path)
    for sheet in excel.sheets():
        # Row 0 is the header, so data starts at row 1.
        for rx in range(1, sheet.nrows):
            try:
                row = sheet.row(rx)
                user_name = _cell_text(row[0])
                temp_url = _cell_text(row[1])
                if not temp_url:
                    # A blank link would resolve to base_url itself and
                    # download the site's front page; skip the row instead.
                    print("第%d行没有下载链接,已跳过" % (rx + 1))
                    continue
                url = urljoin(base_url, quote(temp_url, safe=";/?:@&=+$,"))
                print("开始下载%s - %s" % (user_name, url))
                # For small files download_simple_file(url, voice_file_path,
                # user_name) can be used instead; streaming suits large files.
                download_stream_file(url, voice_file_path, user_name)
            except Exception as e:
                print(e)
                # Re-raise so a failure stops the batch; remove this line to
                # carry on with the remaining rows instead.
                raise


# Script entry point: process the workbook configured in file_path_name.
if __name__ == '__main__':
    download_file(file_path_name)
  • 2
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值