百度网盘-大文件分片上传实现python代码

由于需要通过百度网盘共享股票、期货等行情数据,且其中1G左右的大文件居多,所以我基于网盘提供的SDK包开发了大文件分片上传的代码,方便自动化上传和分享。以下是实现的具体依据和步骤,供大家参考。

Python语言的SDK包

Python SDK使用入门

Python SDK使用入门

上传流程简介

上传流程是指,用户将本地文件上传到百度网盘云端服务器的过程。文件上传分为三个阶段:预上传、分片上传、创建文件。第二个阶段分片上传依赖第一个阶段预上传的结果,第三个阶段创建文件依赖第一个阶段预上传和第二阶段分片上传的结果,串行完成这三个阶段任务后,本地文件成功上传到网盘服务器。

前置条件
在使用上传接口之前,请确认已完成整个接入流程:

1、你已经登录了百度账号,具体流程参考【注册与登录】。
2、你已经通过了实名认证,具体流程参考【实名认证】。
3、你已经在控制台完成应用创建并获得接入凭证,具体流程参考【创建应用】。
4、你已经为你的应用接入授权,并且获得可用的access_token,具体流程参考【接入授权】。

限制条件
目录限制
每个第三方应用在网盘只能拥有一个文件夹用于存储上传文件,该文件夹必须位于/apps目录下,apps下的文件夹名称为申请接入时填写的产品名称。如申请接入的产品名称为云存储,那么该文件夹为/apps/云存储,用户看到的文件夹为/我的应用数据/云存储。

大小限制
所有开发者均可接入使用接口,但可上传单个文件大小根据授权用户的身份有不同的限制:
普通用户单个上传文件大小上限为4GB
会员用户单个上传文件大小上限为10GB
超级会员用户单个上传文件大小上限为20GB
注:分片数量不得超过1024个

类型限制
普通用户在网盘APP端无法上传视频、Live Photo类型的文件。

大文件分片上传的代码如下,填入token就可以跑通,亲测有效。其中几处顺序错误、字符串列表循环等小bug也已修复。

#!/usr/bin/env python3
"""
    xpan upload
    include:
        precreate
        upload
        create
"""
import os
import sys
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)
from pprint import pprint
from openapi_client.api import fileupload_api
import openapi_client
import pdb
import hashlib

"""
切分指定大小文件到指定路径
"""
def split(fromfile, todir, chunksize = 4*1024*1024):
    """Split ``fromfile`` into sequentially numbered part files inside ``todir``.

    ``todir`` is created when missing (including intermediate directories);
    when it already exists, every entry directly inside it is removed first
    so that parts left over from an earlier run are not mixed in.

    Args:
        fromfile: Path of the file to split.
        todir: Directory that receives the part files.
        chunksize: Maximum size of each part in bytes (default 4 MiB).

    Returns:
        List of part-file paths in part order. Each part is named
        ``<basename>.partNNNN`` with a zero-padded, 0-based index.

    Raises:
        OSError: If the source cannot be read, a part cannot be written, or
            an existing entry in ``todir`` cannot be removed.
    """
    if os.path.exists(todir):
        # Drop stale parts from a previous run.
        for fname in os.listdir(todir):
            os.remove(os.path.join(todir, fname))
    else:
        os.makedirs(todir)

    paths = []
    file_name = os.path.basename(fromfile)
    # Context managers guarantee both handles are closed even when a
    # read or write fails part-way through.
    with open(fromfile, 'rb') as inputfile:
        partnum = 0
        while True:
            chunk = inputfile.read(chunksize)
            if not chunk:  # b'' signals end of file
                break
            filename = os.path.join(todir, '%s.part%04d' % (file_name, partnum))
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)
            paths.append(filename)
            partnum += 1
    return paths

"""
获取文件MD5值
"""
def get_file_md5(file_name):
    """Return the hexadecimal MD5 digest of the entire file at ``file_name``.

    The file is read in 4096-byte blocks so it never has to fit in memory.
    """
    digest = hashlib.md5()
    with open(file_name, 'rb') as stream:
        # read() returns b'' at end of file, which stops the iterator.
        for block in iter(lambda: stream.read(4096), b''):
            digest.update(block)
    return digest.hexdigest()

"""
获取文件夹下文件MD5值
"""
def get_files_md5(dir_path):
    """Hash every visible regular entry directly inside ``dir_path``.

    Entries are visited in sorted name order. Sub-directories and names
    starting with ``'.'`` are skipped.

    Returns:
        A ``(paths, md5s)`` pair of parallel lists: the joined path of each
        hashed entry and its hexadecimal MD5 digest.
    """
    paths = []
    md5s = []
    for entry in sorted(os.listdir(dir_path)):
        candidate = os.path.join(dir_path, entry)
        if os.path.isdir(candidate) or entry.startswith('.'):
            continue
        checksum = get_file_md5(candidate)
        paths.append(candidate)
        md5s.append(checksum)
    return paths, md5s

"""
获取文件前256KB内容的MD5值
"""
def get_slice_md5(file_name):
    """Return the hexadecimal MD5 digest of the first 256 KiB of a file.

    Files shorter than 256 KiB are hashed in full.
    """
    with open(file_name, 'rb') as stream:
        head = stream.read(256 * 1024)
    return hashlib.md5(head).hexdigest()

"""
获取内容MD5值
"""
def get_str_md5(content):
    """Return the hexadecimal MD5 digest of ``content``.

    Args:
        content: Data to hash. ``str`` input is encoded as UTF-8 first;
            bytes-like input is hashed as given.

    Returns:
        The digest as a 32-character lowercase hex string.
    """
    if isinstance(content, str):
        # hashlib only accepts bytes-like objects.
        content = content.encode('utf-8')
    return hashlib.md5(content).hexdigest()

"""
遍历文件夹,打印其中每个文件的路径和MD5值
"""
def traverse_files(folder_path):
    """Walk ``folder_path`` recursively and print each file's path and MD5.

    For every file found, its full path is printed on one line and its
    hexadecimal MD5 digest on the next.
    """
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            full_path = os.path.join(current_dir, name)
            digest = calculate_md5_file(full_path)
            print(full_path)
            print(digest)

def calculate_md5_file(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is consumed in 4096-byte blocks.
    """
    hasher = hashlib.md5()
    with open(file_path, "rb") as handle:
        while True:
            block = handle.read(4096)
            if block == b"":  # end of file
                break
            hasher.update(block)
    return hasher.hexdigest()


"""
1、预创建
"""
def precreate(access_token, path, file_path):
    """Stage 1 of the upload: announce the file and obtain an ``uploadid``.

    Files larger than 4 MiB are split into 4 MiB parts inside a ``tmp``
    directory next to the source file (that directory is cleared first by
    ``split``), and the MD5 of every part is sent as ``block_list``. Smaller
    files are not split; ``block_list`` then holds the MD5 of the whole file.

    Args:
        access_token: Access token passed through to the API.
        path: Destination path on the netdisk.
        file_path: Local path of the file to upload.

    Returns:
        The tuple ``(access_token, path, isdir, size, uploadid, block_list,
        rtype, file_path, paths)`` carrying everything ``upload`` and
        ``create`` need. ``paths`` lists the local part files in part order
        and is empty when the file was not split.

    Raises:
        openapi_client.ApiException: If the precreate call fails. The error
            is printed and re-raised, because without an ``uploadid`` the
            later stages cannot run.
        OSError: If ``file_path`` cannot be read or the parts cannot be
            written.
    """
    import json  # local import keeps this block self-contained

    chunksize = 4 * 1024 * 1024  # part size in bytes
    isdir = 0  # uploading a file, not a directory
    autoinit = 1
    # NOTE(review): Baidu's docs describe rtype as the rename/overwrite
    # policy on a path conflict - confirm 3 is the intended policy.
    rtype = 3
    size = os.path.getsize(file_path)

    paths = []
    if size > chunksize:
        # Resolve the directory from the absolute path: for a bare file
        # name dirname() is '' and a naive '' + '/tmp/' would point at the
        # system /tmp/, which split() would then empty.
        file_directory = os.path.dirname(os.path.abspath(file_path))
        tmp_path = os.path.join(file_directory, 'tmp')
        split(file_path, tmp_path, chunksize)
        paths, md5s = get_files_md5(tmp_path)
    else:
        md5s = [get_file_md5(file_path)]
    # block_list is a JSON array of MD5 strings, e.g. '["ab..", "cd.."]'.
    block_list = json.dumps(md5s)

    # Enter a context with an instance of the API client
    with openapi_client.ApiClient() as api_client:
        api_instance = fileupload_api.FileuploadApi(api_client)
        try:
            api_response = api_instance.xpanfileprecreate(
                access_token, path, isdir, size, autoinit, block_list, rtype=rtype)
        except openapi_client.ApiException as e:
            print("Exception when calling FileuploadApi->xpanfileprecreate: %s\n" % e)
            raise
        pprint(api_response)
        # The uploadid ties the later upload and create calls to this session.
        uploadid = api_response['uploadid']
    return access_token, path, isdir, size, uploadid, block_list, rtype, file_path, paths

"""
2、上传
"""
def upload(uploadid, path, file_path, access_token, paths):
    """Stage 2 of the upload: send the file content part by part.

    Args:
        uploadid: Upload session id returned by ``precreate``.
        path: Destination path on the netdisk.
        file_path: Local path of the original file; uploaded whole as
            part ``0`` when ``paths`` is empty.
        access_token: Access token passed through to the API.
        paths: Local part files in part order, or an empty list when the
            file was not split.

    Raises:
        SystemExit: With status ``-1`` if a part file cannot be opened
            (the error is printed first).

    An ``ApiException`` from an individual part upload is printed and the
    remaining parts are still attempted.
    """
    upload_type = "tmpfile"
    # An unsplit file is sent as a single part with sequence number 0.
    targets = paths if paths else [file_path]

    # Enter a context with an instance of the API client
    with openapi_client.ApiClient() as api_client:
        api_instance = fileupload_api.FileuploadApi(api_client)
        for index, part_path in enumerate(targets):
            partseq = str(index)
            if paths:
                print(partseq, part_path)
            try:
                part_file = open(part_path, 'rb')
            except OSError as e:
                print("Exception when open file: %s\n" % e)
                raise SystemExit(-1)
            # Close each part as soon as its request finishes so a large
            # upload does not accumulate open file handles.
            with part_file:
                try:
                    api_response = api_instance.pcssuperfile2(
                        access_token, partseq, path, uploadid, upload_type,
                        file=part_file)
                    pprint(api_response)
                except openapi_client.ApiException as e:
                    print("Exception when calling FileuploadApi->pcssuperfile2: %s\n" % e)

"""
3、创建文件
"""
def create(access_token, path, isdir, size, uploadid, block_list, rtype):
    """Stage 3 of the upload: ask the server to create the final file.

    All arguments are the values returned by ``precreate`` and are passed
    through unchanged to ``xpanfilecreate``. The response is pretty-printed;
    an ``ApiException`` is printed and not re-raised.
    """
    client_context = openapi_client.ApiClient()
    with client_context as client:
        uploader = fileupload_api.FileuploadApi(client)
        try:
            result = uploader.xpanfilecreate(
                access_token,
                path,
                isdir,
                size,
                uploadid,
                block_list,
                rtype=rtype,
            )
            pprint(result)
        except openapi_client.ApiException as e:
            print("Exception when calling FileuploadApi->xpanfilecreate: %s\n" % e)


if __name__ == '__main__':
    # Fill in a valid access token before running.
    access_token = ""
    # Destination path on the netdisk. For ordinary third-party apps the
    # path starts with "/apps/your-app-name/"; for hardware apps such as
    # Xiaodu it usually starts with "/来自:小度设备/"; custom-configured
    # hardware apps follow their own configuration.
    path = "/88.数据超市/A股数据/分钟数据/202408.zip"
    # Real local path of the file to upload.
    file_path = "/home/fengbuyu/data_temp/m_data/202408.zip"

    # Run the three stages in order: precreate -> upload -> create.
    (access_token, path, isdir, size, uploadid,
     block_list, rtype, file_path, paths) = precreate(access_token, path, file_path)
    upload(uploadid, path, file_path, access_token, paths)
    create(access_token, path, isdir, size, uploadid, block_list, rtype)

fa7dd6d4c31b48729a8d79614460ce90.png

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

AIGC数据超市

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值