由于需要通过百度网盘共享股票、期货等行情数据,且其中以1G左右的大文件居多,所以我基于网盘提供的SDK包开发了大文件分片上传的代码,方便自动化上传和分享。以下是实现的具体依据和步骤,供大家参考。
Python语言的SDK包
Python SDK使用入门
上传流程简介
上传流程是指用户将本地文件上传到百度网盘云端服务器的过程。文件上传分为三个阶段:预上传、分片上传、创建文件。第二个阶段"分片上传"依赖第一个阶段"预上传"的结果,第三个阶段"创建文件"依赖第一个阶段"预上传"和第二个阶段"分片上传"的结果。串行完成这三个阶段的任务后,本地文件即成功上传到网盘服务器。
前置条件 在使用上传接口之前,请确认已完成整个接入流程: 1、你已经登录了百度账号,具体流程参考【注册与登录】。 2、你已经通过了实名认证,具体流程参考【实名认证】。 3、你已经在控制台完成应用创建并获得接入凭证,具体流程参考【创建应用】。 4、你已经为你的应用接入授权,并且获得可用的access_token,具体流程参考【接入授权】。 限制条件 目录限制 每个第三方应用在网盘只能拥有一个文件夹用于存储上传文件,该文件夹必须位于/apps目录下,apps下的文件夹名称为申请接入时填写的申请接入的产品名称。如申请接入的产品名称为云存储,那么该文件夹为/apps/云存储,用户看到的文件夹为/我的应用数据/云存储。 大小限制 所有开发者均可接入使用接口,但可上传单个文件大小根据授权用户的身份有不同的限制: 普通用户单个上传文件大小上限为4GB 会员用户单个上传文件大小上限为10GB 超级会员用户单个上传文件大小上限为20GB 注:分片数量不得超过1024个 类型限制 普通用户在网盘APP端无法上传视频、Live Photo类型的文件。
大文件分片上传的代码如下,填入token就可以跑通,亲测有效。其中几处顺序错误、字符串list循环等小bug也已修复。
# !/usr/bin/env python3
"""
xpan upload
include:
precreate
upload
create
"""
import os
import sys
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)
from pprint import pprint
from openapi_client.api import fileupload_api
import openapi_client
import pdb
import hashlib
"""
切分指定大小文件到指定路径
"""
def split(fromfile, todir, chunksize=4 * 1024 * 1024):
    """Split ``fromfile`` into numbered part files inside ``todir``.

    Files already present in ``todir`` are deleted first so that stale parts
    from a previous run cannot be mixed with the new ones. Sub-directories
    of ``todir`` are left untouched.

    Args:
        fromfile: Path of the file to split.
        todir: Directory that receives the parts; created (including any
            missing parents) when it does not exist.
        chunksize: Maximum size of each part in bytes (default 4 MiB).

    Returns:
        The part-file paths in part order. Parts are named
        ``<basename>.partNNNN`` with a zero-based, zero-padded index.
        An empty source file yields an empty list.

    Raises:
        ValueError: If ``chunksize`` is not positive.
        OSError: If the source cannot be read or a part cannot be written.
    """
    # read(0) returns b'' immediately and read(-1) slurps the whole file, so a
    # non-positive size would silently produce a wrong split.
    if chunksize <= 0:
        raise ValueError("chunksize must be a positive number of bytes")

    if os.path.isdir(todir):
        for fname in os.listdir(todir):
            stale = os.path.join(todir, fname)
            # os.remove() cannot delete a real directory; skip those instead
            # of crashing half-way through the clean-up.
            if os.path.islink(stale) or not os.path.isdir(stale):
                os.remove(stale)
    else:
        os.makedirs(todir)

    base_name = os.path.basename(fromfile)
    paths = []
    # Context managers guarantee both handles are closed even on error.
    with open(fromfile, 'rb') as source:
        chunks = iter(lambda: source.read(chunksize), b'')
        for partnum, chunk in enumerate(chunks):
            part_path = os.path.join(
                todir, '%s.part%04d' % (base_name, partnum))
            with open(part_path, 'wb') as part:
                part.write(chunk)
            paths.append(part_path)
    return paths
"""
获取文件MD5值
"""
def get_file_md5(file_name):
    """Return the hexadecimal MD5 digest of the file at ``file_name``.

    The file is opened in binary mode and consumed in 4096-byte blocks, so
    the whole content is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(file_name, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(4096), b''):
            digest.update(block)
    return digest.hexdigest()
"""
获取文件夹下文件MD5值
"""
def get_files_md5(dir_path):
    """Collect the paths and MD5 digests of the files directly in a folder.

    Entries are visited in sorted name order. Sub-directories and names
    starting with ``.`` are skipped.

    Returns:
        A ``(paths, md5s)`` pair of equally long lists, where ``md5s[i]`` is
        the hexadecimal MD5 digest of ``paths[i]``.
    """
    paths, md5s = [], []
    for entry in sorted(os.listdir(dir_path)):
        if entry.startswith('.'):
            continue
        full_path = os.path.join(dir_path, entry)
        if os.path.isdir(full_path):
            continue
        paths.append(full_path)
        md5s.append(get_file_md5(full_path))
    return paths, md5s
"""
Get the MD5 of the first 256 KiB slice of a file
"""
def get_slice_md5(file_name):
    """Return the hexadecimal MD5 digest of the first 256 KiB of a file.

    Only one read of at most ``256 * 1024`` bytes is performed, so a shorter
    file is hashed in full and the rest of a longer file is ignored.
    """
    with open(file_name, 'rb') as stream:
        head = stream.read(256 * 1024)
    return hashlib.md5(head).hexdigest()
"""
获取内容MD5值
"""
def get_str_md5(content):
    """Return the hexadecimal MD5 digest of in-memory content.

    Args:
        content: ``bytes`` (or another bytes-like object) to hash. A ``str``
            is also accepted and is encoded as UTF-8 first, because
            ``hashlib.md5`` itself rejects text.

    Returns:
        The 32-character lowercase hexadecimal digest.
    """
    if isinstance(content, str):
        content = content.encode('utf-8')
    # The digest used to be computed and then dropped, so every call
    # returned None.
    return hashlib.md5(content).hexdigest()
"""
Walk a folder tree and print every file's path and MD5
"""
def traverse_files(folder_path):
    """Print the path and MD5 digest of every file under ``folder_path``.

    The tree is walked recursively with ``os.walk``. For each file two lines
    are printed: its path, then the digest from ``calculate_md5_file``.
    """
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            target = os.path.join(current_dir, name)
            checksum = calculate_md5_file(target)
            print(target)
            print(checksum)
def calculate_md5_file(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is read in binary mode, 4096 bytes at a time.
    """
    hasher = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if block == b"":
                # An empty read means end of file.
                break
            hasher.update(block)
    return hasher.hexdigest()
"""
1、预创建
"""
def precreate(access_token, path, file_path):
    """Run the pre-upload step and return everything the later steps need.

    A file larger than 4 MiB is split into 4 MiB parts in a ``tmp`` folder
    next to it, and the per-part MD5 digests form the block list. A smaller
    file is sent whole, and its single MD5 digest forms the block list.

    Args:
        access_token: Access token passed to the API.
        path: Destination path on the network disk.
        file_path: Local path of the file to upload.

    Returns:
        The tuple ``(access_token, path, isdir, size, uploadid, block_list,
        rtype, file_path, paths)``. ``block_list`` is a JSON array string of
        MD5 digests. ``paths`` lists the local part files in upload order
        and is empty when the file was not split.

    Raises:
        openapi_client.ApiException: If the precreate call fails. The error
            is printed and re-raised, because without an ``uploadid`` the
            following steps cannot run.
        OSError: If the local file cannot be read or split.
    """
    # Function-scope import: the file's import header does not provide json.
    import json

    isdir = 0  # uploading a file, not creating a directory
    autoinit = 1
    rtype = 3  # passed to the API unchanged; see the xpan docs for its meaning
    chunksize = 4 * 1024 * 1024  # part size used for splitting
    size = os.path.getsize(file_path)

    paths = []
    if size > chunksize:
        # Resolve the folder from the absolute path. For a bare file name
        # dirname() is '' and the old '<dirname>/tmp/' became the system
        # '/tmp/', whose files split() would then delete.
        # NOTE(review): the notes shipped with this script cap uploads at
        # 1024 parts; that limit is not enforced here.
        tmp_path = os.path.join(
            os.path.dirname(os.path.abspath(file_path)), 'tmp')
        split(file_path, tmp_path, chunksize)
        paths, md5s = get_files_md5(tmp_path)
    else:
        md5s = [get_file_md5(file_path)]
    # json.dumps yields the same '["a", "b"]' text as the old
    # str(list).replace("'", '"') without depending on repr() quoting.
    block_list = json.dumps(md5s)

    with openapi_client.ApiClient() as api_client:
        api_instance = fileupload_api.FileuploadApi(api_client)
        try:
            api_response = api_instance.xpanfileprecreate(
                access_token, path, isdir, size, autoinit, block_list,
                rtype=rtype)
        except openapi_client.ApiException as e:
            print("Exception when calling FileuploadApi->xpanfileprecreate: %s\n" % e)
            # Previously execution fell through to the return statement and
            # died with UnboundLocalError on ``uploadid``.
            raise
        pprint(api_response)
        # The upload id ties the later upload and create calls to this one.
        uploadid = api_response['uploadid']

    return (access_token, path, isdir, size, uploadid, block_list, rtype,
            file_path, paths)
"""
2、上传
"""
def upload(uploadid, path, file_path, access_token, paths):
    """Upload the file's parts one by one.

    Args:
        uploadid: Upload id returned by the precreate step.
        path: Destination path on the network disk.
        file_path: Local path of the original file; sent as the only part
            (sequence ``'0'``) when ``paths`` is empty.
        access_token: Access token passed to the API.
        paths: Local part files in upload order; the position in this list
            becomes the part sequence number.

    Raises:
        SystemExit: With code -1 if a part file cannot be opened (the error
            is printed first).
        openapi_client.ApiException: If a part upload fails. The error is
            printed and re-raised, since continuing would only lead to an
            incomplete upload being finalised later.
    """
    upload_type = "tmpfile"  # renamed from ``type`` to stop shadowing the builtin
    # No split parts means the file fits in one chunk: send it as part 0.
    parts = paths if paths else [file_path]

    with openapi_client.ApiClient() as api_client:
        api_instance = fileupload_api.FileuploadApi(api_client)
        for index, part_path in enumerate(parts):
            partseq = str(index)
            if paths:
                print(partseq, part_path)
            try:
                part_file = open(part_path, 'rb')
            except OSError as e:
                print("Exception when open file: %s\n" % e)
                # Same effect as the old exit(-1), without relying on the
                # site-provided ``exit`` helper.
                raise SystemExit(-1)
            # Close every part as soon as it has been sent; the handles
            # used to stay open until the process ended.
            with part_file:
                try:
                    api_response = api_instance.pcssuperfile2(
                        access_token, partseq, path, uploadid, upload_type,
                        file=part_file)
                except openapi_client.ApiException as e:
                    print("Exception when calling FileuploadApi->pcssuperfile2: %s\n" % e)
                    raise
                pprint(api_response)
"""
3、创建文件
"""
def create(access_token, path, isdir, size, uploadid, block_list, rtype):
    """Finalise the upload by asking the server to create the file.

    All arguments are forwarded unchanged to ``xpanfilecreate``; they are
    the values produced by the precreate step.

    Returns:
        The API response on success, so callers can inspect the result (it
        used to be discarded). ``None`` if the call raised
        ``openapi_client.ApiException``, in which case the error is printed.
    """
    with openapi_client.ApiClient() as api_client:
        api_instance = fileupload_api.FileuploadApi(api_client)
        try:
            api_response = api_instance.xpanfilecreate(
                access_token, path, isdir, size, uploadid, block_list,
                rtype=rtype)
        except openapi_client.ApiException as e:
            print("Exception when calling FileuploadApi->xpanfilecreate: %s\n" % e)
            return None
        pprint(api_response)
        return api_response
if __name__ == '__main__':
    # Script entry point: run the three upload stages in order.

    # Access token for the API; fill it in before running.
    access_token = ""
    # Destination path on the network disk. For ordinary third-party apps the
    # path starts with "/apps/your-app-name/"; for hardware apps such as
    # Xiaodu it usually starts with "/来自:小度设备/"; custom-configured
    # hardware apps use whatever their configuration specifies.
    path = "/88.数据超市/A股数据/分钟数据/202408.zip"
    # Real local path of the file to upload.
    file_path = "/home/fengbuyu/data_temp/m_data/202408.zip"

    # Stage 1: precreate returns the values the next two stages consume.
    (access_token, path, isdir, size, uploadid,
     block_list, rtype, file_path, paths) = precreate(
        access_token, path, file_path)
    # Stage 2: send the part files (or the whole file if it was not split).
    upload(uploadid, path, file_path, access_token, paths)
    # Stage 3: have the server assemble the uploaded parts into the file.
    create(access_token, path, isdir, size, uploadid, block_list, rtype)