// Streaming reader over the source file, yielding pieces of at most chunkSize.
// NOTE(review): with encoding 'utf-8' the stream yields strings, so chunk.length
// in Upload counts characters, not bytes — for multibyte content this can
// disagree with fs.statSync(filePath).size; confirm inputs are ASCII-safe.
const readable = fs.createReadStream(filePath,
{ encoding: 'utf-8', highWaterMark: chunkSize })
/**
 * Upload a file in resumable-style parts.
 *
 * Reads from `readable` (a stream over the module-level `filePath` with
 * highWaterMark = chunkSize) and sends one part per full-size chunk. When the
 * file size is not an exact multiple of chunkSize, the trailing remainder is
 * merged into the final part, so exactly `totalChunks` parts are sent.
 *
 * Bug fixed: previously, when fileSize was an exact multiple of chunkSize,
 * the last chunk matched no branch, was never uploaded, and the function
 * returned false.
 *
 * @param {*} readable utf-8 readable stream over filePath
 * @param {*} fileType upload category: streams, images, extras
 * @param {*} fileFid file identifier (resumableIdentifier)
 * @returns {Promise<boolean>} true once the final part is uploaded,
 *   false if the stream produced no data
 */
async function Upload(readable, fileType, fileFid) {
  // Total file size in bytes.
  const fileSize = fs.statSync(filePath).size
  // Number of parts to send: floor(fileSize / chunkSize), minimum 1.
  // Math.floor, not parseInt: parseInt on a number round-trips via string.
  const totalChunks = Math.floor(fileSize / chunkSize) || 1
  const fileName = "sample.jsonl"
  let chunkNumber = 0
  let data = ''
  for await (const chunk of readable) {
    _log("check have chunk", chunk.length)
    chunkNumber = chunkNumber + 1
    if (chunkNumber < totalChunks) {
      // Regular full-size part: upload immediately.
      _log("totalChunks:", totalChunks, "current:", chunkNumber, "chunkSize:", chunk.length, "file size:", fileSize)
      const uploadRes = await uploadChunk(fileType, chunkNumber, chunkSize, chunk, fileSize, fileFid, fileName, totalChunks)
      _log(JSON.stringify(uploadRes))
    } else {
      // Chunk number totalChunks and anything after it (the remainder of a
      // non-aligned file) are accumulated and sent as the single final part.
      data = data + chunk
    }
  }
  if (data.length > 0) {
    _log("totalChunks:", totalChunks, "current:", totalChunks, "chunkSize:", data.length, "file size:", fileSize)
    const uploadRes = await uploadChunk(fileType, totalChunks, chunkSize, data, fileSize, fileFid, fileName, totalChunks)
    _log(JSON.stringify(uploadRes))
    return true
  }
  // Empty stream: nothing was uploaded.
  return false
}
/**
 * Upload one binary chunk of a resumable file transfer.
 * @param {*} fileType upload category: streams, images, extras
 * @param {*} chunkNumber 1-based index of this part
 * @param {*} chunkSize nominal size of each part
 * @param {*} getChunk binary payload of this part
 * @param {*} fileSize total size of the whole file
 * @param {*} fileFid file identifier
 * @param {*} fileName file name reported to the server
 * @param {*} totalChunks total number of parts being sent
 * @returns the pending APIPost request
 */
function uploadChunk(fileType, chunkNumber, chunkSize, getChunk, fileSize, fileFid, fileName, totalChunks) {
  // Build the query string from individual key=value pieces.
  // NOTE(review): the query advertises resumableType=text%2Fplain while the
  // form field resumableType below is empty — confirm which one the server reads.
  const query = [
    `resumableChunkNumber=${chunkNumber}`,
    `resumableChunkSize=${chunkSize}`,
    `resumableCurrentChunkSize=${getChunk.length}`,
    `resumableTotalSize=${fileSize}`,
    `resumableType=text%2Fplain`,
    `resumableIdentifier=${fileFid}`,
    `resumableFilename=${fileName}`,
    `resumableRelativePath=${fileName}`,
    `resumableTotalChunks=${totalChunks}`,
    `type=${fileType}`,
  ].join('&')
  const url = '/actions/files/upload?' + query
  const form = new FormData();
  // The binary part goes first, then the multipart headers are registered.
  form.append('file', getChunk, { "filename": fileName, "contentType": "application/octet-stream" })
  APIHeaders(form.getHeaders())
  // Remaining resumable metadata fields, appended in the original order.
  const fields = [
    ['resumableChunkNumber', chunkNumber],
    ['resumableChunkSize', chunkSize],
    ['resumableCurrentChunkSize', getChunk.length],
    ['resumableTotalSize', fileSize],
    ['resumableIdentifier', fileFid],
    ['resumableType', ''],
    ['resumableFilename', fileName],
    ['resumableRelativePath', fileName],
    ['resumableTotalChunks', totalChunks],
    ['type', fileType],
  ]
  for (const [key, value] of fields) {
    form.append(key, value)
  }
  return APIPost(url, form)
}
// Below is the Python version of the same chunked-upload logic.
# Get the MD5 of a file, suitable for large files (incremental hashing).
def getBigFileMD5(fil_path):
    """Return the uppercase hex MD5 digest of a file.

    Reads in 8 KiB buffers instead of loading the whole file into memory.

    :param fil_path: path of the file to hash
    :return: uppercase hexadecimal MD5 digest string
    """
    md5_obj = hashlib.md5()
    max_buf = 8192
    # with-statement guarantees the handle is closed even if read() raises
    # (the original leaked the handle on error).
    with open(fil_path, 'rb') as f:
        while True:
            buf = f.read(max_buf)
            if not buf:
                break
            md5_obj.update(buf)
    file_hash = md5_obj.hexdigest()
    log.info("file MD5 is:{}".format(file_hash))
    return str(file_hash).upper()
def chunk_and_upload_file(fil_name, fil_path, fil_fid, chunk_size=1024 * 1024):
    """Upload a file in chunk_size pieces.

    When the file size is not an exact multiple of chunk_size, the trailing
    remainder is folded into the last full chunk, so exactly
    ``file_size // chunk_size`` (minimum 1) uploads are performed.

    :param fil_name: file name reported to the server
    :param fil_path: path of the file to upload
    :param fil_fid: file identifier
    :param chunk_size: nominal size of each part, defaults to 1 MiB
    """
    with open(fil_path, 'rb') as f:
        file_size = os.path.getsize(fil_path)
        log.info("start upload: %s, file size: %s chunk size: %s" % (fil_path, file_size, chunk_size))
        f.seek(0)
        # Number of chunk_size reads needed to consume the whole file.
        has_tail = file_size % chunk_size != 0
        total_split = file_size // chunk_size + 1 if has_tail else file_size // chunk_size
        # Number of uploads actually performed (remainder merged into the last).
        num_chunks = file_size // chunk_size or 1
        for index in range(num_chunks):
            chunk_number = index + 1
            if total_split != num_chunks and chunk_number == num_chunks:
                # Final pass of a non-aligned file: read everything left
                # (one full chunk plus the remainder) and send it together.
                tail = f.read()
                upload_file_chunk(tail, chunk_number, file_size, fil_fid, fil_name, chunk_size)
                break
            piece = f.read(chunk_size)
            if not piece:
                break
            upload_file_chunk(piece, chunk_number, file_size, fil_fid, fil_name, chunk_size)
    log.info("file: %s upload finished" % fil_name)
def upload_file_chunk(chunk, chunk_number, file_size, fil_fid, fil_name, chunk_size):
    """Upload a single file chunk as multipart/form-data.

    :param chunk: bytes payload of this part
    :param chunk_number: 1-based index of the part
    :param file_size: total size of the whole file in bytes
    :param fil_fid: file identifier (resumableIdentifier)
    :param fil_name: file name reported to the server
    :param chunk_size: nominal part size; resumableTotalChunks is derived from it
    """
    params = {
        "resumableChunkNumber": chunk_number,
        "resumableChunkSize": chunk_size,
        "resumableCurrentChunkSize": len(chunk),
        "resumableTotalSize": file_size,
        "resumableIdentifier": str(fil_fid),
        "resumableType": "",
        "resumableFilename": fil_name,
        "resumableRelativePath": fil_name,
        # A trailing partial chunk is merged into the last full one by the
        # caller, so the total is floor(file_size / chunk_size), minimum 1.
        "resumableTotalChunks": file_size // chunk_size or 1
    }
    log.info(params)
    log.info("upload chunk:{},size:{}".format(chunk_number, len(chunk)))
    headers = get_header()
    headers.update({
        'Accept': '*/*',
        'Sec-Fetch-Site': 'cross-site',
        'Sec-Fetch-Mode': 'cors',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
    })
    file_data = copy(params)
    # Bug fix: the original referenced the undefined global `file_path`
    # (NameError unless such a module global happened to exist); use the
    # fil_name parameter, consistent with resumableFilename above.
    file_data['file'] = (fil_name, chunk)
    encode_data = encode_multipart_formdata(file_data)
    file_data = encode_data[0]
    headers['Content-Type'] = encode_data[1]
    upload_url = baseURL + "/actions/files/upload"
    res = my_request.post(
        upload_url,
        data=file_data,
        headers=headers,
        verify=False
    )
    log.info(res.text)
    if res.status_code != 200:
        log.error(res.text)