在我们的工作中,上传功能是一个比较常见的需求,但当上传的文件过大时,就可能上传失败,或者花费过长的时间。
这个时候我们就需要将大文件分割成小文件分片上传,然后再合并成一个大文件,以提高上传的容错率。
现将大文件上传功能记录于此。
实现逻辑:
1.将文件分割成n个小文件,并将它们全部上传到服务器;可以用文件计算一个hash值,用来标识这n个小文件属于同一个文件。
2.全部分片上传完成后,服务器根据hash将它们按顺序合并。
具体实现:
1.前端页面 index.html
<!DOCTYPE html>
<html lang="en">
<!-- Demo page for chunked large-file upload: a file picker, a submit button,
     and a textarea (#ct) that logs hashing/upload progress. -->
<head>
<meta charset="UTF-8">
<title>upload file</title>
</head>
<body id="app">
<h1 style="text-align: center">大文件切片上传-实例</h1>
<!-- onsubmit="return false" blocks the normal form post;
     upload() (wired to the submit button) sends the chunks via axios instead. -->
<form method="post" enctype="multipart/form-data" onsubmit="return false" style="left: 10vw;position: relative;display: flex;height: 30vh;flex-direction: column;width: 80vw;margin: 20px;text-align: center;">
<input type="file" id="file" name="ff" multiple="multiple" style="margin-left: 30px"/><br/>
<input type="submit" value="提交" id="xx" onclick="upload()" style="margin-left: 30px;width:70px"/>
</form>
<div style="height: 30px; width:80vw;left: 10vw;position: relative;"><span>上传过程:</span></div>
<!-- Progress log area: the scripts append one status line per chunk here. -->
<div style="display: block;height: 40vh; width:80vw;overflow: scroll; background: darkgray;left: 10vw;position: relative;">
<textarea id="ct" style="height: 100%;width:100%;"></textarea>
</div>
</body>
<!-- jQuery for DOM lookup, axios for HTTP requests, spark-md5 for client-side MD5 hashing. -->
<script src="http://ajax.aspnetcdn.com/ajax/jQuery/jquery-1.8.0.js"></script>
<script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/spark-md5/3.0.0/spark-md5.min.js"></script>
<script >
const chunkSize = 2 * 1024 * 1024; // size of each chunk: 2 MiB
// Cross-browser Blob.slice (older Firefox/WebKit shipped vendor-prefixed names).
const blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
/**
 * Compute an MD5-based fingerprint of `file` by reading it chunk by chunk.
 * The content digest is then combined with the file name, so two files with
 * identical content but different names still hash differently.
 * @param {File} file - file selected by the user
 * @returns {Promise<string>} hex hash string; rejects if reading fails
 */
const hashFile = (file) => {
  return new Promise((resolve, reject) => {
    const chunks = Math.ceil(file.size / chunkSize);
    let currentChunk = 0;
    const spark = new SparkMD5.ArrayBuffer();
    const fileReader = new FileReader();
    // Kick off the read of the next chunk; onload below folds it into the digest.
    const loadNext = () => {
      const start = currentChunk * chunkSize;
      const end = start + chunkSize >= file.size ? file.size : start + chunkSize;
      fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
    };
    fileReader.onload = e => {
      spark.append(e.target.result); // Append array buffer
      currentChunk++;
      if (currentChunk < chunks) {
        loadNext();
        const ct = document.getElementById("ct"); // was an implicit global
        ct.textContent = ct.textContent + `第${currentChunk}分片解析完成,开始解析${currentChunk + 1}分片\n\r`;
        console.log(`第${currentChunk}分片解析完成,开始解析${currentChunk + 1}分片`);
      } else {
        console.log('finished loading');
        const result = spark.end();
        // A content-only hash cannot keep two same-content files with
        // different names apart, so mix the file name into the final hash.
        const sparkMd5 = new SparkMD5();
        sparkMd5.append(result);
        sparkMd5.append(file.name);
        resolve(sparkMd5.end());
      }
    };
    // BUG FIX: previously only logged a warning, so the promise never settled
    // on a read error (the caller awaited forever). Also removed the trailing
    // .catch that converted any rejection into a resolved `undefined`.
    fileReader.onerror = () => {
      reject(new Error('文件读取失败!'));
    };
    loadNext();
  });
};
/**
 * Entry point wired to the submit button: hashes the selected file, slices it
 * into 2 MiB chunks, uploads each chunk to /uploadFile in parallel, then asks
 * the server to merge them via /file/chunks.
 */
const upload = async () => {
  const fileDom = $('#file')[0];
  // fileDom.files is a FileList; with `multiple` set there may be several
  // entries — only the first one is uploaded here.
  const file = fileDom.files[0];
  if (!file) {
    alert('没有获取文件');
    return;
  }
  const ct = document.getElementById('ct'); // progress log area (was an implicit global)
  const blockCount = Math.ceil(file.size / chunkSize); // total number of chunks
  const hash = await hashFile(file); // file fingerprint
  // With the hash, resumable upload could be implemented here by asking the
  // backend which chunks it already holds before sending anything.
  const axiosPromiseArray = [];
  for (let i = 0; i < blockCount; i++) {
    const start = i * chunkSize;
    const end = start + chunkSize >= file.size ? file.size : start + chunkSize;
    // Build one multipart/form-data body per chunk.
    const form = new FormData();
    form.append('file', blobSlice.call(file, start, end));
    form.append('name', file.name);
    form.append('total', blockCount);
    form.append('index', i);
    form.append('size', file.size);
    form.append('hash', hash);
    console.log(blockCount, blobSlice.call(file, start, end), i, start, end, file.size);
    const axiosOptions = {
      // BUG FIX: onUploadProgress fires repeatedly while the body is being
      // sent; only log completion once all bytes of the chunk have gone out.
      onUploadProgress: e => {
        if (e.loaded === e.total) {
          ct.textContent = ct.textContent + `第${i}分片上传完成\n\r`;
        }
      },
    };
    axiosPromiseArray.push(axios.post('/uploadFile', form, axiosOptions));
  }
  try {
    const result = await axios.all(axiosPromiseArray);
    console.log(result);
    // All chunks are on the server: ask it to merge them into one file.
    const form = new FormData();
    form.append('size', file.size);
    form.append('name', file.name);
    form.append('total', blockCount);
    form.append('hash', hash);
    await axios.post('/file/chunks', form);
    ct.textContent = ct.textContent + `上传完成\n\r`;
    console.log('全部上传完毕');
  } catch (err) {
    // BUG FIX: failures were silently swallowed by an empty catch, leaving
    // the user staring at a stalled progress log.
    console.error(err);
    ct.textContent = ct.textContent + `上传失败\n\r`;
  }
};
</script>
</html>
2.后端代码
package main
import (
"bufio"
"encoding/json"
"fmt"
"html/template"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
)
// Storage layout rooted at the server's working directory: finished files go
// under ./uploads, per-file chunk directories under ./uploads/temp/<hash>/.
var dir, _ = os.Getwd()
var uploadPath = path.Join(dir, "uploads")
var uploadTempPath = path.Join(uploadPath, "temp")
// 加载html前段页面
func home(w http.ResponseWriter, r *http.Request) {
r.ParseForm()
t, err := template.ParseFiles("static/index.html")
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
t.Execute(w, "")
return
}
// PathExists 判断文件夹是否存在
func PathExists(path string) (bool, error) {
_, err := os.Stat(path)
if err == nil {
return true, nil
}
if os.IsNotExist(err) {
return false, nil
}
return false, err
}
// uploadFile receives a single chunk of a multipart upload and stores it as
// uploads/temp/<hash>/<hash>-<index>. If a finished file whose name starts
// with "<hash>_" already exists under uploads/, the chunk is skipped and an
// "already uploaded" JSON response is returned instead.
func uploadFile(w http.ResponseWriter, r *http.Request) {
	// BUG FIX: the FormFile error was ignored; a request without a "file"
	// part would have dereferenced a nil reader below.
	file, _, err := r.FormFile("file")
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	// BUG FIX: defer before the copy so early error returns still close it.
	defer file.Close()
	index := r.PostFormValue("index")
	hash := r.PostFormValue("hash")
	// NOTE(review): chunks() saves merged files under their original name,
	// not "<hash>_...", so this dedupe check presumably never matches —
	// verify the intended naming scheme.
	nameList, err := ioutil.ReadDir(uploadPath)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	for _, name := range nameList {
		if strings.Split(name.Name(), "_")[0] == hash {
			m := map[string]interface{}{
				"code": 46900,
				"msg":  "文件已上传",
			}
			result, _ := json.MarshalIndent(m, "", " ")
			// BUG FIX: Fprintf treated the JSON as a format string; any '%'
			// in it would have been mangled.
			fmt.Fprint(w, string(result))
			return
		}
	}
	chunksPath := path.Join(uploadTempPath, hash)
	if exists, _ := PathExists(chunksPath); !exists {
		if err := os.MkdirAll(chunksPath, os.ModePerm); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
	}
	// NOTE(review): os.O_CREATE|os.O_WRONLY is the portable spelling; the
	// syscall constants are kept only to leave the import set untouched.
	destFile, err := os.OpenFile(path.Join(chunksPath, hash+"-"+index), syscall.O_CREAT|syscall.O_WRONLY, 0777)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer destFile.Close()
	// BUG FIX: the hand-rolled read loop shadowed err (the stale outer err
	// was then checked with log.Fatal — which would also have killed the
	// whole server) and only flushed the writer on the EOF path. io.Copy
	// plus an explicit Flush handles short reads, errors and buffering.
	writer := bufio.NewWriter(destFile)
	if _, err := io.Copy(writer, bufio.NewReader(file)); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if err := writer.Flush(); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	fmt.Printf("第%s:%s块上传完成\n", index, destFile.Name())
}
// 合并文件
func chunks(w http.ResponseWriter, r *http.Request) {
size, _ := strconv.ParseInt(r.PostFormValue("size"), 10, 64)
hash := r.PostFormValue("hash")
name := r.PostFormValue("name")
toSize, _ := getDirSize(path.Join(uploadTempPath, hash, "/"))
if size != toSize {
fmt.Fprintf(w, "文件上传错误")
}
chunksPath := path.Join(uploadTempPath, hash, "/")
files, _ := ioutil.ReadDir(chunksPath)
// 排序
filesSort := make(map[string]string)
for _, f := range files {
nameArr := strings.Split(f.Name(), "-")
filesSort[nameArr[1]] = f.Name()
}
saveFile := path.Join(uploadPath, name)
if exists, _ := PathExists(saveFile); exists {
os.Remove(saveFile)
}
fs, _ := os.OpenFile(saveFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, os.ModeAppend|os.ModePerm)
var wg sync.WaitGroup
filesCount := len(files)
if filesCount != len(filesSort) {
fmt.Fprintf(w, "文件上传错误2")
}
wg.Add(filesCount)
for i := 0; i < filesCount; i++ {
// 这里一定要注意按顺序读取不然文件就会损坏
fileName := path.Join(chunksPath, "/"+filesSort[strconv.Itoa(i)])
data, err := ioutil.ReadFile(fileName)
fmt.Println(err)
fs.Write(data)
wg.Done()
}
wg.Wait()
os.RemoveAll(path.Join(chunksPath, "/"))
m := map[string]interface{}{
"code": 20000,
"msg": "上传成功",
}
result, _ := json.MarshalIndent(m, "", " ")
fmt.Fprintf(w, string(result))
defer fs.Close()
}
// 获取整体文件夹大小
func getDirSize(path string) (int64, error) {
var size int64
err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
if !info.IsDir() {
size += info.Size()
}
return err
})
return size, err
}
// main registers the three routes and runs the HTTP server on :8080.
func main() {
	http.HandleFunc("/", home)                 // front-end page
	http.HandleFunc("/uploadFile", uploadFile) // receive a single chunk
	http.HandleFunc("/file/chunks", chunks)    // merge uploaded chunks
	// ListenAndServe blocks; it only returns on startup/serve failure.
	if err := http.ListenAndServe(":8080", nil); err != nil {
		log.Fatal("Error while starting GO http server on port - 8080 : ", err) //log error and exit in case of error at server boot up
	}
}
效果:
JS改进:
将文件分割和上传并行进行,提升整体上传速度。
const chunkSize = 2 * 1024 * 1024; // size of each chunk: 2 MiB
// Cross-browser Blob.slice (older Firefox/WebKit shipped vendor-prefixed names).
const blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
// NOTE(review): module-level digest shared by every getHash() call — state
// leaks between uploads; presumably only safe for one upload per page load.
const spark = new SparkMD5.ArrayBuffer();
/**
 * Build a quick fingerprint for `file`.
 * NOTE(review): unlike hashFile() in the first version, this never reads the
 * file's content — the result depends only on an empty digest plus the file
 * name, so two different files with the same name collide. Kept as-is since a
 * content-based hash would require an async read (interface change).
 * @param {File} file
 * @returns {string} hex MD5 string
 */
const getHash = (file) => {
  // BUG FIX: use a fresh digest per call; the shared module-level `spark`
  // was finalized by end(), making repeated calls return unstable results.
  const result = new SparkMD5.ArrayBuffer().end();
  const sparkMd5 = new SparkMD5();
  sparkMd5.append(result);
  sparkMd5.append(file.name);
  return sparkMd5.end();
};
// Placeholder for a per-chunk upload helper; currently a no-op because
// upload() below posts each chunk inline. Resolves to undefined.
const uploadFile = async (file, hash) => {};
/**
 * Improved uploader: slices and queues all chunk uploads in one synchronous
 * pass (no FileReader round-trip per chunk), then asks the server to merge
 * them via /file/chunks.
 */
const upload = async () => {
  const fileDom = $('#file')[0];
  // fileDom.files is a FileList; only the first selected file is uploaded.
  const file = fileDom.files[0];
  if (!file) {
    alert('没有获取文件');
    return;
  }
  const ct = document.getElementById('ct'); // progress log area (was an implicit global)
  const blockCount = Math.ceil(file.size / chunkSize); // total number of chunks
  const hash = getHash(file); // name-based fingerprint (see getHash)
  // With the hash, resumable upload could be implemented here by asking the
  // backend which chunks it already holds before sending anything.
  const axiosPromiseArray = [];
  // BUG FIX: the original wrapped this loop in a Promise whose resolve was
  // never called and read the shared `currentChunk` counter inside
  // onUploadProgress — by the time any progress event fired, the counter had
  // already reached the final value, so every log line showed the same index.
  // A plain loop with a per-iteration `index` fixes both problems.
  for (let index = 0; index < blockCount; index++) {
    const start = index * chunkSize;
    const end = start + chunkSize >= file.size ? file.size : start + chunkSize;
    // Build one multipart/form-data body per chunk.
    const form = new FormData();
    form.append('file', blobSlice.call(file, start, end));
    form.append('name', file.name);
    form.append('total', blockCount);
    form.append('index', index);
    form.append('size', file.size);
    form.append('hash', hash);
    const axiosOptions = {
      // Only log completion once all bytes of this chunk have been sent.
      onUploadProgress: e => {
        if (e.loaded === e.total) {
          ct.textContent = ct.textContent + `第${index}分片上传完成\n\r`;
        }
      },
    };
    axiosPromiseArray.push(axios.post('/uploadFile', form, axiosOptions));
  }
  try {
    const result = await axios.all(axiosPromiseArray);
    console.log(result);
    // All chunks are on the server: ask it to merge them into one file.
    const form = new FormData();
    form.append('size', file.size);
    form.append('name', file.name);
    form.append('total', blockCount);
    form.append('hash', hash);
    await axios.post('/file/chunks', form);
    ct.textContent = ct.textContent + `上传完成\n\r`;
    console.log('全部上传完毕');
  } catch (err) {
    // BUG FIX: failures were silently swallowed by an empty catch.
    console.error(err);
    ct.textContent = ct.textContent + `上传失败\n\r`;
  }
};