An Analysis of Front-End File Upload

Overview

Front-end file upload is a well-worn topic, but doing it well takes full-stack thinking plus a solid grasp of the HTTP protocol and of file handling in Node.
The demo here is built with Vue + Element UI + Node.js.

1. A Basic Upload Implementation

FormData

    <input type="file" @change="handleFileChange" />
    <el-button type="primary" @click="handleUpload">Upload</el-button>

    handleFileChange(e) {
        const [file] = e.target.files;
        if (!file) return;
        // keep a reference so handleUpload can use it later
        this.container.file = file;
    },
    handleUpload() {
        const form = new FormData();
        form.append("filename", this.container.file.name);
        form.append("file", this.container.file);
        request({
            url: '/upload',
            data: form,
        });
    }
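
All of the snippets in this article go through a request helper that the excerpt never shows. A minimal sketch of what it might look like, assuming a plain XMLHttpRequest wrapper whose onProgress and requestList options (used in later sections) map to upload-progress events and to a list of in-flight requests that can be aborted (names and defaults here are illustrative):

    // hypothetical request() helper: wraps XMLHttpRequest so callers can send
    // FormData, track upload progress, and keep a handle for aborting
    function request({ url, data, headers = {}, onProgress = e => e, requestList }) {
        return new Promise(resolve => {
            const xhr = new XMLHttpRequest();
            xhr.upload.onprogress = onProgress; // per-chunk upload progress
            xhr.open("post", "http://localhost:3000" + url);
            Object.keys(headers).forEach(key => xhr.setRequestHeader(key, headers[key]));
            xhr.onload = e => {
                // drop the finished request from the pending list, if one is kept
                if (requestList) {
                    const i = requestList.findIndex(item => item === xhr);
                    if (i !== -1) requestList.splice(i, 1);
                }
                resolve({ data: e.target.response });
            };
            if (requestList) requestList.push(xhr); // keep a handle so an upload can be aborted
            xhr.send(data);
        });
    }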

node

    const http = require("http")
    const path = require("path")
    const Controller = require("./controller")
    const schedule = require("./schedule")

    const server = http.createServer()
    const UPLOAD_DIR = path.resolve(__dirname, "..", "target") // directory where uploaded files and chunks are stored
    const ctrl = new Controller(UPLOAD_DIR)

    server.on("request", async (req, res) => {
        // allow cross-origin requests from the dev front end
        res.setHeader("Access-Control-Allow-Origin", "*")
        res.setHeader("Access-Control-Allow-Headers", "*")
        if (req.method === "OPTIONS") {
            res.statusCode = 200
            res.end()
            return
        }
        if (req.method === "POST") {
            if (req.url === "/upload") {
                await ctrl.handleUpload(req, res)
                return
            }
            // the merge and verify handlers from later sections are routed here in the same way
        }
    })

    // start the periodic chunk-cleanup job (see section 10)
    schedule.start(UPLOAD_DIR)

    server.listen(3000, () => console.log("listening on port 3000"))

Controller.js

    async handleUpload(req, res) {
        const multipart = new multiparty.Form()
        multipart.parse(req, async (err, fields, files) => {
            if (err) {
                console.log(err)
                return
            }
            // multiparty wraps every field and file in an array
            const [chunk] = files.file
            const [hash] = fields.hash           // chunk name, e.g. "<fileHash>-<index>"
            const [fileHash] = fields.fileHash   // hash of the whole file
            const [filename] = fields.filename
            const filePath = path.resolve(this.UPLOAD_DIR, `${fileHash}${extractExt(filename)}`)
            const chunkDir = path.resolve(this.UPLOAD_DIR, fileHash)
            // the complete file already exists: nothing to do
            if (fse.existsSync(filePath)) {
                res.end("file exist")
                return
            }
            if (!fse.existsSync(chunkDir)) {
                await fse.mkdirs(chunkDir)
            }
            // move the temp file written by multiparty into the chunk directory
            await fse.move(chunk.path, `${chunkDir}/${hash}`)
            res.end("received file chunk")
        })
    }

Summary of techniques

Using the FormData object:
1. simulates a set of form controls with key/value pairs, i.e. it bundles the name and value of every form field (much like serializing a form into a query string, but sent as multipart/form-data);
2. uploads binary files asynchronously.

http server: runs the local Node service
fs: file handling on the server
multiparty: parses the multipart POST body

2. Adding Drag-and-Drop and Paste

Techniques: the drop event and clipboardData

<div class="drop-box" id="drop-box"></div>

    const box = document.querySelector("#drop-box");
    // dragover must be cancelled too, otherwise the drop event never fires
    box.addEventListener("dragover", e => e.preventDefault(), false);
    box.addEventListener("drop", function (e) {
        e.preventDefault(); // cancel the browser's default behaviour (opening the dropped file)
        const fileList = e.dataTransfer.files; // the files being dragged in
        const len = fileList.length; // number of files dropped
        const [file] = fileList;
        if (!file) return;
        ...upload as before
    }, false);

Paste

    box.addEventListener('paste', function (event) {
        const data = event.clipboardData
        ....
    });
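
The elided paste handling could look roughly like the sketch below: pasted files are read from clipboardData.files, then pushed through the same upload path as before (this is an illustrative sketch, not the article's exact code):

    box.addEventListener("paste", event => {
        // pasted files (e.g. screenshots) show up in clipboardData.files
        const files = Array.from(event.clipboardData.files);
        if (!files.length) return;
        event.preventDefault();
        // reuse the same upload path as the file input and drag-and-drop
        files.forEach(file => {
            const form = new FormData();
            form.append("filename", file.name);
            form.append("file", file);
            request({ url: "/upload", data: form });
        });
    });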

3. Large-File Upload

Technique: slicing the file into chunks with Blob.slice
    const chunks = this.createFileChunk(this.container.file);

    // split the file into fixed-size slices; SIZE is the default chunk size defined elsewhere in the component
    createFileChunk(file, size = SIZE) {
        const chunks = [];
        let cur = 0;
        while (cur < file.size) {
            chunks.push({ file: file.slice(cur, cur + size) });
            cur += size;
        }
        return chunks;
    }
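
Later sections refer to this.chunks, an array of { chunk, hash, index } objects. A sketch of how it could be built from createFileChunk once the file hash is known, assuming the per-chunk name is the whole-file hash plus the chunk index (the naming handleUpload1 uses in section 9):

    // after the whole-file hash has been computed (sections 4-7),
    // give every slice a name and an index so the server can reassemble them
    const chunks = this.createFileChunk(this.container.file);
    this.chunks = chunks.map(({ file }, index) => ({
        chunk: file,
        index,
        hash: this.container.hash + "-" + index // e.g. "abc123-0"
    }));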

  • Send one request per chunk, then ask the server to merge them:
    async handleMerge(req, res) {
        // resolvePost collects and parses the JSON request body
        const data = await resolvePost(req)
        const { fileHash, filename, size } = data
        const ext = extractExt(filename)
        const filePath = path.resolve(this.UPLOAD_DIR, `${fileHash}${ext}`)
        // stitch all stored chunks into the final file
        await this.mergeFileChunk(filePath, fileHash, size)
        res.end(
            JSON.stringify({
                code: 0,
                message: "file merged success"
            })
        )
    }
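
handleMerge delegates to this.mergeFileChunk, which the excerpt doesn't show. A simplified sketch, assuming the chunks sit in a directory named after the file hash and are named <fileHash>-<index> (the naming used elsewhere in this article), appended to the target file one by one in index order; the size argument would only matter in a variant that writes each chunk in parallel at its own byte offset:

    // fse and path are already required at the top of controller.js

    // stream one chunk into the target file, then delete the chunk file
    const pipeStream = (chunkPath, writeStream) =>
        new Promise(resolve => {
            const readStream = fse.createReadStream(chunkPath)
            writeStream.on("finish", () => {
                fse.unlinkSync(chunkPath) // the chunk is no longer needed
                resolve()
            })
            readStream.pipe(writeStream) // pipe ends the write stream once the chunk is fully read
        })

    // Controller method: stitch all stored chunks back into the final file
    async mergeFileChunk(filePath, fileHash, size) {
        const chunkDir = path.resolve(this.UPLOAD_DIR, fileHash)
        const chunkPaths = await fse.readdir(chunkDir)
        // sort by the numeric index after the last "-" so chunks are appended in order
        chunkPaths.sort((a, b) => a.split("-").pop() - b.split("-").pop())
        for (const chunkName of chunkPaths) {
            await pipeStream(
                path.resolve(chunkDir, chunkName),
                fse.createWriteStream(filePath, { flags: "a" }) // append to the target file
            )
        }
        fse.rmdirSync(chunkDir) // remove the now-empty chunk directory
    }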

4. Resumable Upload + Instant Upload

Techniques:

MD5 fingerprinting plus a caching mindset. Compute an MD5 fingerprint of the file and, before uploading, ask the back end whether that hash already exists. If only some chunks are there, upload just the missing ones (resumable upload); if the whole file is already there, skip the upload entirely (instant upload).
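
On the front end, that pre-upload check might look like the sketch below (the /verify route name and the verifyRequest helper are assumptions made for illustration, consistent with the handleVerify handler that answers it on the server):

    // ask the server whether this file, or some of its chunks, already exists
    async verifyRequest(filename, fileHash) {
        const { data } = await request({
            url: "/verify",
            headers: { "content-type": "application/json" },
            data: JSON.stringify({ filename, hash: fileHash })
        });
        return JSON.parse(data); // { uploaded, uploadedList }
    }

    // during upload:
    // const { uploaded, uploadedList } = await this.verifyRequest(file.name, this.container.hash);
    // if (uploaded) return;                  // instant upload: the whole file is already there
    // await this.uploadChunks(uploadedList); // otherwise skip the chunks the server already has

The server-side handleVerify that answers this request: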

    async handleVerify(req, res) {
        const data = await resolvePost(req)
        const { filename, hash } = data
        const ext = extractExt(filename)
        const filePath = path.resolve(this.UPLOAD_DIR, `${hash}${ext}`)
        let uploaded = false
        let uploadedList = []
        if (fse.existsSync(filePath)) {
            // the complete file already exists: instant upload
            uploaded = true
        } else {
            // the file isn't complete, but some chunks may already be there
            uploadedList = await getUploadedList(path.resolve(this.UPLOAD_DIR, hash))
        }
        res.end(
            JSON.stringify({
                uploaded,
                uploadedList // names of chunks already on disk (hidden files filtered out)
            })
        )
    }
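
getUploadedList isn't shown in the excerpt; a minimal sketch that matches the "hidden files filtered out" comment above:

    // list the chunk names already saved for this file, ignoring hidden files
    // (e.g. .DS_Store) that sometimes show up in the chunk directory
    async function getUploadedList(dirPath) {
        return fse.existsSync(dirPath)
            ? (await fse.readdir(dirPath)).filter(name => name[0] !== ".")
            : []
    }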

5. Optimizing the Hash Computation

Technique: Web Worker

Computing the MD5 of a large file on the main thread is too slow and blocks the UI, so move the work into a Web Worker.

// web-worker

    // hash worker: computes the md5 incrementally with spark-md5
    self.importScripts('spark-md5.min.js')
    self.onmessage = e => {
        // receive the chunk list from the main thread
        const { chunks } = e.data
        const spark = new self.SparkMD5.ArrayBuffer()
        let progress = 0
        let count = 0
        const loadNext = index => {
            const reader = new FileReader()
            reader.readAsArrayBuffer(chunks[index].file)
            reader.onload = e => {
                // use a separate counter; completion order can't rely on index alone
                count++
                // feed this chunk into the incremental md5
                spark.append(e.target.result)
                if (count === chunks.length) {
                    // all chunks processed: report the final hash
                    self.postMessage({
                        progress: 100,
                        hash: spark.end()
                    })
                } else {
                    // report progress after each chunk
                    progress += 100 / chunks.length
                    self.postMessage({
                        progress
                    })
                    // read the next chunk
                    loadNext(count)
                }
            }
        }
        // start with the first chunk
        loadNext(0)
    }
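
For completeness, the main-thread side that spawns this worker might look like the sketch below; the worker file name hash.js is only illustrative:

    // hand the chunk list to a Web Worker and resolve once it reports the final hash
    calculateHashWorker(chunks) {
        return new Promise(resolve => {
            this.worker = new Worker("/hash.js"); // the worker script shown above
            this.worker.postMessage({ chunks });
            this.worker.onmessage = e => {
                const { progress, hash } = e.data;
                this.hashProgress = Number(progress.toFixed(2));
                if (hash) {
                    // hashing finished: the worker sent back the md5 of the whole file
                    resolve(hash);
                }
            };
        });
    }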

6. Time Slicing

Techniques: borrowing the scheduling idea behind React's Fiber architecture, i.e. doing work in the browser's idle time

requestIdleCallback

    requestIdleCallback(myNonEssentialWork);
    function myNonEssentialWork(deadline) {
        // deadline.timeRemaining() returns how much time is left in the current frame;
        // keep working while the frame still has time and the task queue isn't empty
        while (deadline.timeRemaining() > 0 && tasks.length > 0) {
            doWorkIfNeeded();
        }
        if (tasks.length > 0) {
            // not finished yet: ask for the next idle period
            requestIdleCallback(myNonEssentialWork);
        }
    }
    async calculateHashIdle(chunks) {
        return new Promise(resolve => {
            const spark = new SparkMD5.ArrayBuffer();
            let count = 0;
            // append one chunk's content to the incremental md5
            const appendToSpark = async file => {
                return new Promise(resolve => {
                    const reader = new FileReader();
                    reader.readAsArrayBuffer(file);
                    reader.onload = e => {
                        spark.append(e.target.result);
                        resolve();
                    };
                });
            };
            const workLoop = async deadline => {
                // there is work left and the current frame still has idle time
                while (count < chunks.length && deadline.timeRemaining() > 1) {
                    await appendToSpark(chunks[count].file);
                    count++;
                    if (count < chunks.length) {
                        // still hashing: update the progress indicator
                        this.hashProgress = Number(((100 * count) / chunks.length).toFixed(2));
                    } else {
                        // done: report 100% and hand back the final hash
                        this.hashProgress = 100;
                        resolve(spark.end());
                        return;
                    }
                }
                // out of idle time: continue in the next idle period
                window.requestIdleCallback(workLoop);
            };
            window.requestIdleCallback(workLoop);
        });
    }

7. Sampled Hashing

The idea borrows from Bloom filters: hash only a sample of the file instead of all of it. If the sampled hashes differ, the files are definitely different; if they match, the files are almost certainly the same, with a tiny chance of a false positive. That small loss of accuracy buys a large speed-up in fingerprinting.

    async calculateHashSample() {
        return new Promise(resolve => {
            const spark = new SparkMD5.ArrayBuffer();
            const reader = new FileReader();
            const file = this.container.file;
            // total file size
            const size = this.container.file.size;
            let offset = 2 * 1024 * 1024;
            // the first 2 MB is taken in full
            let chunks = [file.slice(0, offset)];
            let cur = offset;
            while (cur < size) {
                if (cur + offset >= size) {
                    // the last region is also taken in full
                    chunks.push(file.slice(cur, cur + offset));
                } else {
                    // middle regions: take 2 bytes from the start, the middle and the end
                    const mid = cur + offset / 2;
                    const end = cur + offset;
                    chunks.push(file.slice(cur, cur + 2));
                    chunks.push(file.slice(mid, mid + 2));
                    chunks.push(file.slice(end - 2, end));
                }
                // move to the next 2 MB region
                cur += offset;
            }
            // hash the concatenated sample
            reader.readAsArrayBuffer(new Blob(chunks));
            reader.onload = e => {
                spark.append(e.target.result);
                resolve(spark.end());
            };
        });
    }

8. Controlling Request Concurrency, Plus Retry

    async sendRequest(forms, max = 4) {
        return new Promise(resolve => {
            const len = forms.length;
            let idx = 0;     // next chunk to send
            let counter = 0; // chunks finished
            const start = async () => {
                // while there are chunks left and a free slot
                while (idx < len && max > 0) {
                    max--; // occupy a slot
                    console.log(idx, "start");
                    const form = forms[idx].form;
                    const index = forms[idx].index;
                    idx++;
                    request({
                        url: '/upload',
                        data: form,
                        onProgress: this.createProgresshandler(this.chunks[index]),
                        requestList: this.requestList
                    }).then(() => {
                        max++; // release the slot
                        counter++;
                        if (counter === len) {
                            resolve();
                        } else {
                            start();
                        }
                    });
                }
            };
            start();
        });
    }
    async uploadChunks(uploadedList = []) {
        // Firing every chunk at once is a disaster for big files: even though the
        // hash work no longer blocks the page, the flood of concurrent requests
        // can hang the browser. So cap the concurrency, e.g. at 4.
        const list = this.chunks
            // skip chunks the server already has (from the verify step)
            .filter(chunk => uploadedList.indexOf(chunk.hash) == -1)
            .map(({ chunk, hash, index }) => {
                const form = new FormData();
                form.append("chunk", chunk);
                form.append("hash", hash);
                form.append("filename", this.container.file.name);
                form.append("fileHash", this.container.hash);
                return { form, index };
            });
        // naive version: full concurrency
        // await Promise.all(list.map(({ form, index }) =>
        //     request({
        //         url: "/upload",
        //         data: form,
        //         onProgress: this.createProgresshandler(this.chunks[index]),
        //         requestList: this.requestList
        //     })
        // ));
        // controlled version: at most 4 requests in flight
        await this.sendRequest(list, 4);
        if (uploadedList.length + list.length === this.chunks.length) {
            // everything (newly uploaded + already present) is on the server: merge
            await this.mergeRequest();
        }
    }
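
The section title also promises retry, which the snippet above doesn't implement. One way to bolt it on, sketched here as an illustration, is to wrap each chunk upload in a small bounded-retry helper before it is handed to the concurrency loop (for this to kick in, the request helper would need to reject on failure, e.g. in xhr.onerror):

    // retry a single chunk upload up to `limit` times before giving up
    async function uploadWithRetry(sendChunk, limit = 3) {
        let error;
        for (let i = 0; i < limit; i++) {
            try {
                return await sendChunk(); // sendChunk() fires one request() call
            } catch (e) {
                error = e; // remember the failure and try again
            }
        }
        throw error; // all attempts failed: surface the last error
    }

    // usage inside the concurrency loop (sketch):
    // uploadWithRetry(() => request({ url: "/upload", data: form })).then(...)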

9. A Slow-Start Strategy

This borrows from TCP congestion control: adjust the chunk size dynamically based on current network conditions.

  • Carry a size field with each chunk (the number of progress-bar segments is then no longer fixed); adjust createFileChunk accordingly and time each request.
  • Suppose the target is to upload one chunk every 30 seconds.
  • Start with a 1 MB chunk: if it uploads in 10 seconds, grow the next chunk to 3 MB.
  • If it takes 60 seconds, shrink the next chunk to 500 KB, and so on.
  • Combining concurrency with slow start gets complicated, so to demonstrate the logic the chunks are uploaded one at a time in a new handleUpload1 function.
    async handleUpload1() {
        // @todo the size-scaling ratio could be smoothed
        // @todo combine with concurrency
        // slow-start upload: one chunk at a time, resizing as we go
        const file = this.container.file
        if (!file) return;
        this.status = Status.uploading;
        const fileSize = file.size
        let offset = 1024 * 1024 // start with 1 MB
        let cur = 0
        let count = 0
        this.container.hash = await this.calculateHashSample();
        while (cur < fileSize) {
            // slice off the next `offset` bytes
            const chunk = file.slice(cur, cur + offset)
            cur += offset
            const chunkName = this.container.hash + "-" + count;
            const form = new FormData();
            form.append("chunk", chunk);
            form.append("hash", chunkName);
            form.append("filename", file.name);
            form.append("fileHash", this.container.hash);
            form.append("size", chunk.size);
            let start = new Date().getTime()
            await request({ url: '/upload', data: form })
            const now = new Date().getTime()
            const time = ((now - start) / 1000).toFixed(4)
            // ratio of actual time to the 30-second target, clamped to [0.5, 2]
            let rate = time / 30
            if (rate < 0.5) rate = 0.5
            if (rate > 2) rate = 2
            console.log(`chunk ${count} was ${this.format(offset)}, took ${time}s (${rate}x the 30s target), next size ${this.format(offset / rate)}`)
            // scale the next chunk size inversely with the rate
            offset = parseInt(offset / rate)
            count++
        }
    }

chunk 0 was 1024.00KB, took 13.2770s (0.5x the 30s target), next size 2.00MB
chunk 1 was 2.00MB, took 25.4130s (0.8471x the 30s target), next size 2.36MB
chunk 2 was 2.36MB, took 14.1260s (0.5x the 30s target), next size 4.72MB

10. Cleaning Up Orphaned Chunks

    // For easy testing this scans every 5 seconds and deletes chunks older than 1 minute.
    const fse = require('fs-extra')
    const path = require('path')
    const schedule = require('node-schedule')

    // delete a chunk file once it has expired
    function remove(file, stats) {
        const now = new Date().getTime()
        const offset = now - stats.ctimeMs
        if (offset > 1000 * 60) {
            // older than 60 seconds: treat it as an orphaned chunk
            console.log(file, 'expired, wasting space, deleting')
            fse.unlinkSync(file)
        }
    }

    async function scan(dir, callback) {
        const files = fse.readdirSync(dir)
        files.forEach(filename => {
            const fileDir = path.resolve(dir, filename)
            const stats = fse.statSync(fileDir)
            if (stats.isDirectory()) {
                // recurse into chunk directories and remove expired chunks;
                // finished files at the top level are never deleted because
                // the initial call passes no callback
                return scan(fileDir, remove)
            }
            if (callback) {
                callback(fileDir, stats)
            }
        })
    }

    let start = function (UPLOAD_DIR) {
        // run the scan every 5 seconds
        schedule.scheduleJob("*/5 * * * * *", function () {
            console.log('scan started')
            scan(UPLOAD_DIR)
        })
    }
    exports.start = start

scan started
/upload/target/625c…/625c…-0 expired, wasting space, deleting
/upload/target/625c…/625c…-1 expired, wasting space, deleting
/upload/target/625c…/625c…-10 expired, wasting space, deleting
/upload/target/625c…/625c…-11 expired, wasting space, deleting
/upload/target/625c…/625c…-12 expired, wasting space, deleting
