文件分片下载

最新推荐文章于 2024-04-29 17:55:25 发布
Crazy丶fff
最新推荐文章于 2024-04-29 17:55:25 发布
阅读量121
点赞数 3
文章标签： python 前端开发语言
本文链接：https://blog.csdn.net/fei7837226/article/details/136889828
版权
1、将文件进行分片，每片10M，以临时文件的方式保存，全部下载完毕之后合并再删除临时文件

2、用多线程下载

3、支持断点续传

4、文件名扩展，如第一次下载test.txt，下一次再下载这个文件，保存的文件名为test(1).txt

5、分片下载完毕之后，先对分片文件进行排序再合并，以免合并写入的时候顺序错误导致文件错误

6、合并之后再对比md5进行校验

7、传参只需要url，文件名，以及此文件的md5

 

package cn.ctyuncdn.service;
 
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.text.StrPool;
import cn.hutool.core.util.StrUtil;
import cn.hutool.crypto.digest.DigestUtil;
import lombok.extern.slf4j.Slf4j;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;
 
@Slf4j
public class MultipartDownload {
 
    /**
     * 文件分片大小（10M）
     */
    private static final int BLOCK_SIZE = 10 * 1024 * 1024;
 
    /**
     * 临时文件后缀，需要根据实际情况修改
     */
    private static final String TEMP_FILE_SUFFIX = StrPool.DOT + "tmp";
 
    /**
     * 文件名称
     */
    private static String FILE_NAME;
 
    /**
     * 下载总目录路径
     */
    private static String DOWNLOAD_PATH;
 
    /**
     * 查询进度的时间间隔，单位为毫秒
     */
    private static final int PROGRESS_UPDATE_INTERVAL = 3000;
 
    /**
     * 线程数
     */
    private static final int THREAD_NUM = 10;
 
    /**
     * 分块下载文件
     *
     * @param url      url
     * @param filename 文件名称
     * @param md5      md5
     * @throws Exception 异常
     */
    public static void download(String url, String filename, String md5) throws Exception {
 
        // 文件名赋值
        FILE_NAME = filename;
        // 获取后缀名
        // String suffix = filename.substring(filename.lastIndexOf(StrPool.DOT) + 1);
        // 获取文件名（不包含后缀）
        String name = filename.substring(0, filename.lastIndexOf(StrPool.DOT));
 
        // 获取连接 得到完整文件的长度
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        long fileSize = connection.getContentLengthLong();
 
        // windows 跟 linux 层级分隔符
        String separator = File.separator;
        // 当前所在目录
        String currentPath = System.getProperty("user.dir");
        // 设置文件下载所在目录 files/xxx
        String filesDirectoryPath = currentPath + separator + "files" + separator;
 
        // 临时目录名 文件名+md5 如：temp/xxx_xxx
        String tempDirectoryName = name + StrPool.UNDERLINE + md5;
 
        // 临时目录路径 temp/文件名+md5/临时文件名 如：temp/0260e4ce2175f7632f543af764e7a3d8/xxx-temp.txt
        String tempDirectoryPath = filesDirectoryPath + "temp" + separator + tempDirectoryName + separator;
 
        // 完整文件保存目录
        DOWNLOAD_PATH = filesDirectoryPath + "download" + separator;
 
        // 将下载文件的大小和分片数量计算出来
        int splitNum = (int) Math.ceil((double) fileSize / BLOCK_SIZE);
        // log.info(">>>总分片数 :_{}", splitNum);
 
        // 先判断目录是否存在 文件保存目录跟临时文件保存目录
        ArrayList<String> directoryPaths = new ArrayList<>();
        directoryPaths.add(tempDirectoryPath);
        directoryPaths.add(filesDirectoryPath);
        directoryPaths.add(DOWNLOAD_PATH);
        for (String directoryPath : directoryPaths) {
            Path path = Paths.get(directoryPath);
            if (!Files.isDirectory(path)) {
                try {
                    // Path absolutePath = path.toAbsolutePath();
                    // // log.info(">>>需要创建文件夹的绝对路径:{}", absolutePath);
                    Files.createDirectories(path);
                    // log.info(">>>文件夹创建成功 ...");
                } catch (IOException e) {
                    e.printStackTrace();
                    return;
                }
            }
        }
 
        // 获取配置文件中的线程配置 启动线程下载每个分片
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_NUM);
        for (int i = 0; i < splitNum; i++) {
            int splitIndex = i;
            String tempFileName = tempDirectoryPath + i + StrPool.UNDERLINE + name + TEMP_FILE_SUFFIX;
            // Path absolutePath = Paths.get(tempFileName).toAbsolutePath();
            // log.info(">>>临时文件的所在位置 :_{} ", absolutePath);
            File outFile = new File(tempFileName);
            executor.execute(() -> {
                try {
                    downloadSplit(url, splitIndex, outFile);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            });
        }
 
        // 等待所有分片下载完毕
        executor.shutdown();
 
        // 下载总进度条
 
        while (!executor.isTerminated()) {
            try {
                Thread.sleep(PROGRESS_UPDATE_INTERVAL);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
 
            // 统计下载进度
            long totalDownloaded = 0;
            for (int i = 0; i < splitNum; i++) {
                File tempFile = new File(tempDirectoryPath + i + StrPool.UNDERLINE + name + TEMP_FILE_SUFFIX);
                totalDownloaded += tempFile.exists() ? tempFile.length() : 0;
            }
 
            // 已经下载的文件大小
            long downloaded = totalDownloaded;
            double progress = (double) downloaded / fileSize * 100;
            log.info("Downloaded: {}%", String.format("%.2f", progress));
        }
 
        /*用于设定超时时间及单位。
        当等待超过设定时间时，会监测ExecutorService是否已经关闭，若关闭则返回true，否则返回false。
        一般情况下会和shutdown方法组合使用。*/
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
 
 
        // 判断分片文件是否全部下载完成
        if (!isAllChunksDownloaded(tempDirectoryPath, splitNum)) {
            log.error("Not all chunks are downloaded yet!");
            return;
        }
 
        // 合并文件
        mergeChunks(tempDirectoryPath, md5);
 
        // 删除临时文件目录以及临时文件
        deleteTempDirectory(tempDirectoryPath);
 
 
        log.info("Download task completed ");
    }
 
    /**
     * 删除临时目录
     *
     * @param tempDirectoryPath 临时目录路径
     * @throws IOException ioexception
     */
    private static void deleteTempDirectory(String tempDirectoryPath) throws IOException {
 
        /*删除目录
        使用Java的文件IO API来遍历目标目录中的每个文件，
        其中使用了Files.walk(directory)方法来遍历目录下的所有文件，
        并且使用了File::delete方法来逐个删除目录下的文件。
        最终，通过sorted()方法来保证我们能够在删除文件之前先删除包含更多文件的目录。*/
 
        Path directory = Paths.get(tempDirectoryPath);
        // 检查目录是否存在
        if (!Files.isDirectory(directory)) {
            // log.info(">>>目录不存在 ...");
            return;
        }
        Files.walk(directory)
                .sorted(Comparator.reverseOrder())
                .map(Path::toFile)
                .forEach(File::delete);
    }
 
 
    /**
     * 分片下载
     *
     * @param url            url
     * @param splitIndex     第几分片
     * @param temporaryFiles 临时文件
     * @throws IOException ioexception
     */
    private static void downloadSplit(String url, int splitIndex, File temporaryFiles) throws IOException {
 
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        connection.setRequestMethod("GET");
        long startByte = (long) splitIndex * BLOCK_SIZE;
        long endByte = (long) (splitIndex + 1) * BLOCK_SIZE - 1;
 
        // 这里判断进行断点续传
        if (temporaryFiles.exists()) {
            // 获取此临时文件还缺少的的部分
            long downloadedBytes = temporaryFiles.length();
            startByte = startByte + downloadedBytes;
            connection.setRequestProperty("Range", "bytes=" + startByte + StrPool.DASHED + endByte);
        } else {
            // 文件不存在说明是第一次下载，不用续传
            connection.setRequestProperty("Range", "bytes=" + startByte + StrPool.DASHED + endByte);
        }
 
 
        /*log.info(">>>此临时文件的起始位置 :_{}", startByte);
        log.info(">>>此临时文件的结束位置 :_{}", endByte);*/
 
        InputStream in = connection.getInputStream();
        RandomAccessFile out = new RandomAccessFile(temporaryFiles, "rw");
        byte[] buffer = new byte[1024];
        int len;
 
        if (temporaryFiles.exists()) {
            // 从尾部继续写入
            out.seek(out.length());
        }
        // 开始写入
        // log.info(">>>开始写入到此临时文件 :_{}", temporaryFiles);
        while ((len = in.read(buffer)) != -1) {
            out.write(buffer, 0, len);
        }
        // 关闭流
        out.close();
        in.close();
 
        // 关闭此连接
        connection.disconnect();
    }
 
 
    /**
     * 将文件分片合并成一个完整的文件
     *
     * @param tempDirectoryPath 分片文件所在的目录
     * @param md5               md5
     * @throws IOException ioexception
     */
    public static void mergeChunks(String tempDirectoryPath, String md5) throws IOException {
        File chunksDir = new File(tempDirectoryPath);
        // 获取分片文件列表
        List<File> chunkFiles = Arrays.stream(Objects.requireNonNull(chunksDir.listFiles((dir, name) -> name.endsWith(".tmp"))))
                .collect(Collectors.toList());
        // 按文件名升序排序
        chunkFiles = chunkFiles.stream().sorted(Comparator.comparingInt(file -> Integer.parseInt(StrUtil.subBefore(file.getName(), StrPool.UNDERLINE, false))))
                .collect(Collectors.toList());
 
        // 文件输出路径
        Path filePath = Paths.get(DOWNLOAD_PATH, FILE_NAME);
        // 判断文件是否存在，如果存在就加数字编号
        int index = 0;
        while (Files.exists(filePath)) {
            index++;
            String newName = addNumberSuffix(FILE_NAME, index);
            filePath = Paths.get(DOWNLOAD_PATH, newName);
        }
 
        File mergedFile= filePath.toFile();
 
        FileOutputStream os = new FileOutputStream(mergedFile, true);
        byte[] buffer = new byte[1024];
        int len;
 
        for (File file : chunkFiles) {
            InputStream is = Files.newInputStream(file.toPath());
            /*log.info(">>>>开始合并文件 ...");
            log.info(">>>>start_merging_files ...");*/
            while ((len = is.read(buffer)) > 0) {
                os.write(buffer, 0, len);
            }
 
            // 输入流关闭
            is.close();
        }
        // 流关闭
        os.close();
        // 合并完成
        String fileMd5 = DigestUtil.md5Hex(mergedFile);
        log.info("Verifying file integrity ");
        if (Objects.equals(fileMd5, md5)) {
            // log.error(">>>> md5值匹配 文件完整");
            log.error("File validation succeeded ");
            Path absolutePath = filePath.toAbsolutePath();
            // log.info(">>>>完整文件保存路径 :_{}", absolutePath);
            log.info("The location where the file is saved : {}", absolutePath);
        } else {
            // log.error(">>>> md5值不匹配 文件损坏");
            log.error("File validation failed ......");
            // 提示出错重新下载 删除掉临时目录跟合并完成的文件
            deleteTempDirectory(tempDirectoryPath);
            FileUtil.del(filePath);
            // 提示。。。
        }
    }
 
    /**
     * 判断分片文件是否全部下载完成
     *
     * @param tempDirectoryPath 临时目录路径
     * @param totalChunks       总分片数
     * @return boolean
     */
    public static boolean isAllChunksDownloaded(String tempDirectoryPath, int totalChunks) {
        File chunksDir = new File(tempDirectoryPath);
        File[] files = chunksDir.listFiles(file -> file.getName().endsWith(".tmp"));
        return files != null && files.length == totalChunks;
    }
 
 
    /**
     * 添加后缀数量 
     * 如第一次下载了 test.txt，下一次再下载此文件，文件保存时文件名为test(1).txt，以此类推
     *
     * @param fileName 文件名称
     * @param index    指数
     * @return {@link String}
     */
    private static String addNumberSuffix(String fileName, int index) {
        // 如果文件名没有后缀，则在文件名后面添加编号
        // 如果有后缀，则在后缀前面添加编号
        int dotIndex = fileName.lastIndexOf(StrPool.DOT);
        StringBuilder sb = new StringBuilder(fileName.length() + 2);
        sb.append(dotIndex >= 0 ? fileName.substring(0, dotIndex) : fileName);
        sb.append("(").append(index).append(")");
        if (dotIndex >= 0) {
            sb.append(fileName.substring(dotIndex));
        }
        return sb.toString();
    }
}