Excel 多线程导出大数据 50W + 数据

醋椒豆腐

已于 2022-04-05 15:14:06 修改

阅读量8.2k

点赞数 6

分类专栏： Java 文章标签： java 后端

于 2022-04-05 14:55:11 首次发布

本文链接：https://blog.csdn.net/weixin_44748605/article/details/123965559

版权

Java 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

Excel 多线程导出大文件

- 前言
- Work_easyExcel多线程大数据导出
- Demo_Excel多线程大数据导出
- 多快

- 前言

对于 10W+ 以上的数据导出，可以有以下思路：

异步导出（@Async 注解 + @EnableAsync 注解）
分片导出，利用多线程每 2W 条数据打包成一个 Excel ;
将这些Excel 打包成一个 Zip流传到阿里云 OSS 上；
待生成 zip文件之后，通过MQ 或者其他模式消息通知到用户，导出失败或者成功；

提示：这里主要讲解如果利用多线程导出大数据 excel （文件不落地），并生成 zip 流上传

- Work_easyExcel多线程大数据导出

Pom.xml

maven+1.8jdk+poi+easyExcel

		<!--xls-->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <!--xlsx-->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <version>2.2.6</version>
        </dependency>

代码如下：

利用多线程生成 Excel 流

import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.metadata.BaseRowModel;
import com.alibaba.excel.metadata.Sheet;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.write.metadata.holder.WriteWorkbookHolder;
import lombok.extern.log4j.Log4j;
import org.apache.poi.ss.usermodel.Workbook;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;


/**
 * 导出new线程池工具类
 */
@Log4j
public class PageThreadPool<E> implements Runnable {

    private final CountDownLatch countDownLatch;
    private String title;
    private Class<? extends BaseRowModel> clazz;
    private Map<String, byte[]> byteList;
    private List<E> list;

    public PageThreadPool(CountDownLatch countDownLatch, String title, List<E> list, Class<? extends BaseRowModel> clazz, Map<String, byte[]> byteList) {
        this.countDownLatch = countDownLatch;
        this.title = title;
        this.list = list;
        this.clazz = clazz;
        this.byteList = byteList;
    }

    @Override
    public void run() {
        ByteArrayOutputStream bos = null;
        ExcelWriter writer = null;
        Workbook workbook;
        try {
            Sheet sheet1 = new Sheet(1, 0, clazz);
            sheet1.setSheetName("sheet1");
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            writer = new ExcelWriter(out, ExcelTypeEnum.XLSX);
            writer.write(list, sheet1);
            workbook = writer.writeContext().getWorkbook();
            WriteWorkbookHolder holder = writer.writeContext().writeWorkbookHolder();
            holder.setAutoCloseStream(true);
            bos = new ByteArrayOutputStream();
            workbook.write(bos);
            bos.flush();
            out.flush();
            out.close();
            //put 文件名和文件字节数组
            byteList.put(this.title, bos.toByteArray());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (bos != null) {
                    bos.close();
                }
                if (writer != null) {
                    writer.writeContext().finish(true);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            if (countDownLatch != null) {
                countDownLatch.countDown();
            }
        }
    }
}

根据每次生成 Excel 文档的行数计算线程池的大小

import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.metadata.BaseRowModel;
import com.alibaba.excel.metadata.Sheet;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.aliyun.oss.OSSClient;
import lombok.extern.log4j.Log4j;
import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.mongodb.core.BulkOperations;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.core.query.Update;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; 

public class bigDataExport{

	 /**
     * excel 最大行数
     */
    private static final Integer BIG_EXCEL_ROWS = 20000;
    
 	/**
     * excel导出
     *
     * @param list         数据列表
     * @param titleSerialN 生成Excel标题
     * @param object       样式
     * @param <E>
     * @return
     */
    public static <E> ByteArrayInputStream bigDataExport(List<E> list, String titleSerialN, Class<? extends BaseRowModel> object) {

        ByteArrayInputStream is;
        byte[] buffer;
        Map<String, byte[]> byteList = new HashMap<>();
        //大于多少行 进行多线程操作
        if (list.size() > BIG_EXCEL_ROWS) {
            //页数
            int pageNum = list.size() / BIG_EXCEL_ROWS;
            //取余
            int lastCount = list.size() % BIG_EXCEL_ROWS;
            // 计算几页
            int page = lastCount == 0 ? pageNum : pageNum + 1;
            //倒计时锁
            CountDownLatch downLatch = new CountDownLatch(page);
            //定义线程池 按sheet设置线程池量
            ExecutorService executor = Executors.newFixedThreadPool(page);
            List<E> subList;
            for (int c = 0; c <= pageNum; c++) {
                int rowNum = BIG_EXCEL_ROWS;
                String title = titleSerialN + "_" + (c + 1) + ".xlsx";
                if (c == pageNum) {
                    if (lastCount == 0) {
                        continue;
                    }
                    subList = list.subList(c * rowNum, c * rowNum + lastCount);
                } else {
                    subList = list.subList(c * rowNum, (c + 1) * rowNum);
                }
                //动态生成文件名：
                executor.execute(new PageThreadPool(downLatch, title, subList, object, byteList));
            }
            try {
                downLatch.await();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            executor.shutdown();
        } else {
            ExcelWriter writer = null;
            Workbook workbook = null;
            ByteArrayOutputStream out = null;
            ByteArrayOutputStream bos = null;
            try {
                Sheet sheet1 = new Sheet(1, 0, object);
                sheet1.setSheetName("sheet1");
                out = new ByteArrayOutputStream();
                writer = new ExcelWriter(out, ExcelTypeEnum.XLSX);
                writer.write(list, sheet1);
                workbook = writer.writeContext().getWorkbook();
                bos = new ByteArrayOutputStream();
                workbook.write(bos);
                byteList.put(titleSerialN + ".xlsx", bos.toByteArray());
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    if (out != null) {
                        out.close();
                    }
                    if (bos != null) {
                        bos.close();
                    }
                    if (writer != null) {
                        writer.writeContext().finish(true);
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        buffer = ZipUtils.zipFileSteam(byteList);
        is = new ByteArrayInputStream(buffer);
        return is;
    }
}

将生成的 Excel 流压缩为 Zip流


import lombok.extern.log4j.Log4j;
import java.io.*;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;


/**
 * 压缩zip工具类
 */
@Log4j
public class ZipUtils {

    /**
     * 压缩工具类
     *
     * @param byteList 文件字节码Map，k:fileName，v：byte[]
     * @return 返回压缩流
     */
    public static byte[] zipFileSteam(Map<String, byte[]> byteList) {
        byte[] buffer = null;
        ByteArrayOutputStream bos = null;
        try {
            bos = new ByteArrayOutputStream();
            ZipOutputStream zipOutputStream = new ZipOutputStream(bos);
            byteList.forEach((k, v) -> {
                //写入一个条目，我们需要给这个条目起个名字，相当于起一个文件名称
                try {
                    zipOutputStream.putNextEntry(new ZipEntry(k));
                    zipOutputStream.write(v);
                } catch (IOException e) {
                    e.printStackTrace();
                    log.info(StringUtil.join(LogConstant.SERVICE, LogConstant.RESULT, LogConstant.FAIL, JSONUtil.toStr(e.getMessage())));
                }
            });
            //关闭条目
            zipOutputStream.closeEntry();
            zipOutputStream.close();
            buffer = bos.toByteArray();
        } catch (IOException e) {
            e.printStackTrace();
        }finally {
            try {
                bos.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return buffer;
    }

样式实体类

import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.annotation.write.style.ColumnWidth;
import com.alibaba.excel.metadata.BaseRowModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import java.io.Serializable;
import java.math.BigDecimal;


@Data
public class style extends BaseRowModel implements Serializable {
    /**
     * 日期 YYYY/MM/DD HH:MM:SS
     */
    @ExcelProperty(value="时间", index = 0)
    @ColumnWidth(12)
    @ApiModelProperty(value = "时间")
    private String dateTime;

    @ExcelProperty(value="交易时间", index = 1)
    @ColumnWidth(12)
    @ApiModelProperty(value = "交易时间")
    private String dealDatetime;

    @ExcelProperty(value="个人账户", index = 2)
    @ColumnWidth(15)
    @ApiModelProperty(value = "个人账户")
    private String accountName;
 
    @ExcelProperty(value="组名", index = 3)
    @ColumnWidth(15)
    @ApiModelProperty(value = "组名")
    private String groupName;

调用


    public static void main(String[] args) {
        /**
         *
         * list 从数据查出来的数据源
         *
         * is   打印 输出一个字节流
         *
         *  AnpayAccountMoreDynamicBalanceDetailGridOut 样式
         */
        String titleSerialN = "生成的文档压缩.zip";
        List<?> list = new ArrayList();
        InputStream is = null;
        is = bigDataExport(list, titleSerialN, style.class);
        System.out.println("is = " + is);
    }

注意：

当数据导出数量为 3W 时，项目启动后，第一次，第二次…直到 第四次时导出会抛出warn异常,但是都不会影响结果的导出，也没有准确报出代码中的行数
当数据导出 60W 时，项目启动后，第一次导出可以，第二次导出时会抛出抛出warn异常，也没有准确报出代码中的行数，但是都不会影响结果的导出

2022-04-01 16:44:38.885 [Finalizer] WARN  com.alibaba.excel.ExcelWriter - Destroy object failed
com.alibaba.excel.exception.ExcelGenerateException: Can not close IO.
	at com.alibaba.excel.context.WriteContextImpl.finish(WriteContextImpl.java:378)
	at com.alibaba.excel.write.ExcelBuilderImpl.finish(ExcelBuilderImpl.java:95)
	at com.alibaba.excel.ExcelWriter.finish(ExcelWriter.java:329)
	at com.alibaba.excel.ExcelWriter.finalize(ExcelWriter.java:340)
	at java.lang.System$2.invokeFinalize(System.java:1270)
	at java.lang.ref.Finalizer.runFinalizer(Finalizer.java:102)
	at java.lang.ref.Finalizer.access$100(Finalizer.java:34)
	at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:217)
Caused by: java.io.IOException: Stream closed
	at java.io.BufferedWriter.ensureOpen(BufferedWriter.java:116)
	at java.io.BufferedWriter.flushBuffer(BufferedWriter.java:126)
	at java.io.BufferedWriter.flush(BufferedWriter.java:253)
	at org.apache.poi.xssf.streaming.SheetDataWriter.close(SheetDataWriter.java:127)
	at org.apache.poi.xssf.streaming.SXSSFSheet.getWorksheetXMLInputStream(SXSSFSheet.java:98)
	at org.apache.poi.xssf.streaming.SXSSFWorkbook.injectData(SXSSFWorkbook.java:389)
	at org.apache.poi.xssf.streaming.SXSSFWorkbook.write(SXSSFWorkbook.java:936)
	at com.alibaba.excel.context.WriteContextImpl.finish(WriteContextImpl.java:339)
	... 7 common frames omitted

解决 Bug ，在 finally 中可以在 writeContext 强制抑制抛出异常，具体代码可见上面：
在这里插入图片描述

- Demo_Excel多线程大数据导出

Pom.xml

		<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>ooxml-schemas</artifactId>
            <version>1.4</version>
        </dependency>

代码

通过计算数据源条数计算线程池的大小

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;

public class BigDataExport {

    public static void main(String[] args) {
        int size = 70__0000;
        List<Integer> list = new ArrayList<>();
        for (int i = 0; i < size; i++) {
            list.add(1);
        }
        //生成 excel 文件名
        String title = "GLOOX-SCATTER-CHART_SXSSFW_";
        //生成 ZIP 目录
        String zipFilePath = "/Users/fico/Downloads/";
        //生成 ZIP 文件名
        String zipFileName = "ZIP";
        long timeMillis = System.currentTimeMillis();
        System.out.println("开始 [导出Excel+打包ZIP] 时间为：" + timeMillis);
        bigDataExport(list, title, zipFilePath, zipFileName);
        System.out.println("结束 [导出Excel+打包ZIP] 时间为：" + (System.currentTimeMillis() - timeMillis));

    }

    /**
     * 默认一个文件 2W
     */
    public static final int BIG_EXCEL_ROWS = 2__0000;

    public static <E> void bigDataExport(List<E> list, String titleSerialN, String zipFilePath, String zipFileName) {
        long timeMillis = System.currentTimeMillis();
        System.out.println("开始 导出excel 时间为：" + timeMillis);
        Map<String, byte[]> byteList = new HashMap<>();
        //大于多少行 进行多线程操作
        if (list.size() > BIG_EXCEL_ROWS) {
            //页数
            int pageNum = list.size() / BIG_EXCEL_ROWS;
            //取余
            int lastCount = list.size() % BIG_EXCEL_ROWS;
            // 计算几页
            int page = lastCount == 0 ? pageNum : pageNum + 1;
            //倒计时锁
            CountDownLatch downLatch = new CountDownLatch(page);
            //定义线程池 按 page 设置线程池量
            int processor = Runtime.getRuntime().availableProcessors();
            ExecutorService executor = Executors.newFixedThreadPool(page);
            List<E> subList;
            for (int c = 0; c <= pageNum; c++) {
                int rowNum = BIG_EXCEL_ROWS;
                String title = titleSerialN + "_" + (c + 1) + ".xlsx";
                if (c == pageNum) {
                    if (lastCount == 0) {
                        continue;
                    }
                    subList = list.subList(c * rowNum, c * rowNum + lastCount);
                } else {
                    subList = list.subList(c * rowNum, (c + 1) * rowNum);
                }
                //动态生成文件名：
                executor.execute(new PageThreadPool(downLatch, title, subList, byteList));
            }
            try {
                downLatch.await();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            executor.shutdown();
        }
        System.out.println("结束 导出excel 时间为：" + (System.currentTimeMillis() - timeMillis));
        if (byteList != null) {
            ZipUtils.zipFileSteam(byteList, zipFilePath, zipFileName);
        }
    }
}

利用多线程 + SXSSFWorkbook （excel 大文件一定要用这个）生成 excel 流放入 Map<excel文件名,excel流>

import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;


/**
 * 导出new线程池工具类
 */
public class PageThreadPool<E> implements Runnable {

    private CountDownLatch countDownLatch;
    private String title;
    private Map<String, byte[]> byteList;
    private List<E> list;

    public PageThreadPool(CountDownLatch countDownLatch, String title, List<E> list, Map<String, byte[]> byteList) {
        this.countDownLatch = countDownLatch;
        this.title = title;
        this.list = list;
        this.byteList = byteList;
    }

    @Override
    public void run() {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        SXSSFWorkbook workbook = null;
        try {
            //默认100行，超100行将写入临时文件
            workbook = new SXSSFWorkbook();
            //压缩临时文件，很重要，否则磁盘很快就会被写满
            workbook.setCompressTempFiles(true);
            SXSSFSheet sheet = workbook.createSheet("sheet");
            for (int j = 0; j < list.size(); j++) {
                SXSSFRow row = sheet.createRow(j);
                for (int k = 0; k < 10; k++) {
                    row.createCell(k).setCellValue(new Random().nextInt(10));
                }
            }
            workbook.write(bos);
            //put 文件名和文件字节数组
            byteList.put(this.title, bos.toByteArray());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (bos != null) {
                    bos.close();
                }
                // 删除临时文件，很重要，否则磁盘可能会被写满
                if (workbook != null) {
                    workbook.dispose();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            if (countDownLatch != null) {
                countDownLatch.countDown();
            }
        }
    }
}

将 Map<K,V> 压缩成ZIP 流并生成本地文件

import java.io.*;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class ZipUtils {

    /**
     * 压缩工具类
     *
     * @param byteList 文件字节码Map，k:fileName，v：byte[]
     * @return 返回压缩流
     */

    /**
     *
     * @param byteList     文件字节码Map，k:fileName，v：byte[]
     * @param zipFilePath  ZIP 生成目录
     * @param zipFileName  ZIP 文件名
     */
    public static void zipFileSteam(Map<String, byte[]> byteList,String zipFilePath,String zipFileName) {
        long timeMillis = System.currentTimeMillis();
        System.out.println("开始生成 ZIP 开始时间为：" + timeMillis);
        //如果文件夹不存在就创建文件夹，防止报错
        File file = new File(zipFilePath);
        if (!file.exists() && !file.isDirectory()) {
            System.out.println("文件夹不存在,创建新文件夹!");
            file.mkdirs();
        }
        try {
            FileOutputStream fileOutputStream = new FileOutputStream(String.format("%s%s%s", zipFilePath, zipFileName, ".zip"));
            ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream);
            byteList.forEach((k, v) -> {
                //写入一个条目，我们需要给这个条目起个名字，相当于起一个文件名称
                try {
                    zipOutputStream.putNextEntry(new ZipEntry(k));
                    zipOutputStream.write(v);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            });
            //关闭条目
            zipOutputStream.closeEntry();
            zipOutputStream.close();
            System.out.println("结束生成 ZIP 结束时间为：" + (System.currentTimeMillis() - timeMillis));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 将文件转换成byte数组
     *
     * @param filePath 文件File类 通过new File(文件路径)
     * @return byte数组
     */
    public static byte[] File2byte(File filePath) {
        byte[] buffer = null;
        FileInputStream fis = null;
        ByteArrayOutputStream bos = null;
        try {
            fis = new FileInputStream(filePath);
            bos = new ByteArrayOutputStream();
            byte[] b = new byte[1024];
            int n;
            while ((n = fis.read(b)) != -1) {
                bos.write(b, 0, n);
            }
            fis.close();
            bos.close();
            buffer = bos.toByteArray();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (fis != null) {
                    fis.close();
                }
                if (bos != null) {
                    bos.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return buffer;
    }


}

- 多快

导出数据条数为：700200 条
开始 [导出Excel+打包ZIP] 时间（毫秒）为：1649140344506
开始导出excel 时间（毫秒）为：1649140344506
结束导出excel 时间（毫秒）为：9581
开始生成 ZIP 开始时间（毫秒）为：1649140354097
结束生成 ZIP 结束时间（毫秒）为：2258
结束 [导出Excel+打包ZIP] 时间（毫秒）为：11849