java实现百万级大数据量导出
实现方式一
一,分析
excel导出如果数据量过大,会出现两个内存溢出的问题
- 查绚数据量过大,导致内存溢出(可通过分批查绚解决)
- 下载的时候大EXCEL转换的输出流内存溢出;(该方式可以通过新版的SXSSFWorkbook来解决,可通过其构造函数执指定在内存中缓存的行数,剩余的会自动缓存在硬盘的临时目录上,同时,并不会存在页面卡顿的情况);
- 为了能够使用不同的mapper并分批写数据, 采用了外观模式和模板方法模式,大体分三步:
a. 根据总数量生成excel,确定sheet的数量和写标题;
b. 写数据,在可变的匿名内部类中实现写入逻辑;
c. 转换输出流进行下载;
二,代码实现
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
如果是springboot2.0,则不需要poi依赖,如果是1.0,则需要poi依赖,并且poi和poi-ooxml的版本要保持一致
application.properties
spring.mvc.view.prefix=/jsp/
spring.mvc.view.suffix=.jsp
spring.datasource.url=jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf-8
spring.datasource.username=root
spring.datasource.password=123456
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.jpa.database=MYSQL
spring.jpa.show-sql=true
spring.jpa.hibernate.ddl-auto=none
ExcelConstant
package com.example.util;
public class ExcelConstant {
/**
* 每个sheet存储的记录数 100W
*/
public static final Integer PER_SHEET_ROW_COUNT = 1000000;
/**
* 每次向EXCEL写入的记录数(查询每页数据大小) 20W
*/
public static final Integer PER_WRITE_ROW_COUNT = 200000;
/**
* 每个sheet的写入次数 5
*/
public static final Integer PER_SHEET_WRITE_COUNT = PER_SHEET_ROW_COUNT / PER_WRITE_ROW_COUNT;
}
写数据的委托类
package com.example.util;
import org.apache.poi.xssf.streaming.SXSSFSheet;
/**
* @description EXCEL写数据委托类
*/
public interface WriteExcelDataDelegated {
/**
* EXCEL写数据委托类 针对不同的情况自行实现
*
* @param eachSheet 指定SHEET
* @param startRowCount 开始行
* @param endRowCount 结束行
* @param currentPage 分批查询开始页
* @param pageSize 分批查询数据量
* @throws Exception
*/
public abstract void writeExcelData(SXSSFSheet eachSheet, Integer startRowCount, Integer endRowCount, Integer currentPage, Integer pageSize) throws Exception;
}
dateutil工具类
package com.example.util;
import java.text.SimpleDateFormat;
import java.util.Date;
public class DateUtil {
public static final String YYYY_MM_DD_HH_MM_SS = "yyyy-MM-dd HH:mm:ss";
/**
* 将日期转换为字符串
*
* @param date DATE日期
* @param format 转换格式
* @return 字符串日期
*/
public static String formatDate(Date date, String format) {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
return simpleDateFormat.format(date);
}
}
poi工具类
package com.example.util;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.servlet.http.HttpServletResponse;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Date;
public class PoiUtil {
private final static Logger logger = LoggerFactory.getLogger(PoiUtil.class);
/**
* 初始化EXCEL(sheet个数和标题)
*
* @param totalRowCount 总记录数
* @param titles 标题集合
* @return XSSFWorkbook对象
*/
public static SXSSFWorkbook initExcel(Integer totalRowCount, String[] titles) {
// 在内存当中保持 100 行 , 超过的数据放到硬盘中在内存当中保持 100 行 , 超过的数据放到硬盘中
SXSSFWorkbook wb = new SXSSFWorkbook(100);
Integer sheetCount = ((totalRowCount % ExcelConstant.PER_SHEET_ROW_COUNT == 0) ?
(totalRowCount / ExcelConstant.PER_SHEET_ROW_COUNT) : (totalRowCount / ExcelConstant.PER_SHEET_ROW_COUNT + 1));
// 根据总记录数创建sheet并分配标题
for (int i = 0; i < sheetCount; i++) {
SXSSFSheet sheet = wb.createSheet("sheet" + (i + 1));
SXSSFRow headRow = sheet.createRow(i);
for (int j = 0; j < titles.length; j++) {
SXSSFCell headRowCell = headRow.createCell(j);
headRowCell.setCellValue(titles[j]);
}
}
return wb;
}
/**
* 下载EXCEL到本地指定的文件夹
*
* @param wb EXCEL对象SXSSFWorkbook
* @param exportPath 导出路径
*/
public static void downLoadExcelToLocalPath(SXSSFWorkbook wb, String exportPath) {
FileOutputStream fops = null;
try {
fops = new FileOutputStream(exportPath);
wb.write(fops);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (null != wb) {
try {
wb.dispose();
} catch (Exception e) {
e.printStackTrace();
}
}
if (null != fops) {
try {
fops.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
/**
* 下载EXCEL到浏览器
*
* @param wb EXCEL对象XSSFWorkbook
* @param response
* @param fileName 文件名称
* @throws IOException
*/
public static void downLoadExcelToWebsite(SXSSFWorkbook wb, HttpServletResponse response, String fileName) throws IOException {
response.setHeader("Content-disposition", "attachment; filename="+fileName
);//设置下载的文件名
OutputStream outputStream = null;
try {
outputStream = response.getOutputStream();
wb.write(outputStream);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (null != wb) {
try {
wb.dispose();
} catch (Exception e) {
e.printStackTrace();
}
}
if (null != outputStream) {
try {
outputStream.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
/**
* 导出Excel到本地指定路径
*
* @param totalRowCount 总记录数
* @param titles 标题
* @param exportPath 导出路径
* @param writeExcelDataDelegated 向EXCEL写数据/处理格式的委托类 自行实现
* @throws Exception
*/
public static final void exportExcelToLocalPath(Integer totalRowCount, String[] titles, String exportPath, WriteExcelDataDelegated writeExcelDataDelegated) throws Exception {
logger.info("开始导出:" + DateUtil.formatDate(new Date(), DateUtil.YYYY_MM_DD_HH_MM_SS));
// 初始化EXCEL
SXSSFWorkbook wb = PoiUtil.initExcel(totalRowCount, titles);
// 调用委托类分批写数据
int sheetCount = wb.getNumberOfSheets();
for (int i = 0; i < sheetCount; i++) {
SXSSFSheet eachSheet = wb.getSheetAt(i);
for (int j = 1; j <= ExcelConstant.PER_SHEET_WRITE_COUNT; j++) {
int currentPage = i * ExcelConstant.PER_SHEET_WRITE_COUNT + j;
int pageSize = ExcelConstant.PER_WRITE_ROW_COUNT;
int startRowCount = (j - 1) * ExcelConstant.PER_WRITE_ROW_COUNT + 1;
int endRowCount = startRowCount + pageSize - 1;
writeExcelDataDelegated.writeExcelData(eachSheet, startRowCount, endRowCount, currentPage, pageSize);
}
}
// 下载EXCEL
PoiUtil.downLoadExcelToLocalPath(wb, exportPath);
logger.info("导出完成:" + DateUtil.formatDate(new Date(), DateUtil.YYYY_MM_DD_HH_MM_SS));
}
/**
* 导出Excel到浏览器
*
* @param response
* @param totalRowCount 总记录数
* @param fileName 文件名称
* @param titles 标题
* @param writeExcelDataDelegated 向EXCEL写数据/处理格式的委托类 自行实现
* @throws Exception
*/
public static final void exportExcelToWebsite(HttpServletResponse response, Integer totalRowCount, String fileName, String[] titles, WriteExcelDataDelegated writeExcelDataDelegated) throws Exception {
logger.info("开始导出:" + DateUtil.formatDate(new Date(), DateUtil.YYYY_MM_DD_HH_MM_SS));
// 初始化EXCEL
SXSSFWorkbook wb = PoiUtil.initExcel(totalRowCount, titles);
// 调用委托类分批写数据
int sheetCount = wb.getNumberOfSheets();
for (int i = 0; i < sheetCount; i++) {
SXSSFSheet eachSheet = wb.getSheetAt(i);
for (int j = 1; j <= ExcelConstant.PER_SHEET_WRITE_COUNT; j++) {
//查询开始页
int currentPage = i * ExcelConstant.PER_SHEET_WRITE_COUNT + j;
//分批查绚数据量
int pageSize = ExcelConstant.PER_WRITE_ROW_COUNT;
//开始行
int startRowCount = (j - 1) * ExcelConstant.PER_WRITE_ROW_COUNT + 1;
//结束行
int endRowCount = startRowCount + pageSize - 1;
writeExcelDataDelegated.writeExcelData(eachSheet, startRowCount, endRowCount, currentPage, pageSize);
}
}
// 下载EXCEL
PoiUtil.downLoadExcelToWebsite(wb, response, fileName);
logger.info("导出完成:" + DateUtil.formatDate(new Date(), DateUtil.YYYY_MM_DD_HH_MM_SS));
}
}
编写DEMO
serviceimpl
public ResultVO export(HttpServletResponse response) {
//获取数据总条数
Integer totalRowCount = excelRepository.findTotal();
String filaName = "用户EXCEL.xlsx";
//对应excel表体列字段
String[] tital = new String[]{"用户id","姓名"};
try {
PoiUtil.exportExcelToWebsite(response, totalRowCount, filaName, tital, new WriteExcelDataDelegated() {
@Override
public void writeExcelData(SXSSFSheet eachSheet, Integer startRowCount, Integer endRowCount, Integer currentPage, Integer pageSize) throws Exception {
//此处进行分批次查询,
List<ExcelModel> list = excelRepository.findExcelModelList(currentPage,pageSize);
if (!list.isEmpty()) {
for (int i = startRowCount; i <= endRowCount; i++) {
SXSSFRow eachDataRow = eachSheet.createRow(i);
if ((i - startRowCount) < list.size()) {
ExcelModel model = list.get(i - startRowCount);
eachDataRow.createCell(0).setCellValue(model.getId());
eachDataRow.createCell(1).setCellValue(model.getPath());
}
}
}
}
});
return new ResultVO();
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
测试
经测试100W数据量导出时间73s,后续可进行再优化
实现方式二
直接通过新版的SXSSFWorkbook来解决大数据量导出问题,此方式
简明,但是不适用于过大数据量
public ResultVO export2(HttpServletResponse response) {
try {
// 在内存当中保持 100 行 , 超过的数据放到硬盘中在内存当中保持 100 行 , 超过的数据放到硬盘中
SXSSFWorkbook workbook = new SXSSFWorkbook(100);
workbook.createSheet("sheet1");
SXSSFSheet rows = workbook.getSheetAt(0);
List<ExcelModel> list = excelRepository.finds();
for (int i = 0; i < list.size(); i++) {
rows.createRow(i);
rows.getRow(i).createCell(0).setCellValue(list.get(i).getId());
rows.getRow(i).createCell(1).setCellValue(list.get(i).getPath());
}
// 打开目的输入流,不存在则会创建
OutputStream outputStream = null;
// 打开目的输入流,不存在则会创建
response.setHeader("Content-disposition","attachment; filename= aa.xlsx");
workbook.write(response.getOutputStream());
outputStream.close();
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
实现方式三
采用阿里开源的(EasyExcel)来实现百万级数据量导出,具体可参考链接
https://blog.csdn.net/qq_35206261/article/details/88579151