最近想起之前的一个面试题,百万数据如何导出Excel,当时回答的不好,于是现在想好好思考下这个问题,百度了很多,最终我决定使用Mybatis的流式查询和POI的SXSSFWorkbook相结合解决这个问题,项目框架SpringBoot+Mybatis,数据源使用DruidDataSource,以下是依赖包和代码
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
Service实现类
import com.example.springbootswagger2.annotation.TransTx;
import com.example.springbootswagger2.dao.BehaviorLogDao;
import javax.annotation.Resource;
import java.io.FileOutputStream;
import java.util.concurrent.atomic.AtomicInteger;
import com.example.springbootswagger2.pojo.BehaviorLog;
import org.apache.ibatis.cursor.Cursor;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.springframework.stereotype.Service;
/**
* @Author: Jason
* @Date: 2020/12/17
* @Description:
*/
@Service
public class BehaviorLogServiceImpl implements IBehaviorLogService {
private static final Integer PAGE_SIZE = 100000;
@Resource
private BehaviorLogDao behaviorLogDao;
@TransTx // @Transactional
@Override
public void excelExport() {
// SXSSFWorkbook解决大数据量导出
SXSSFWorkbook wb = new SXSSFWorkbook(100);
// 查询总数
final Integer count = behaviorLogDao.findCount();
// 每个sheet页十万数据,计算需要多少个sheet
int sheetCount = count / PAGE_SIZE + 1;
for (int i = 0; i < sheetCount; i++) {
Sheet sheet = wb.createSheet("用户" + i);
// Mybatis流式查询
try (Cursor<BehaviorLog> cursor = behaviorLogDao.findPage(i * 100000, PAGE_SIZE)){
AtomicInteger row = new AtomicInteger();
Row rowTitle = sheet.createRow(row.get());
//创建单元格并设置单元格内容
rowTitle.createCell(0).setCellValue("用户loginName");
rowTitle.createCell(1).setCellValue("入口");
rowTitle.createCell(2).setCellValue("请求地址");
rowTitle.createCell(3).setCellValue("一级模块名称");
rowTitle.createCell(4).setCellValue("二级模块名称");
rowTitle.createCell(5).setCellValue("当前操作");
rowTitle.createCell(6).setCellValue("关键业务参数");
rowTitle.createCell(7).setCellValue("浏览器信息");
// 定义一个Excel的行对象
Row rowContent;
for (BehaviorLog log : cursor) {
rowContent = sheet.createRow(row.incrementAndGet());
rowContent.createCell(0).setCellValue(log.getUserName() == null ? "unknown" : log.getUserName());
rowContent.createCell(1).setCellValue(log.getEntrance() == null ? "unknown" : log.getEntrance());
rowContent.createCell(2).setCellValue(log.getUrl() == null ? "unknown" : log.getUrl());
rowContent.createCell(3).setCellValue(log.getModuleName() == null ? "unknown" : log.getModuleName());
rowContent.createCell(4).setCellValue(log.getSubModuleName() == null ? "unknown" : log.getSubModuleName());
rowContent.createCell(5).setCellValue(log.getOperationName() == null ? "unknown" : log.getOperationName());
rowContent.createCell(6).setCellValue(log.getContent() == null ? "unknown" : log.getContent());
rowContent.createCell(7).setCellValue(log.getBrowserInfo() == null ? "unknown" : log.getBrowserInfo());
}
} catch (Exception e){
e.printStackTrace();
}
}
try (FileOutputStream fileOutputStream = new FileOutputStream("./log.xlsx");) {
wb.write(fileOutputStream);
} catch (Exception e){
e.printStackTrace();
}
}
}
Dao
public interface BehaviorLogDao extends CrudDao<BehaviorLog> {
@Select("select user_name,entrance,url,module_name,sub_module_name,operation_name, content,browser_info from t_behavior_log limit #{startIndex}, #{pageSize}")
Cursor<BehaviorLog> findPage(@Param("startIndex") Integer startIndex, @Param("pageSize") Integer pageSize);
Integer findCount();
}
测试类
@Test
public void export() throws IOException {
System.out.println("开始:" + DateUtil.format(new Date(), DatePattern.NORM_DATETIME_PATTERN));
behaviorLogService.excelExport();
System.out.println("结束:" + DateUtil.format(new Date(), DatePattern.NORM_DATETIME_PATTERN));
}
实测导出两百万数据需要一分四十秒,Sql分页查询有待优化,如果id使用的是自增长,可以使用子查询优化,导出效率还会提升.还可以使用多线程,同时插入多个sheet页,笔者并未实现.
本文只是提供一种导出百万数据的思路,笔者能力有限,哪位大牛有更好的方法还望不吝赐教.
加油(ง •_•)ง,我是一个在学习路上砥砺前行的小菜鸟.