背景
目前系统中有一些接口做数据的导出,一开始数据量很小,所以在设计的时候性能什么的都没考虑到;随着数据量的不断增加,逐渐出现了一些问题:
- 耗时增加;
- SQL执行时间过长,占用数据库连接;
- 内存占用过高,导致FullGC频繁。
以及老版POI接口的问题:
- Excel写入过慢;
- 每个Sheet仅支持65536条数据;
- 容易导致OOM。
方案
为了解决以上问题,引入POI的SXSSFWorkbook
,设计了一个基于生产-消费者模式
的方案:
- 先查询条件下的数据总量T;
- 根据总量T,将SQL查询任务分片(每片任务量1W),得到任务数量N=T/1W;
- 将N个SQL查询任务交予线程池执行,得到异步结果集合
List<Future<T>>
; - 遍历异步结果集合,将每个异步结果交给生产者线程(数量N)处理;
- 生产者线程阻塞在异步任务线程,等待其执行完成,然后将结果放入阻塞队列(阻塞队列长度10,以此限制同时存在于内存中的结果数量);如果阻塞队列已满,则休眠一段时间,否则取出存入阻塞队列;
- 消费者线程(基于POI的特性,只设置一条线程)在阻塞队列中取出1组数据(每组数据<=1W条),利用POI生成Cell并写入数据,在处理完1组数据后,将数据从内存flush到硬盘以节省内存,并将该组数据的引用置空便于GC;
- 消费者线程在消费完所有的任务数据后结束,主线程将所有的Row数据写入到Response中,Excel导出完成。
效果
数据量(单位:万条) | 耗时(单位:秒) |
---|---|
1 | 2.61 |
3 | 5.26 |
5 | 5.78 |
10 | 10.51 |
20 | 18.31 |
50 | 45.43 |
实现
主线程
public void getFinanceMonthly(@RequestParam("startDate") String startDateStr, @RequestParam("endDate") String endDateStr, HttpServletResponse response) {
//......省略前置操作
Long dataQueryStartTS = System.currentTimeMillis();
log.info("查询并构建数据开始");
//获取当前条件下数据总条数
Integer total = orderQueryService.getFinanceMonthlyStatTotal(startDate, endDate);
//......省略一些操作
InputStream fs = new ClassPathResource("template/orderFinanceStatistics.xlsx").getInputStream();
XSSFWorkbook wb = new XSSFWorkbook(fs);
//引入SXSSFWorkbook,利用其高效的Excel数据处理特性
SXSSFWorkbook sxssfWorkbook = new SXSSFWorkbook(wb, 1000);
//......省略一些操作
//根据总条数设置任务数量和生产者、消费者个数
Integer taskCount = total % BATCH_AMOUNT == 0 ? total / BATCH_AMOUNT : (total / BATCH_AMOUNT) + 1;
//把任务分片后交给线程池执行并得到Future集合
ExecutorService sqlTaskExecutorService = Executors.newFixedThreadPool(5);
List<Future<List<OrderFinanceStatisticsDTO>>> futureResultList = new ArrayList<>(taskCount);
for (int i = 1; i <= taskCount; i++){
StatisticsTask task = new StatisticsTask(orderQueryService, i, startDate, endDate);
Future<List<OrderFinanceStatisticsDTO>> futureResult = sqlTaskExecutorService.submit(task);
futureResultList.add(futureResult);
}
sqlTaskExecutorService.shutdown();
//遍历Future集合,交给生产者处理,等待结果后放入阻塞队列
ArrayBlockingQueue<List<OrderFinanceStatisticsDTO>> queue = new ArrayBlockingQueue<>(10);
ExecutorService producerExecutorService = Executors.newFixedThreadPool(5);
for (int i = 0; i < taskCount; i++) {
Producer producer = new Producer(queue, futureResultList.get(i), "producer_"+i);
producerExecutorService.submit(producer);
}
producerExecutorService.shutdown();
//消费者开始消费阻塞队列中数据
log.info("开始写入数据");
Long dataWriteStartTS = System.currentTimeMillis();
Consumer consumer = new Consumer(queue, sxssfWorkbook, total);
consumer.start();
//确保主线程的后续操作在消费者线程结束(即数据都写入row)后再执行
consumer.join();
log.info(String.format("数据写入结束,耗时:%s ms", System.currentTimeMillis() - dataWriteStartTS));
sxssfWorkbook.write(writer);
writer.flush();
sxssfWorkbook.dispose();
Long dataQueryEndTS = System.currentTimeMillis();
log.info(String.format("数据查询构建成功,耗时:%s ms", dataQueryEndTS - dataQueryStartTS));
}catch (IOException e){
//......省略一些操作
}
}
任务
public class StatisticsTask implements Callable<List<OrderFinanceStatisticsDTO>> {
private OrderQueryService orderQueryService;
private Integer page;
private Date startDate;
private Date endDate;
/**
* 初始化任务类
* @param orderQueryService
* @param page
* @param startDate
* @param endDate
*/
public StatisticsTask(OrderQueryService orderQueryService, Integer page, Date startDate, Date endDate) {
//对数据进行分页查询
this.orderQueryService = orderQueryService;
this.page = (page - 1) * StatisticsController.BATCH_AMOUNT;
this.startDate = startDate;
this.endDate = endDate;
}
@Override
public List<OrderFinanceStatisticsDTO> call() throws Exception {
return orderQueryService.getFinanceMonthlyStat(startDate, endDate, page);
}
}
生产者
public class Producer extends Thread {
private static final Logger log = LoggerFactory.getLogger(Producer.class);
//阻塞队列,防止多线程问题以及限制内存使用量
private ArrayBlockingQueue<List<OrderFinanceStatisticsDTO>> queue;
private Future<List<OrderFinanceStatisticsDTO>> futureResult;
public Producer(ArrayBlockingQueue<List<OrderFinanceStatisticsDTO>> queue, Future<List<OrderFinanceStatisticsDTO>> futureResult, String threadName) {
super(threadName);
this.queue = queue;
this.futureResult = futureResult;
}
@Override
public void run() {
Long startTime = System.currentTimeMillis();
while (true) {
try {
if (futureResult.isCancelled()) {
break;
}
if (futureResult.isDone()) {
//如果队列已满,生产者休眠300ms
if (queue.remainingCapacity() < 1) {
log.info(String.format("生产者线程[%s]: 队列已满,休眠300ms", this.getName()));
Thread.sleep(300);
continue;
}
List<OrderFinanceStatisticsDTO> orderFinanceStatisticsDTOList = futureResult.get();
Long endTime = System.currentTimeMillis();
log.info(String.format("生产者线程[%s]成功获取1W条数据,耗时:[%s]ms", this.getName(), (endTime - startTime)));
//将数据放入阻塞队列
queue.put(orderFinanceStatisticsDTOList);
log.info(String.format("生产者线程[%s]成功入队1W条数据,在队列阻塞:[%s]ms", this.getName(), System.currentTimeMillis() - endTime));
break;
} else {
log.info(String.format("生产者线程[%s]: 查询未结束,休眠300ms", this.getName()));
Thread.sleep(300);
}
} catch (InterruptedException | ExecutionException e) {
log.error(String.format("生产者线程[%s]失败-->", this.getName()), e);
break;
}
}
}
}
消费者
public class Consumer extends Thread{
private static final Logger log = LoggerFactory.getLogger(Consumer.class);
private ArrayBlockingQueue<List<OrderFinanceStatisticsDTO>> queue;
private Integer count;
private Integer total;
private SXSSFWorkbook sxssfWorkbook;
private Integer times;
public Consumer(ArrayBlockingQueue<List<OrderFinanceStatisticsDTO>> queue, SXSSFWorkbook sxssfWorkbook, Integer total) {
this.queue = queue;
this.total = total;
this.count = 0;
this.sxssfWorkbook = sxssfWorkbook;
this.times = 1;
}
@Override
public void run() {
try {
while (true) {
if (count >= total){
break;
}
log.info(String.format("消费者第[%s]次从队列中取出数据,准备写入Excel", this.times));
Long dataWriteStartTS = System.currentTimeMillis();
List<OrderFinanceStatisticsDTO> financeStatisticsDTOList = queue.take();
log.info(String.format("消费者线程[%s]成功获取1W条数据,在队列阻塞:[%s]ms", this.getName(), System.currentTimeMillis() - dataWriteStartTS));
//根据总条数确定消费者本次写入数据时的sheet和开始行数
int index = count % StatisticsController.MAX_ROW_SHEET == 0 ? (count + 2) : (count % StatisticsController.MAX_ROW_SHEET + 2);
count += financeStatisticsDTOList.size();
Integer sheetNum = count / StatisticsController.MAX_ROW_SHEET;
Sheet sheet = sxssfWorkbook.getSheetAt(sheetNum);
writeExcel(financeStatisticsDTOList, sheet, index);
Long dataWriteEndTS = System.currentTimeMillis();
log.info(String.format("消费者第[%s]次写入Excel完成,耗时%s ms", this.times, (dataWriteEndTS - dataWriteStartTS)));
this.times++;
}
} catch (InterruptedException e) {
log.error("消费者[%s]获取队列数据异常-->",e);
}
}
private void writeExcel(List<OrderFinanceStatisticsDTO> financeStatisticsDTOList, Sheet sheet, Integer index) {
if (CollectionUtils.isEmpty(financeStatisticsDTOList)){
log.info(String.format("消费者[%s]: 数据为空", this.getName()));
return;
}
for (int i = 0; i < financeStatisticsDTOList.size(); i++) {
Row row = sheet.createRow(index++);
OrderFinanceStatisticsDTO statisticsDTO = financeStatisticsDTOList.get(i);
row.createCell(0).setCellValue(statisticsDTO.getOrderDate());
//.....省略设置表格数据的代码
}
//处理完以后将数据引用置空便于GC
financeStatisticsDTOList.clear();
}
}
SQL
<select id="getFinanceMonthlyStat" resultMap="OrderFinanceStatisticsMap" timeout="20">
SELECT
# .....省略字段和表连接代码
WHERE
go.order_time BETWEEN #{monthStartDate} AND #{monthEndDate}
AND go.`status` IN (3, 4, 5, 6, 7)
AND go.sales_type IN (1, 2, 4)
AND go.is_deleted = 'N'
ORDER BY go.order_time DESC
# 数据分页
LIMIT #{page,jdbcType=INTEGER},10000
</select>
愿景
在此方案的基础上,利用组合方法
、模板设计
模式,将任务、生产者、消费者更加通用化、框架化;使用者只需实现特定接口并重写SQL任务
、数据处理
(如果是多表联查也可以不实现)、Excel数据写入
等方法,即可轻松接入该方案。
愿景实现
目前已实现愿景,具体使用方式如下所示:
主控制类
下载Controller
中,对于SXSSFWorkbook
之前的代码需自己完成;对于框架类的使用,只需要构建查询参数并初始化导出框架类启动即可:
@RequestMapping(value = "/monthly", method = RequestMethod.GET)
public void getFinanceMonthly(@RequestParam("startDate") String startDateStr, @RequestParam("endDate") String endDateStr,
HttpServletResponse response) {
ServletOutputStream writer = null;
log.info(String.format("getFinanceMonthly(),日期:{%s}--{%s}", startDateStr, endDateStr));
Date startDate = new Date(startDateStr);
Date endDate = new Date(endDateStr);
Long dataQueryStartTS = System.currentTimeMillis();
log.info("查询并构建数据开始");
//获取当前条件下数据总条数
Integer total = orderQueryService.getFinanceMonthlyStatTotal(startDate, endDate);
if (total == null || total == 0) {
writer.write("数据为空".getBytes("gbk"));
writer.flush();
return;
}
String fileName = String.format("Monthly_Finance_%s.xlsx", endDateStr);
InputStream fs = new ClassPathResource("template/orderFinanceStatistics.xlsx").getInputStream();
XSSFWorkbook wb = new XSSFWorkbook(fs);
SXSSFWorkbook sxssfWorkbook = new SXSSFWorkbook(wb, 1000);
response.setCharacterEncoding("utf-8");
response.setContentType("application/vnd.ms-excel");
response.setHeader("Content-Disposition", "attachment;fileName="
+ new String(fileName.getBytes("UTF-8"), "ISO8859-1"));
//根据总条数设置任务数量和生产者、消费者个数
Integer taskCount = total % BATCH_AMOUNT == 0 ? total / BATCH_AMOUNT : (total / BATCH_AMOUNT) + 1;
//构建框架的请求参数
Map<String, String> businessParams = new HashMap<>();
businessParams.put("startDate", startDateStr);
businessParams.put("endDate", endDateStr);
//初始化导出框架
ExportConfigThread configThread = new ExportConfigThread("财务统计", total, BATCH_AMOUNT, businessParams,
sxssfWorkbook, orderQueryService, writerExcelService);
//启动导出线程并使主线程等待该线程完成
configThread.start();
configThread.join();
Long dataWriteStartTS = System.currentTimeMillis();
log.info(String.format("数据写入结束,耗时:%s ms", System.currentTimeMillis() - dataWriteStartTS));
sxssfWorkbook.write(writer);
writer.flush();
sxssfWorkbook.dispose();
Long dataQueryEndTS = System.currentTimeMillis();
log.info(String.format("数据查询构建成功,耗时:%s ms", dataQueryEndTS - dataQueryStartTS));
}catch (IOException e){
log.error("writer() 异常-->",e);
} catch (InterruptedException e) {
log.error("消费者异常--->",e);
} finally {
if (writer != null){
try {
writer.close();
} catch (IOException e) {
log.error("关闭writer 异常-->",e);
}
}
}
}
实现数据查询接口
public interface TaskService<T> {
/**
* 查询数据,可以只查左表(需在下面的方法中进行数据的关联处理);也可以关联查询
* @param businessParams
* @param offset
* @param limit
* @return
*/
List<T> queryData(Map<String, String> businessParams, Integer offset, Integer limit);
/**
* 如果在SQL里进行了多表联查,那么该方法只需要实现空方法体即可
* 否则请在该方法中对数据进行关联处理
* @param majorDataList
* @param businessParams
* @return
*/
List<T> processData(List<T> majorDataList, Map<String, String> businessParams);
}
public interface OrderQueryService extends TaskService<OrderFinanceStatisticsDTO>;
@Override
public List<OrderFinanceStatisticsDTO> queryData(Map<String, String> businessParams, Integer offset, Integer limit) {
return getFinanceMonthlyStat(new Date(businessParams.get("startDate")), new Date(businessParams.get("endDate")), offset);
}
@Override
public List<OrderFinanceStatisticsDTO> processData(List<OrderFinanceStatisticsDTO> majorDataList, Map<String, String> businessParams) {
//Do nothing
return null;
}
实现Excel数据操作接口
public interface WriterService<T> {
void write2Rows(List<T> list, Sheet sheet, Integer index);
}
@Component
public class WriterExcelServiceImpl implements WriterService<OrderFinanceStatisticsDTO> {
@Override
public void write2Rows(List<OrderFinanceStatisticsDTO> financeStatisticsDTOList, Sheet sheet, Integer index) {
if (CollectionUtils.isEmpty(financeStatisticsDTOList)){
return;
}
for (int i = 0; i < financeStatisticsDTOList.size(); i++) {
Row row = sheet.createRow(index++);
OrderFinanceStatisticsDTO statisticsDTO = financeStatisticsDTOList.get(i);
row.createCell(0).setCellValue(statisticsDTO.getOrderDate());
row.createCell(1).setCellValue(statisticsDTO.getOrderId());
row.createCell(2).setCellValue(statisticsDTO.getProductName());
row.createCell(3).setCellValue(statisticsDTO.getProductCombinationName());
row.createCell(4).setCellValue(statisticsDTO.getCustomName());
row.createCell(5).setCellValue(statisticsDTO.getPayMoney());
row.createCell(6).setCellValue(statisticsDTO.getProductPayMoney());
row.createCell(7).setCellValue(statisticsDTO.getFreight());
row.createCell(8).setCellValue(statisticsDTO.getPaymentType() == null ? ""
: statisticsDTO.getPaymentType().getDesc());
row.createCell(9).setCellValue(statisticsDTO.getPaymentNo());
row.createCell(10).setCellValue(statisticsDTO.getRefundMoney());
row.createCell(11).setCellValue(statisticsDTO.getCostPrice());
row.createCell(12).setCellValue(statisticsDTO.getStatus());
row.createCell(13).setCellValue(statisticsDTO.getSendDate());
row.createCell(14).setCellValue(statisticsDTO.getReceiveDate());
row.createCell(15).setCellValue(statisticsDTO.getReportDate());
row.createCell(16).setCellValue(statisticsDTO.getSampleCode());
row.createCell(17).setCellValue(statisticsDTO.getSupplierName());
row.createCell(18).setCellValue(statisticsDTO.getSendSupplierName());
row.createCell(19).setCellValue(statisticsDTO.getSalesType() == null ? ""
: statisticsDTO.getSalesType().getName());
row.createCell(20).setCellValue(statisticsDTO.getThirdOrderCode());
if (statisticsDTO.getOriginalOrderId() != null) {
row.createCell(21).setCellValue(statisticsDTO.getOriginalOrderId());
}
if (statisticsDTO.getNewOrderId() != null) {
row.createCell(22).setCellValue(statisticsDTO.getNewOrderId());
}
row.createCell(23).setCellValue(statisticsDTO.getOperateType() == null ? ""
: statisticsDTO.getOperateType().getValue());
row.createCell(24).setCellValue(statisticsDTO.getRemark());
row.createCell(25).setCellValue(statisticsDTO.getSendSampleDate());
if (statisticsDTO.getOrderTempId() != null) {
row.createCell(26).setCellValue(statisticsDTO.getOrderTempId());
}
if (statisticsDTO.getSettleAmount() != null) {
row.createCell(27).setCellValue(statisticsDTO.getSettleAmount());
}
if (statisticsDTO.getSettleBatchNo() != null) {
row.createCell(28).setCellValue(statisticsDTO.getSettleBatchNo());
}
if (statisticsDTO.getSettleStatusDesc() != null) {
row.createCell(29).setCellValue(statisticsDTO.getSettleStatusDesc());
}
}
}
}