EasyExcel针对比较大的数据量合并策略
使用EasyExcel合并单元格,非常耗费资源,目前想到的优化思路有三点:
1.在合并的时候,先确定哪些单元格要合并,获取需要合并的单元格索引
2.使用addMergedRegionUnsafe(CellRangeAddress region)方法,这里的不安全主要是少了两部分校验:
2.1.校验单元格是否有公式引用;
2.2.校验是否与已合并区域合并;
3.private int addMergedRegion(CellRangeAddress region, boolean validate)方法底层是要返回合并区域的索引,是做了线程安全处理的,如果用不到可以直接重写该方法不返回。
策略实现
目前使用的是行处理实现,以前也试过单元格处理实现,单元格实现太精细不合适,以下是切合本人业务的具体实现:
import com. alibaba. excel. write. handler. RowWriteHandler ;
import com. alibaba. excel. write. metadata. holder. WriteSheetHolder ;
import com. alibaba. excel. write. metadata. holder. WriteTableHolder ;
import org. apache. poi. ss. usermodel. Cell ;
import org. apache. poi. ss. usermodel. CellType ;
import org. apache. poi. ss. usermodel. Row ;
import org. apache. poi. ss. usermodel. Sheet ;
import org. apache. poi. ss. util. CellRangeAddress ;
import org. apache. poi. xssf. usermodel. XSSFSheet ;
import org. openxmlformats. schemas. spreadsheetml. x2006. main. CTMergeCell ;
import org. openxmlformats. schemas. spreadsheetml. x2006. main. CTMergeCells ;
import org. openxmlformats. schemas. spreadsheetml. x2006. main. CTWorksheet ;
import java. lang. reflect. Field ;
public class RowWriterStrategy implements RowWriteHandler {
private Integer segTotalNum;
private int count;
private int [ ] mergeColumnIndex;
private int mergedTotalCount = 0 ;
public RowWriterStrategy ( int [ ] mergeColumnIndex) {
this . mergeColumnIndex = mergeColumnIndex;
}
public RowWriterStrategy ( ) {
}
@Override
public void beforeRowCreate ( WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Integer rowIndex, Integer relativeRowIndex, Boolean isHead) {
}
@Override
public void afterRowCreate ( WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Row row, Integer relativeRowIndex, Boolean isHead) {
}
@Override
public void afterRowDispose ( WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Row row, Integer relativeRowIndex, Boolean isHead) {
int curRowNum = row. getRowNum ( ) ;
if ( mergeColumnIndex != null && mergeColumnIndex. length > 0 && ! isHead) {
Cell curA1Cell = row. getCell ( 0 ) ;
Object curA1Data = curA1Cell. getCellTypeEnum ( ) == CellType . STRING ? curA1Cell. getStringCellValue ( ) : curA1Cell. getNumericCellValue ( ) ;
Cell preA1Cell = row. getSheet ( ) . getRow ( curRowNum - 1 ) . getCell ( 0 ) ;
Object preA1Data = preA1Cell. getCellTypeEnum ( ) == CellType . STRING ? preA1Cell. getStringCellValue ( ) : preA1Cell. getNumericCellValue ( ) ;
if ( curA1Data. equals ( preA1Data) ) {
count ++ ;
} else {
if ( count > 0 ) {
for ( int i = 0 ; i < mergeColumnIndex. length; i++ ) {
mergeSomeRow ( writeSheetHolder, curRowNum, count, i) ;
}
count = 0 ;
}
}
if ( curRowNum == segTotalNum && count > 0 ) {
for ( int i = 0 ; i < mergeColumnIndex. length; i++ ) {
mergeSomeRow ( writeSheetHolder, curRowNum + 1 , count, i) ;
}
}
}
}
private void mergeSomeRow ( WriteSheetHolder writeSheetHolder, int curRowIndex, int needMergeNum, int curColIndex) {
Sheet sheet = writeSheetHolder. getSheet ( ) ;
try {
CellRangeAddress cellAddresses = new CellRangeAddress ( curRowIndex - needMergeNum - 1 , curRowIndex- 1 , curColIndex, curColIndex) ;
Field sh = sheet. getClass ( ) . getDeclaredField ( "_sh" ) ;
sh. setAccessible ( true ) ;
XSSFSheet shSheet = ( XSSFSheet ) sh. get ( sheet) ;
CTWorksheet worksheet = shSheet. getCTWorksheet ( ) ;
CTMergeCells ctMergeCells = mergedTotalCount > 0 ? worksheet. getMergeCells ( ) : worksheet. addNewMergeCells ( ) ;
CTMergeCell ctMergeCell = ctMergeCells. addNewMergeCell ( ) ;
ctMergeCell. setRef ( cellAddresses. formatAsString ( ) ) ;
mergedTotalCount ++ ;
} catch ( Exception e) {
e. printStackTrace ( ) ;
}
}
public Integer getSegTotalNum ( ) {
return segTotalNum;
}
public void setSegTotalNum ( Integer segTotalNum) {
this . segTotalNum= segTotalNum;
}
}
解析
1.以下成员变量非必须的,因为我的业务要分段导出,所以segTotalNum是动态传入的。
合并行计数count:获取每次单元格合并的索引;
合并列索引mergeColumnIndex,也是动态判断传入的,用于确定合并发生在哪几列。
已合并单元格数mergedTotalCount 是为了重用CTMergeCells对象,这里如有更好的实现方法可以评论区告诉我。
private Integer segTotalNum;
private int count;
private int [ ] mergeColumnIndex;
private int mergedTotalCount = 0 ;
2.我的业务是根据第一个单元格判断即可确认是否是相同内容,并且累计行结束索引,这部分可以在其他地方判断后再传入,但是我的合并单元格位置是动态变化的,只能动态判断获取索引。
@Override
public void afterRowDispose ( WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Row row, Integer relativeRowIndex, Boolean isHead) {
int curRowNum = row. getRowNum ( ) ;
if ( mergeColumnIndex != null && mergeColumnIndex. length > 0 && ! isHead) {
Cell curA1Cell = row. getCell ( 0 ) ;
Object curA1Data = curA1Cell. getCellTypeEnum ( ) == CellType . STRING ? curA1Cell. getStringCellValue ( ) : curA1Cell. getNumericCellValue ( ) ;
Cell preA1Cell = row. getSheet ( ) . getRow ( curRowNum - 1 ) . getCell ( 0 ) ;
Object preA1Data = preA1Cell. getCellTypeEnum ( ) == CellType . STRING ? preA1Cell. getStringCellValue ( ) : preA1Cell. getNumericCellValue ( ) ;
if ( curA1Data. equals ( preA1Data) ) {
count ++ ;
} else {
if ( count > 0 ) {
for ( int i = 0 ; i < mergeColumnIndex. length; i++ ) {
mergeSomeRow ( writeSheetHolder, curRowNum, count, i) ;
}
count = 0 ;
}
}
if ( curRowNum == segTotalNum && count > 0 ) {
for ( int i = 0 ; i < mergeColumnIndex. length; i++ ) {
mergeSomeRow ( writeSheetHolder, curRowNum + 1 , count, i) ;
}
}
}
}
3.重写了底层合并单元格的实现,避开了四个校验
if (region.getNumberOfCells() < 2) {
throw new IllegalArgumentException("Merged region " + region.formatAsString() + " must contain 2 or more cells");
}
region.validate(SpreadsheetVersion.EXCEL97);
if (validate) {
// throw IllegalStateException if the argument CellRangeAddress intersects with
// a multi-cell array formula defined in this sheet
validateArrayFormulas(region);
// Throw IllegalStateException if the argument CellRangeAddress intersects with
// a merged region already in this sheet
validateMergedRegions(region);
}
,一个加锁的索引返回
public int sizeOfMergeCellArray() {
synchronized(this.monitor()) {
this.check_orphaned();
return this.get_store().count_elements(MERGECELL$0);
}
}
以下是重写后的实现:
private void mergeSomeRow ( WriteSheetHolder writeSheetHolder, int curRowIndex, int needMergeNum, int curColIndex) {
Sheet sheet = writeSheetHolder. getSheet ( ) ;
try {
CellRangeAddress cellAddresses = new CellRangeAddress ( curRowIndex - needMergeNum - 1 , curRowIndex- 1 , curColIndex, curColIndex) ;
Field sh = sheet. getClass ( ) . getDeclaredField ( "_sh" ) ;
sh. setAccessible ( true ) ;
XSSFSheet shSheet = ( XSSFSheet ) sh. get ( sheet) ;
CTWorksheet worksheet = shSheet. getCTWorksheet ( ) ;
CTMergeCells ctMergeCells = mergedTotalCount > 0 ? worksheet. getMergeCells ( ) : worksheet. addNewMergeCells ( ) ;
CTMergeCell ctMergeCell = ctMergeCells. addNewMergeCell ( ) ;
ctMergeCell. setRef ( cellAddresses. formatAsString ( ) ) ;
mergedTotalCount ++ ;
} catch ( Exception e) {
e. printStackTrace ( ) ;
}
}
4.需要获取加锁后的索引,不想重写方法,可以参考下面做法
private void mergeSomeRow ( WriteSheetHolder writeSheetHolder, int curRowIndex, int needMergeNum, int curColIndex) {
Sheet sheet = writeSheetHolder. getSheet ( ) ;
CellRangeAddress cellAddresses = new CellRangeAddress ( curRowIndex - needMergeNum - 1 , curRowIndex- 1 , curColIndex, curColIndex) ;
int mergeIndex = sheet. addMergedRegionUnsafe ( cellAddresses) ;
}
结语
性能和安全还是要按照实际取舍,顺便提一下,我重写的方法是没有对Excel版本判断的,使用的是ExcelTypeEnum.XLSX,
以上只是本人的陋见,如果有更好的请告诉我。
EasyExcel 版本 :
compile "com.alibaba:easyexcel:'2.2.6'"