参考前辈博客修改实现POI读取格式为xls的Excel表格时分Sheet页保存,最后验证在两位前辈的代码基础上实现,其中第二位前辈完成了数据的中间存储,如果需要可以参考第二位前辈的方法,在此感谢二位前辈:
1、https://blog.csdn.net/sundy_fly/article/details/78134322
2、http://gaosheng08.iteye.com/blog/624604
以下内容为完成Excel解析后的分页存储
1、实现原理:
原理是利用BOFRecord特点完成分页,该对象会在一个Sheet页开始时出现,多以根据它的特点,当一个Sheet页开始时可以确定上一个Sheet页的内容已经保存完,多以这时可以保存上一个Sheet页的内容,并清空当前保存Sheet内容的集合,方便下一个Sheet页保存;对于最后一个sheet页的内容可以在Excel内容解析完时保存。这时目前采用的解决方案,如果哪位大佬有更好的方法,欢迎探讨。。。。。。
2、实现方案代码:(该方法是自己摸索,还有不完善之处,如有雷同,均属巧合,望谅解)
1)该部分是实现分页的代码,是分页存储的核心部分,该方法有个痛点是所有解析页中如果出现有空Sheet也内容,尤其是Sheet页出现在有内容的Sheet也之前时可能会出现Sheet页名称和内容对不上的错乱,之后如果有好的方法会及时更新,如果大佬们发现有好的方法,欢迎一块探讨。
/**
* 该部分是实现分页的主要部分
* 可以使用BOFRecord作为表格的Sheet开始和结束的区分对象,当一个Sheet页开始时会出现该对象,
* 所以在该对象出现时可以作为上一个Sheet页内容的结束存储,把该sheet页内容存储起来,清空
* Sheet页内容存储记录,为下一个Sheet页存储做准备,再用来存储下一个Sheet页
* @author Drug 2019年4月21日
*/
private void processCategorySheet()
{
//如果表项存在,并且当前内容不为空
if(boundSheetRecords != null && !boundSheetRecords.isEmpty() &&
boundSheetRecords.get(sheetIndex) != null
&& contents != null && !contents.isEmpty())
{
//当前Sheet记录
BoundSheetRecord sheetRecord =
boundSheetRecords.get(sheetIndex++);
String sheetname = sheetRecord.getSheetname();
List<String> tempContents = new ArrayList<String>();
tempContents.addAll(contents);
//将当前表内容存储起来
sheetContents.put(sheetname, tempContents);
//清空存储Sheet表缓存记录集合
contents.clear();
}
}
2)以下部分是实现将最后一个sheet内容存储起来
//在该处将最后一个Sheet页内容存储起来
if(boundSheetRecords != null && !boundSheetRecords.isEmpty() &&
boundSheetRecords.get(sheetIndex) != null)
{
BoundSheetRecord sheetRecord = boundSheetRecords.get(sheetIndex);
List<String> tempcontents = new ArrayList<>();
tempcontents.addAll(contents);
sheetContents.put(sheetRecord.getSheetname(), tempcontents);
contents = new ArrayList<>();
}
3、以下是在开始提到的两位前辈的基础上实现的完整过程,如有不当之处请谅解
package com.feng.poi.excel;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingRowDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class ParserXLSExcel implements HSSFListener {
private int minColumns;
private POIFSFileSystem fs;
private int lastRowNumber;
private int lastColumnNumber;
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/** So we known which sheet we're on */
private int sheetIndex = 0;
private ArrayList<BoundSheetRecord> boundSheetRecords =
new ArrayList<BoundSheetRecord>();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
private int curRow;
private List<String> rowlist;
private List<String> contents = new ArrayList<String>();
private List<String> sheet1Contents = new ArrayList<String>();
//分Sheet页将内容存储起来
private Map<String, List<String>> sheetContents =
new HashMap<String, List<String>>();
public List<String> getSheet1Contents() {
return sheet1Contents;
}
public ParserXLSExcel(POIFSFileSystem fs) throws Exception {
this.fs = fs;
this.minColumns = -1;
this.curRow = 0;
this.rowlist = new ArrayList<String>();
}
public ParserXLSExcel(String filename) throws Exception {
this(new POIFSFileSystem(new FileInputStream(filename)));
}
//excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
public void optRows(int sheetIndex,int curRow, List<String> rowlist){
StringBuffer sb = new StringBuffer();
for (int i = 0 ;i< rowlist.size();i++){
String str = rowlist.get(i);
if(null!=str&&!"".equals(str)) sb.append(str.trim()+" ");
}
contents.add(sb.toString());
}
/**读取到的Excel文档,返回文档的content*/
public List<String> getList() {
return contents;
}
/**
* 遍历 excel 文件
*/
public void process() throws IOException {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
//在该处将最后一个Sheet页内容存储起来
if(boundSheetRecords != null && !boundSheetRecords.isEmpty() &&
boundSheetRecords.get(sheetIndex) != null)
{
BoundSheetRecord sheetRecord = boundSheetRecords.get(sheetIndex);
List<String> tempcontents = new ArrayList<>();
tempcontents.addAll(contents);
sheetContents.put(sheetRecord.getSheetname(), tempcontents);
contents = new ArrayList<>();
}
}
public Map<String, List<String>> getSheetContents() {
return sheetContents;
}
/**
* 可以使用BOFRecord作为表格的Sheet开始和结束的区分对象,当一个Sheet页开始时会出现该对象,
* 所以在该对象出现时可以作为上一个Sheet页内容的结束存储,把该sheet页内容存储起来,清空Sheet
* 页内容存储记录,为下一个Sheet页存储做准备,再用来存储下一个Sheet页
* @author Drug 2019年4月21日
*/
private void processCategorySheet()
{
//如果表项存在,并且当前内容不为空
if(boundSheetRecords != null && !boundSheetRecords.isEmpty() &&
boundSheetRecords.get(sheetIndex) != null
&& contents != null && !contents.isEmpty())
{
//当前Sheet记录
BoundSheetRecord sheetRecord =
boundSheetRecords.get(sheetIndex++);
String sheetname = sheetRecord.getSheetname();
List<String> tempContents = new ArrayList<String>();
tempContents.addAll(contents);
//将当前表内容存储起来
sheetContents.put(sheetname, tempContents);
//清空存储Sheet表缓存记录集合
contents.clear();
}
}
/**
* HSSFListener 监听方法,处理 Record
*/
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add((BoundSheetRecord)record);
break;
case BOFRecord.sid:
//碰到该对象时完成记录分页存储
processCategorySheet();
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = "";
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
break;
case StringRecord.sid:
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("")?"":value;
rowlist.add(value);
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord != null){
value = sstRecord
.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("")?"":value;
rowlist.add(value);
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;
thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
value = formatListener.formatNumberDateCell(numrec).trim();
value = value.equals("")?"":value;
rowlist.add(value);
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;
thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
default:
break;
}
// 遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// 空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
rowlist.add(thisColumn," ");
}
// 更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// 行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColumns > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
// 行结束时, 调用 optRows() 方法
lastColumnNumber = -1;
try {
optRows(sheetIndex,curRow, rowlist);
} catch (Exception e) {
e.printStackTrace();
}
rowlist.clear();
}
// 行结束时的操作
if (record instanceof MissingRowDummyRecord) {
if (thisRow > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
// 行结束时, 调用 optRows() 方法
lastColumnNumber = -1;
contents.clear();
}
}
public static void main(String[] args) throws Exception {
ParserXLSExcel excel = new ParserXLSExcel("G:\\test.xls");
excel.process();
List<String> list2 = excel.getList();
Pattern pattern = Pattern.compile(reg);
Map<String, List<String>> sheetContents2 = excel.getSheetContents();
for (String string : sheetContents2.keySet()) {
System.out.println("---------" + string + "------------");
List<String> list = sheetContents2.get(string);
int len = list.size();
for (int i = 0; i < len; i++) {
String replace = list.get(i).trim().replace("","");
System.out.println(replace);
}
}
}
}
以上输出格式或者细节部分没有太多的注意,有问题自行修改,如果有好的方式欢迎探讨,欢迎转载!