Java实现POI读取格式为xls的Excel表格时分Sheet页保存

参考前辈博客修改实现POI读取格式为xls的Excel表格时分Sheet页保存,最后验证在两位前辈的代码基础上实现,其中第二位前辈完成了数据的中间存储,如果需要可以参考第二位前辈的方法,在此感谢二位前辈:

1、https://blog.csdn.net/sundy_fly/article/details/78134322

2、http://gaosheng08.iteye.com/blog/624604

 

以下内容为完成Excel解析后的分页存储

1、实现原理:

        原理是利用BOFRecord特点完成分页,该对象会在一个Sheet页开始时出现,多以根据它的特点,当一个Sheet页开始时可以确定上一个Sheet页的内容已经保存完,多以这时可以保存上一个Sheet页的内容,并清空当前保存Sheet内容的集合,方便下一个Sheet页保存;对于最后一个sheet页的内容可以在Excel内容解析完时保存。这时目前采用的解决方案,如果哪位大佬有更好的方法,欢迎探讨。。。。。。

2、实现方案代码:(该方法是自己摸索,还有不完善之处,如有雷同,均属巧合,望谅解)

 1)该部分是实现分页的代码,是分页存储的核心部分,该方法有个痛点是所有解析页中如果出现有空Sheet也内容,尤其是Sheet页出现在有内容的Sheet也之前时可能会出现Sheet页名称和内容对不上的错乱,之后如果有好的方法会及时更新,如果大佬们发现有好的方法,欢迎一块探讨。

/**
 * 该部分是实现分页的主要部分
 * 可以使用BOFRecord作为表格的Sheet开始和结束的区分对象,当一个Sheet页开始时会出现该对象,
 * 所以在该对象出现时可以作为上一个Sheet页内容的结束存储,把该sheet页内容存储起来,清空
 * Sheet页内容存储记录,为下一个Sheet页存储做准备,再用来存储下一个Sheet页
 * @author  Drug   2019年4月21日
 */
private void processCategorySheet()
{
	//如果表项存在,并且当前内容不为空
	if(boundSheetRecords != null && !boundSheetRecords.isEmpty() && 
				boundSheetRecords.get(sheetIndex) != null  
				&& contents != null && !contents.isEmpty())
	{
		//当前Sheet记录
		BoundSheetRecord sheetRecord = 
				boundSheetRecords.get(sheetIndex++);
		String sheetname = sheetRecord.getSheetname();
		List<String> tempContents = new ArrayList<String>();
		tempContents.addAll(contents);
		//将当前表内容存储起来
		sheetContents.put(sheetname, tempContents);
		//清空存储Sheet表缓存记录集合
		contents.clear();
	}
}

2)以下部分是实现将最后一个sheet内容存储起来

//在该处将最后一个Sheet页内容存储起来
if(boundSheetRecords != null && !boundSheetRecords.isEmpty() && 
        	boundSheetRecords.get(sheetIndex) != null)
{
     BoundSheetRecord sheetRecord = boundSheetRecords.get(sheetIndex);
     List<String> tempcontents = new ArrayList<>();
     tempcontents.addAll(contents);
     sheetContents.put(sheetRecord.getSheetname(), tempcontents);
     contents = new ArrayList<>();
}

3、以下是在开始提到的两位前辈的基础上实现的完整过程,如有不当之处请谅解

package com.feng.poi.excel;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingRowDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class ParserXLSExcel implements HSSFListener {
	
	private int minColumns;  
    private POIFSFileSystem fs;  
  
    private int lastRowNumber;  
    private int lastColumnNumber;  
  
    /** Should we output the formula, or the value it has? */  
    private boolean outputFormulaValues = true;  
  
    /** For parsing Formulas */  
    private SheetRecordCollectingListener workbookBuildingListener;  
    private HSSFWorkbook stubWorkbook;  
  
    // Records we pick up as we process  
    private SSTRecord sstRecord;  
    private FormatTrackingHSSFListener formatListener;  
  
    /** So we known which sheet we're on */  
    private int sheetIndex = 0;  
    private ArrayList<BoundSheetRecord> boundSheetRecords = 
    		new ArrayList<BoundSheetRecord>();  
  
    // For handling formulas with string results  
    private int nextRow;  
    private int nextColumn;  
    private boolean outputNextStringRecord;  
  
    private int curRow;  
    private List<String> rowlist;  
    
	private  List<String> contents = new ArrayList<String>();
	private  List<String> sheet1Contents = new ArrayList<String>();
	//分Sheet页将内容存储起来
	private  Map<String, List<String>> sheetContents = 
			new HashMap<String, List<String>>();
	
    public List<String> getSheet1Contents() {
		return sheet1Contents;
	}

	public ParserXLSExcel(POIFSFileSystem fs) throws Exception {  
        this.fs = fs;  
        this.minColumns = -1;  
        this.curRow = 0;  
        this.rowlist = new ArrayList<String>();  
    }  
  
    public ParserXLSExcel(String filename) throws Exception {  
        this(new POIFSFileSystem(new FileInputStream(filename)));  
    }  
    
    //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型  
    public void optRows(int sheetIndex,int curRow, List<String> rowlist){
     	StringBuffer sb = new StringBuffer();
        for (int i = 0 ;i< rowlist.size();i++){  
            String str = rowlist.get(i);
           if(null!=str&&!"".equals(str)) sb.append(str.trim()+" ");
        }  
        contents.add(sb.toString());
    }
    /**读取到的Excel文档,返回文档的content*/  
    public List<String> getList() {
		return contents;
	}
    
    /** 
     * 遍历 excel 文件 
     */  
    public void process() throws IOException {  
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);  
        formatListener = new FormatTrackingHSSFListener(listener);  
        HSSFEventFactory factory = new HSSFEventFactory();  
        HSSFRequest request = new HSSFRequest();  
        if (outputFormulaValues) {  
            request.addListenerForAllRecords(formatListener);  
        } else {  
            workbookBuildingListener = new SheetRecordCollectingListener(formatListener);  
            request.addListenerForAllRecords(workbookBuildingListener);  
        }  
        factory.processWorkbookEvents(request, fs);
        //在该处将最后一个Sheet页内容存储起来
        if(boundSheetRecords != null && !boundSheetRecords.isEmpty() && 
        		boundSheetRecords.get(sheetIndex) != null)
        {
        	BoundSheetRecord sheetRecord = boundSheetRecords.get(sheetIndex);
        	List<String> tempcontents = new ArrayList<>();
        	tempcontents.addAll(contents);
        	sheetContents.put(sheetRecord.getSheetname(), tempcontents);
        	contents = new ArrayList<>();
        }
    }  
      
    public Map<String, List<String>> getSheetContents() {
		return sheetContents;
	}

	/**
	 * 可以使用BOFRecord作为表格的Sheet开始和结束的区分对象,当一个Sheet页开始时会出现该对象,
	 * 所以在该对象出现时可以作为上一个Sheet页内容的结束存储,把该sheet页内容存储起来,清空Sheet
	 * 页内容存储记录,为下一个Sheet页存储做准备,再用来存储下一个Sheet页
	 * @author  Drug   2019年4月21日
	 */
	private void processCategorySheet()
	{
		//如果表项存在,并且当前内容不为空
		if(boundSheetRecords != null && !boundSheetRecords.isEmpty() && 
				boundSheetRecords.get(sheetIndex) != null 
				&& contents != null && !contents.isEmpty())
		{
			//当前Sheet记录
			BoundSheetRecord sheetRecord = 
					boundSheetRecords.get(sheetIndex++);
			String sheetname = sheetRecord.getSheetname();
			List<String> tempContents = new ArrayList<String>();
			tempContents.addAll(contents);
			//将当前表内容存储起来
			sheetContents.put(sheetname, tempContents);
			//清空存储Sheet表缓存记录集合
			contents.clear();
		}
	}

	/** 
     * HSSFListener 监听方法,处理 Record 
     */  
    public void processRecord(Record record) {  
        int thisRow = -1;  
        int thisColumn = -1;  
        String thisStr = null;  
        String value = null;  
          
        switch (record.getSid()) {  
        case BoundSheetRecord.sid:  
            boundSheetRecords.add((BoundSheetRecord)record);
            break;  
        case BOFRecord.sid: 
        	//碰到该对象时完成记录分页存储
        	processCategorySheet();
            break;  
  
        case SSTRecord.sid:  
            sstRecord = (SSTRecord) record;  
            break;  
  
        case BlankRecord.sid:  
            BlankRecord brec = (BlankRecord) record;  
  
            thisRow = brec.getRow();  
            thisColumn = brec.getColumn();  
            thisStr = "";  
            break;  
        case BoolErrRecord.sid:  
            BoolErrRecord berec = (BoolErrRecord) record;  
  
            thisRow = berec.getRow();  
            thisColumn = berec.getColumn();  
            thisStr = "";  
            break;  
  
        case FormulaRecord.sid:  
            FormulaRecord frec = (FormulaRecord) record;  
  
            thisRow = frec.getRow();  
            thisColumn = frec.getColumn();  
  
            if (outputFormulaValues) {  
                if (Double.isNaN(frec.getValue())) {  
                    outputNextStringRecord = true;  
                    nextRow = frec.getRow();  
                    nextColumn = frec.getColumn();  
                } else {  
                    thisStr = formatListener.formatNumberDateCell(frec);  
                }  
            } else {  
                thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';  
            }  
            break;  
        case StringRecord.sid:  
            if (outputNextStringRecord) {  
                // String for formula  
                StringRecord srec = (StringRecord) record;  
                thisStr = srec.getString();  
                thisRow = nextRow;  
                thisColumn = nextColumn;  
                outputNextStringRecord = false;  
            }  
            break;  
  
        case LabelRecord.sid:  
            LabelRecord lrec = (LabelRecord) record;  
  
            curRow = thisRow = lrec.getRow();  
            thisColumn = lrec.getColumn();  
            value = lrec.getValue().trim();  
            value = value.equals("")?"":value;  
            rowlist.add(value);
            break;  
        case LabelSSTRecord.sid:  
            LabelSSTRecord lsrec = (LabelSSTRecord) record;  
  
            curRow = thisRow = lsrec.getRow();  
            thisColumn = lsrec.getColumn();  
            if (sstRecord != null){  
                value =  sstRecord  
                .getString(lsrec.getSSTIndex()).toString().trim();  
                value = value.equals("")?"":value;  
                rowlist.add(value);
            }  
            break;  
        case NoteRecord.sid:  
            NoteRecord nrec = (NoteRecord) record;  
  
            thisRow = nrec.getRow();  
            thisColumn = nrec.getColumn();  
            thisStr = '"' + "(TODO)" + '"';  
            break;  
        case NumberRecord.sid:  
            NumberRecord numrec = (NumberRecord) record;  
  
            curRow = thisRow = numrec.getRow();  
            thisColumn = numrec.getColumn();  
            value = formatListener.formatNumberDateCell(numrec).trim();  
            value = value.equals("")?"":value;  
            rowlist.add(value);
            break;  
        case RKRecord.sid:  
            RKRecord rkrec = (RKRecord) record;  
  
            thisRow = rkrec.getRow();  
            thisColumn = rkrec.getColumn();  
            thisStr = '"' + "(TODO)" + '"';  
            break;  
        default:  
            break;  
        }  
  
        // 遇到新行的操作  
        if (thisRow != -1 && thisRow != lastRowNumber) {  
            lastColumnNumber = -1;  
        }  
  
        // 空值的操作  
        if (record instanceof MissingCellDummyRecord) {  
            MissingCellDummyRecord mc = (MissingCellDummyRecord) record;  
            curRow = thisRow = mc.getRow();  
            thisColumn = mc.getColumn();  
            rowlist.add(thisColumn," ");  
        }  
  
        // 更新行和列的值  
        if (thisRow > -1)  
            lastRowNumber = thisRow;  
        if (thisColumn > -1)  
            lastColumnNumber = thisColumn;  
  
        // 行结束时的操作  
        if (record instanceof LastCellOfRowDummyRecord) {  
            if (minColumns > 0) {  
                // 列值重新置空  
                if (lastColumnNumber == -1) {  
                    lastColumnNumber = 0;  
                }  
            }  
            // 行结束时, 调用 optRows() 方法  
            lastColumnNumber = -1;  
            try {  
                optRows(sheetIndex,curRow, rowlist);  
            } catch (Exception e) {  
                e.printStackTrace();  
            }  
            rowlist.clear();  
        }  
        // 行结束时的操作  
        if (record instanceof MissingRowDummyRecord) {  
        	if (thisRow > 0) {  
        		// 列值重新置空  
        		if (lastColumnNumber == -1) {  
        			lastColumnNumber = 0;  
        		}  
        	}  
        	// 行结束时, 调用 optRows() 方法  
        	lastColumnNumber = -1;  
        	contents.clear();  
        }  
    }  
    public static void main(String[] args) throws Exception {
    	
		ParserXLSExcel excel = new ParserXLSExcel("G:\\test.xls");
		excel.process(); 
        List<String> list2 = excel.getList();
        Pattern pattern = Pattern.compile(reg);
        Map<String, List<String>> sheetContents2 = excel.getSheetContents();
        for (String string : sheetContents2.keySet()) {
			System.out.println("---------" + string + "------------");
			List<String> list = sheetContents2.get(string);
			int len = list.size();
			for (int i = 0; i < len; i++) {
				String replace = list.get(i).trim().replace("","");
                System.out.println(replace);
			}
		}
    }

}

以上输出格式或者细节部分没有太多的注意,有问题自行修改,如果有好的方式欢迎探讨,欢迎转载!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值