2007格式:
excel2007文件格式与之前版本不同,之前版本采用的是微软自己的存储格式。07版内容的存储采用XML格式,所以,理所当然的,对大数据量的xlsx文件的读取采用的也是XML的处理方式SAX。
同之前的版本一样,大数据量文件的读取采用的是事件模型eventusermodel。usermodel模式需要将文件一次性全部读到内存中,07版的既然采用的存储模式是xml,解析用的DOM方式也是如此,这种模式操作简单,容易上手,但是对于大量数据占用的内存也是相当可观,在Eclipse中经常出现内存溢出。
下面就是采用eventusermodel对07excel文件读取。
同上篇,我将当前行的单元格数据存储到List中,抽象出 optRows 方法,该方法会在每行末尾时调用,方法参数为当前行索引curRow(int型)及存有行内单元格数据的List。继承类只需实现该行级方法即可。
- package com.gaosheng.util.xls;
- import java.io.InputStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.List;
- import org.apache.poi.xssf.eventusermodel.XSSFReader;
- import org.apache.poi.xssf.model.SharedStringsTable;
- import org.apache.poi.xssf.usermodel.XSSFRichTextString;
- import org.apache.poi.openxml4j.opc.OPCPackage;
- import org.xml.sax.Attributes;
- import org.xml.sax.InputSource;
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.DefaultHandler;
- import org.xml.sax.helpers.XMLReaderFactory;
- /**
- * XSSF and SAX (Event API)
- */
- public abstract class XxlsAbstract extends DefaultHandler {
- private SharedStringsTable sst;
- private String lastContents;
- private boolean nextIsString;
- private int sheetIndex = -1;
- private List<String> rowlist = new ArrayList<String>();
- private int curRow = 0; //当前行
- private int curCol = 0; //当前列索引
- private int preCol = 0; //上一列列索引
- private int titleRow = 0; //标题行,一般情况下为0
- private int rowsize = 0; //列数
- //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
- // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
- //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型 编写自己的业务
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
- //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
- public void processOneSheet(String filename,int sheetId) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- // rId2 found by processing the Workbook
- // 根据 rId# 或 rSheet# 查找sheet
- InputStream sheet2 = r.getSheet("rId"+sheetId);
- sheetIndex++;
- InputSource sheetSource = new InputSource(sheet2);
- parser.parse(sheetSource);
- sheet2.close();
- }
- /**
- * 遍历 excel 文件
- */
- public void process(String filename) throws Exception {
- OPCPackage pkg = OPCPackage.open(filename);
- XSSFReader r = new XSSFReader(pkg);
- SharedStringsTable sst = r.getSharedStringsTable();
- XMLReader parser = fetchSheetParser(sst);
- Iterator<InputStream> sheets = r.getSheetsData();
- while (sheets.hasNext()) {
- curRow = 0;
- sheetIndex++;
- InputStream sheet = sheets.next();
- InputSource sheetSource = new InputSource(sheet);
- parser.parse(sheetSource);
- sheet.close();
- }
- }
- public XMLReader fetchSheetParser(SharedStringsTable sst)
- throws SAXException {
- XMLReader parser = XMLReaderFactory
- .createXMLReader("org.apache.xerces.parsers.SAXParser");
- this.sst = sst;
- parser.setContentHandler(this);
- return parser;
- }
- public void startElement(String uri, String localName, String name,
- Attributes attributes) throws SAXException {
- // c => 单元格
- if (name.equals("c")) {
- // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
- String cellType = attributes.getValue("t");
- String rowStr = attributes.getValue("r");
- curCol = this.getRowIndex(rowStr);
- if (cellType != null && cellType.equals("s")) {
- nextIsString = true;
- } else {
- nextIsString = false;
- }
- }
- // 置空
- lastContents = "";
- }
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- // 根据SST的索引值的到单元格的真正要存储的字符串
- // 这时characters()方法可能会被调用多次
- if (nextIsString) {
- try {
- int idx = Integer.parseInt(lastContents);
- lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
- .toString();
- } catch (Exception e) {
- }
- }
- // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
- // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
- if (name.equals("v")) {
- String value = lastContents.trim();
- value = value.equals("")?" ":value;
- int cols = curCol-preCol;
- if (cols>1){
- for (int i = 0;i < cols-1;i++){
- rowlist.add(preCol,"");
- }
- }
- preCol = curCol;
- rowlist.add(curCol-1, value);
- }else {
- //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
- if (name.equals("row")) {
- int tmpCols = rowlist.size();
- if(curRow>this.titleRow && tmpCols<this.rowsize){
- for (int i = 0;i < this.rowsize-tmpCols;i++){
- rowlist.add(rowlist.size(), "");
- }
- }
- try {
- optRows(sheetIndex,curRow,rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- if(curRow==this.titleRow){
- this.rowsize = rowlist.size();
- }
- rowlist.clear();
- curRow++;
- curCol = 0;
- preCol = 0;
- }
- }
- }
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- //得到单元格内容的值
- lastContents += new String(ch, start, length);
- }
- //得到列索引,每一列c元素的r属性构成为字母加数字的形式,字母组合为列索引,数字组合为行索引,
- //如AB45,表示为第(A-A+1)*26+(B-A+1)*26列,45行
- public int getRowIndex(String rowStr){
- rowStr = rowStr.replaceAll("[^A-Z]", "");
- byte[] rowAbc = rowStr.getBytes();
- int len = rowAbc.length;
- float num = 0;
- for (int i=0;i<len;i++){
- num += (rowAbc[i]-'A'+1)*Math.pow(26,len-i-1 );
- }
- return (int) num;
- }
- public int getTitleRow() {
- return titleRow;
- }
- public void setTitleRow(int titleRow) {
- this.titleRow = titleRow;
- }
- }
2003格式:
- package com.gaosheng.util.xls;
- import java.io.FileInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.sql.SQLException;
- import java.util.ArrayList;
- import java.util.List;
- import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
- import org.apache.poi.hssf.eventusermodel.HSSFListener;
- import org.apache.poi.hssf.eventusermodel.HSSFRequest;
- import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
- import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
- import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
- import org.apache.poi.hssf.model.HSSFFormulaParser;
- import org.apache.poi.hssf.record.BOFRecord;
- import org.apache.poi.hssf.record.BlankRecord;
- import org.apache.poi.hssf.record.BoolErrRecord;
- import org.apache.poi.hssf.record.BoundSheetRecord;
- import org.apache.poi.hssf.record.FormulaRecord;
- import org.apache.poi.hssf.record.LabelRecord;
- import org.apache.poi.hssf.record.LabelSSTRecord;
- import org.apache.poi.hssf.record.NoteRecord;
- import org.apache.poi.hssf.record.NumberRecord;
- import org.apache.poi.hssf.record.RKRecord;
- import org.apache.poi.hssf.record.Record;
- import org.apache.poi.hssf.record.SSTRecord;
- import org.apache.poi.hssf.record.StringRecord;
- import org.apache.poi.hssf.usermodel.HSSFWorkbook;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- public abstract class HxlsAbstract implements HSSFListener {
- private int minColumns;
- private POIFSFileSystem fs;
- private PrintStream output;
- private int lastRowNumber;
- private int lastColumnNumber;
- /** Should we output the formula, or the value it has? */
- private boolean outputFormulaValues = true;
- /** For parsing Formulas */
- private SheetRecordCollectingListener workbookBuildingListener;
- private HSSFWorkbook stubWorkbook;
- // Records we pick up as we process
- private SSTRecord sstRecord;
- private FormatTrackingHSSFListener formatListener;
- /** So we known which sheet we're on */
- private int sheetIndex = -1;
- private BoundSheetRecord[] orderedBSRs;
- @SuppressWarnings("unchecked")
- private ArrayList boundSheetRecords = new ArrayList();
- // For handling formulas with string results
- private int nextRow;
- private int nextColumn;
- private boolean outputNextStringRecord;
- private int curRow;
- private List<String> rowlist;
- @SuppressWarnings( "unused")
- private String sheetName;
- public HxlsAbstract(POIFSFileSystem fs)
- throws SQLException {
- this.fs = fs;
- this.output = System.out;
- this.minColumns = -1;
- this.curRow = 0;
- this.rowlist = new ArrayList<String>();
- }
- public HxlsAbstract(String filename) throws IOException,
- FileNotFoundException, SQLException {
- this(new POIFSFileSystem(new FileInputStream(filename)));
- }
- //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型
- // public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;
- //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
- public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;
- /**
- * 遍历 excel 文件
- */
- public void process() throws IOException {
- MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
- this);
- formatListener = new FormatTrackingHSSFListener(listener);
- HSSFEventFactory factory = new HSSFEventFactory();
- HSSFRequest request = new HSSFRequest();
- if (outputFormulaValues) {
- request.addListenerForAllRecords(formatListener);
- } else {
- workbookBuildingListener = new SheetRecordCollectingListener(
- formatListener);
- request.addListenerForAllRecords(workbookBuildingListener);
- }
- factory.processWorkbookEvents(request, fs);
- }
- /**
- * HSSFListener 监听方法,处理 Record
- */
- @SuppressWarnings("unchecked")
- public void processRecord(Record record) {
- int thisRow = -1;
- int thisColumn = -1;
- String thisStr = null;
- String value = null;
- switch (record.getSid()) {
- case BoundSheetRecord.sid:
- boundSheetRecords.add(record);
- break;
- case BOFRecord.sid:
- BOFRecord br = (BOFRecord) record;
- if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
- // Create sub workbook if required
- if (workbookBuildingListener != null && stubWorkbook == null) {
- stubWorkbook = workbookBuildingListener
- .getStubHSSFWorkbook();
- }
- // Works by ordering the BSRs by the location of
- // their BOFRecords, and then knowing that we
- // process BOFRecords in byte offset order
- sheetIndex++;
- if (orderedBSRs == null) {
- orderedBSRs = BoundSheetRecord
- .orderByBofPosition(boundSheetRecords);
- }
- sheetName = orderedBSRs[sheetIndex].getSheetname();
- }
- break;
- case SSTRecord.sid:
- sstRecord = (SSTRecord) record;
- break;
- case BlankRecord.sid:
- BlankRecord brec = (BlankRecord) record;
- thisRow = brec.getRow();
- thisColumn = brec.getColumn();
- thisStr = "";
- break;
- case BoolErrRecord.sid:
- BoolErrRecord berec = (BoolErrRecord) record;
- thisRow = berec.getRow();
- thisColumn = berec.getColumn();
- thisStr = "";
- break;
- case FormulaRecord.sid:
- FormulaRecord frec = (FormulaRecord) record;
- thisRow = frec.getRow();
- thisColumn = frec.getColumn();
- if (outputFormulaValues) {
- if (Double.isNaN(frec.getValue())) {
- // Formula result is a string
- // This is stored in the next record
- outputNextStringRecord = true;
- nextRow = frec.getRow();
- nextColumn = frec.getColumn();
- } else {
- thisStr = formatListener.formatNumberDateCell(frec);
- }
- } else {
- thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
- frec.getParsedExpression()) + '"';
- }
- break;
- case StringRecord.sid:
- if (outputNextStringRecord) {
- // String for formula
- StringRecord srec = (StringRecord) record;
- thisStr = srec.getString();
- thisRow = nextRow;
- thisColumn = nextColumn;
- outputNextStringRecord = false;
- }
- break;
- case LabelRecord.sid:
- LabelRecord lrec = (LabelRecord) record;
- curRow = thisRow = lrec.getRow();
- thisColumn = lrec.getColumn();
- value = lrec.getValue().trim();
- value = value.equals("")?" ":value;
- this.rowlist.add(thisColumn, value);
- break;
- case LabelSSTRecord.sid:
- LabelSSTRecord lsrec = (LabelSSTRecord) record;
- curRow = thisRow = lsrec.getRow();
- thisColumn = lsrec.getColumn();
- if (sstRecord == null) {
- rowlist.add(thisColumn, " ");
- } else {
- value = sstRecord
- .getString(lsrec.getSSTIndex()).toString().trim();
- value = value.equals("")?" ":value;
- rowlist.add(thisColumn,value);
- }
- break;
- case NoteRecord.sid:
- NoteRecord nrec = (NoteRecord) record;
- thisRow = nrec.getRow();
- thisColumn = nrec.getColumn();
- // TODO: Find object to match nrec.getShapeId()
- thisStr = '"' + "(TODO)" + '"';
- break;
- case NumberRecord.sid:
- NumberRecord numrec = (NumberRecord) record;
- curRow = thisRow = numrec.getRow();
- thisColumn = numrec.getColumn();
- value = formatListener.formatNumberDateCell(numrec).trim();
- value = value.equals("")?" ":value;
- // Format
- rowlist.add(thisColumn, value);
- break;
- case RKRecord.sid:
- RKRecord rkrec = (RKRecord) record;
- thisRow = rkrec.getRow();
- thisColumn = rkrec.getColumn();
- thisStr = '"' + "(TODO)" + '"';
- break;
- default:
- break;
- }
- // 遇到新行的操作
- if (thisRow != -1 && thisRow != lastRowNumber) {
- lastColumnNumber = -1;
- }
- // 空值的操作
- if (record instanceof MissingCellDummyRecord) {
- MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
- curRow = thisRow = mc.getRow();
- thisColumn = mc.getColumn();
- rowlist.add(thisColumn," ");
- }
- // 如果遇到能打印的东西,在这里打印
- if (thisStr != null) {
- if (thisColumn > 0) {
- output.print(',');
- }
- output.print(thisStr);
- }
- // 更新行和列的值
- if (thisRow > -1)
- lastRowNumber = thisRow;
- if (thisColumn > -1)
- lastColumnNumber = thisColumn;
- // 行结束时的操作
- if (record instanceof LastCellOfRowDummyRecord) {
- if (minColumns > 0) {
- // 列值重新置空
- if (lastColumnNumber == -1) {
- lastColumnNumber = 0;
- }
- }
- // 行结束时, 调用 optRows() 方法
- lastColumnNumber = -1;
- try {
- optRows(sheetIndex,curRow, rowlist);
- } catch (SQLException e) {
- e.printStackTrace();
- }
- rowlist.clear();
- }
- }
- }
###########大数据量导出时采用SXSSFWorkbook处理###################
/** Create an empty workbook using SXSSF to keep memory usage low. **/
// NOTE(review): 1000 is presumably the number of rows kept in memory before
// older rows are flushed — confirm against the SXSSFWorkbook constructor docs.
Workbook wb = new SXSSFWorkbook(1000);
Sheet sheet = wb.createSheet();
Spring 在上传 excel 文件时,有时候会出现"缓存"现象,即上传的 excel 解析后数据会有重复。原因是 Spring 的 bean 默认是单例,解析状态会在多次请求之间共享,所以 controller 层一定要加上注解 @Scope("prototype")。