POI Sax 事件驱动解析Excel
事件情景
找了不少资料,本文记录一下成果吧,主要使用POI Sax 事件驱动解析Excel,主要是excel2003和excel2007两种类型的工具类,好了话不多说直接上代码。
1.pom.xml引入依赖
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.11.0</version>
</dependency>
2.ExcelReaderUtil
package com.hualala.data.meta.platform.common.common.utils;
public class ExcelReaderUtil {
// excel2003扩展名
public static final String EXCEL03_EXTENSION = ".xls";
// excel2007扩展名
public static final String EXCEL07_EXTENSION = ".xlsx";
/**
* 读取Excel文件,可能是03也可能是07版本
*
* @param reader
* @param fileName
* @throws Exception
*/
public static void readExcel(IExcelRowReader reader, String fileName) throws Exception {
// 处理excel2003文件
if (fileName.endsWith(EXCEL03_EXTENSION)) {
ExcelXlsReader exceXls = new ExcelXlsReader();
exceXls.setRowReader(reader);
exceXls.process(fileName);
// 处理excel2007文件
} else if (fileName.endsWith(EXCEL07_EXTENSION)) {
ExcelXlsxReader exceXlsx = new ExcelXlsxReader();
exceXlsx.setRowReader(reader);
exceXlsx.process(fileName);
} else {
throw new Exception("文件格式错误,fileName的扩展名只能是xls或xlsx。");
}
}
/**
* 测试
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
IExcelRowReader rowReader = new ExcelRowReader();
//ExcelReaderUtil.readExcel(rowReader, "F://test_one.xls");
ExcelReaderUtil.readExcel(rowReader, "F://test_two.xlsx");
}
}
3.IExcelRowReader
package com.hualala.data.meta.platform.common.common.utils;
import java.util.List;
public interface IExcelRowReader {
/**
* 业务逻辑实现方法
*
* @param sheetIndex
* @param curRow
* @param rowlist
*/
void getRows(int sheetIndex, int curRow, List<String> rowlist);
}
4.ExcelRowReader
package com.hualala.data.meta.platform.common.common.utils;
import lombok.extern.slf4j.Slf4j;
import java.util.List;
@Slf4j
public class ExcelRowReader implements IExcelRowReader {
@Override
public void getRows(int sheetIndex, int curRow, List<String> rowlist) {
System.out.print(curRow+" ");
for (int i = 0; i < rowlist.size(); i++) {
System.out.print(rowlist.get(i)==""?"*":rowlist.get(i) + " ");
}
System.out.println();
}
}
5.ExcelXlsReader
package com.hualala.data.meta.platform.common.common.utils;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class ExcelXlsReader implements HSSFListener {
private int minColumns = -1;
private POIFSFileSystem fs;
private int lastRowNumber;
private int lastColumnNumber;
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
// excel2003工作薄
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
// 表索引
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
// 当前行
private int curRow = 0;
// 存储行记录的容器
private List<String> rowlist = new ArrayList<String>();;
@SuppressWarnings("unused")
private String sheetName;
private IExcelRowReader rowReader;
public void setRowReader(IExcelRowReader rowReader) {
this.rowReader = rowReader;
}
/**
* 遍历excel下所有的sheet
*
* @throws IOException
*/
public void process(String fileName) throws IOException {
this.fs = new POIFSFileSystem(new FileInputStream(fileName));
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* HSSFListener 监听方法,处理 Record
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// 如果有需要,则建立子工作薄
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
rowlist.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: // 单元格为布尔类型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue() + "";
rowlist.add(thisColumn, thisStr);
break;
case FormulaRecord.sid: // 单元格为公式类型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
rowlist.add(thisColumn, thisStr);
break;
case StringRecord.sid:// 单元格中公式的字符串
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("") ? " " : value;
this.rowlist.add(thisColumn, value);
break;
case LabelSSTRecord.sid: // 单元格为字符串类型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
rowlist.add(thisColumn, " ");
} else {
value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("") ? " " : value;
rowlist.add(thisColumn, value);
}
break;
case NumberRecord.sid: // 单元格为数字类型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
value = formatListener.formatNumberDateCell(numrec).trim();
value = value.equals("") ? " " : value;
// 向容器加入列值
rowlist.add(thisColumn, value);
break;
default:
break;
}
// 遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// 空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
rowlist.add(thisColumn, " ");
}
// 更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// 行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColumns > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
lastColumnNumber = -1;
// 每行结束时, 调用getRows() 方法
rowReader.getRows(sheetIndex, curRow, rowlist);
// 清空容器
rowlist.clear();
}
}
public static void main(String[] args) {
IExcelRowReader rowReader = new ExcelRowReader();
try {
System.out.println("**********************************************");
ExcelReaderUtil.readExcel(rowReader,
"F://test_one.xls");
} catch (Exception e) {
e.printStackTrace();
}
}
}
6.ExcelXlsxReader
package com.hualala.data.meta.platform.common.common.utils;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class ExcelXlsxReader extends DefaultHandler {
private IExcelRowReader rowReader;
public void setRowReader(IExcelRowReader rowReader) {
this.rowReader = rowReader;
}
/**
* 共享字符串表
*/
private SharedStringsTable sst;
/**
* 上一次的内容
*/
private String lastContents;
/**
* 字符串标识
*/
private boolean nextIsString;
/**
* 工作表索引
*/
private int sheetIndex = -1;
/**
* 行集合
*/
private List<String> rowlist = new ArrayList<String>();
/**
* 当前行
*/
private int curRow = 0;
/**
* 当前列
*/
private int curCol = 0;
/**
* T元素标识
*/
private boolean isTElement;
/**
* 异常信息,如果为空则表示没有异常
*/
private String exceptionMessage;
/**
* 单元格数据类型,默认为字符串类型
*/
private CellDataType nextDataType = CellDataType.SSTINDEX;
private final DataFormatter formatter = new DataFormatter();
private short formatIndex;
private String formatString;
// 定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等
private String preRef = null, ref = null;
// 定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格
private String maxRef = null;
/**
* 单元格
*/
private StylesTable stylesTable;
/**
* 遍历工作簿中所有的电子表格
*
* @param filename
* @throws IOException
* @throws OpenXML4JException
* @throws SAXException
* @throws Exception
*/
public void process(String filename) throws IOException, OpenXML4JException, SAXException {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader xssfReader = new XSSFReader(pkg);
stylesTable = xssfReader.getStylesTable();
SharedStringsTable sst = xssfReader.getSharedStringsTable();
XMLReader parser = this.fetchSheetParser(sst);
Iterator<InputStream> sheets = xssfReader.getSheetsData();
while (sheets.hasNext()) {
curRow = 0;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
// c => 单元格
if ("c".equals(name)) {
// 前一个单元格的位置
if (preRef == null) {
preRef = attributes.getValue("r");
} else {
preRef = ref;
}
// 当前单元格的位置
ref = attributes.getValue("r");
// 设定单元格类型
this.setNextDataType(attributes);
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// 当元素为t时
if ("t".equals(name)) {
isTElement = true;
} else {
isTElement = false;
}
// 置空
lastContents = "";
}
/**
* 单元格中的数据可能的数据类型
*/
enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
/**
* 处理数据类型
*
* @param attributes
*/
public void setNextDataType(Attributes attributes) {
nextDataType = CellDataType.NUMBER;
formatIndex = -1;
formatString = null;
String cellType = attributes.getValue("t");
String cellStyleStr = attributes.getValue("s");
String columData = attributes.getValue("r");
if ("b".equals(cellType)) {
nextDataType = CellDataType.BOOL;
} else if ("e".equals(cellType)) {
nextDataType = CellDataType.ERROR;
} else if ("inlineStr".equals(cellType)) {
nextDataType = CellDataType.INLINESTR;
} else if ("s".equals(cellType)) {
nextDataType = CellDataType.SSTINDEX;
} else if ("str".equals(cellType)) {
nextDataType = CellDataType.FORMULA;
}
if (cellStyleStr != null) {
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
formatIndex = style.getDataFormat();
formatString = style.getDataFormatString();
if ("m/d/yy" == formatString) {
nextDataType = CellDataType.DATE;
formatString = "yyyy-MM-dd hh:mm:ss.SSS";
}
if (formatString == null) {
nextDataType = CellDataType.NULL;
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
}
}
/**
* 对解析出来的数据进行类型处理
*
* @param value
* 单元格的值(这时候是一串数字)
* @param thisStr
* 一个空字符串
* @return
*/
@SuppressWarnings("deprecation")
public String getDataValue(String value, String thisStr) {
switch (nextDataType) {
// 这几个的顺序不能随便交换,交换了很可能会导致数据错误
case BOOL:
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR:
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA:
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
rtsi = null;
break;
case SSTINDEX:
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));
thisStr = rtss.toString();
rtss = null;
} catch (NumberFormatException ex) {
thisStr = value.toString();
}
break;
case NUMBER:
if (formatString != null) {
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();
} else {
thisStr = value;
}
thisStr = thisStr.replace("_", "").trim();
break;
case DATE:
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString);
// 对日期字符串作特殊处理
thisStr = thisStr.replace(" ", "T");
break;
default:
thisStr = " ";
break;
}
return thisStr;
}
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
// 根据SST的索引值的到单元格的真正要存储的字符串
// 这时characters()方法可能会被调用多次
if (nextIsString && StringUtils.isNotEmpty(lastContents) && StringUtils.isNumeric(lastContents)) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
}
// t元素也包含字符串
if (isTElement) {
// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
String value = lastContents.trim();
rowlist.add(curCol, value);
curCol++;
isTElement = false;
} else if ("v".equals(name)) {
// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
String value = this.getDataValue(lastContents.trim(), "");
// 补全单元格之间的空单元格
if (!ref.equals(preRef)) {
int len = countNullCell(ref, preRef);
for (int i = 0; i < len; i++) {
rowlist.add(curCol, "");
curCol++;
}
}
rowlist.add(curCol, value);
curCol++;
} else {
// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
if (name.equals("row")) {
// 默认第一行为表头,以该行单元格数目为最大数目
if (curRow == 0) {
maxRef = ref;
}
// 补全一行尾部可能缺失的单元格
if (maxRef != null) {
int len = countNullCell(maxRef, ref);
for (int i = 0; i <= len; i++) {
rowlist.add(curCol, "");
curCol++;
}
}
rowReader.getRows(sheetIndex, curRow, rowlist);
rowlist.clear();
curRow++;
curCol = 0;
preRef = null;
ref = null;
}
}
}
/**
* 计算两个单元格之间的单元格数目(同一行)
*
* @param ref
* @param preRef
* @return
*/
public int countNullCell(String ref, String preRef) {
// excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD
String xfd = ref.replaceAll("\\d+", "");
String xfd_1 = preRef.replaceAll("\\d+", "");
xfd = fillChar(xfd, 3, '@', true);
xfd_1 = fillChar(xfd_1, 3, '@', true);
char[] letter = xfd.toCharArray();
char[] letter_1 = xfd_1.toCharArray();
int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
return res - 1;
}
/**
* 字符串的填充
*
* @param str
* @param len
* @param let
* @param isPre
* @return
*/
String fillChar(String str, int len, char let, boolean isPre) {
int len_1 = str.length();
if (len_1 < len) {
if (isPre) {
for (int i = 0; i < (len - len_1); i++) {
str = let + str;
}
} else {
for (int i = 0; i < (len - len_1); i++) {
str = str + let;
}
}
}
return str;
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
// 得到单元格内容的值
lastContents += new String(ch, start, length);
}
/**
* @return the exceptionMessage
*/
public String getExceptionMessage() {
return exceptionMessage;
}
public static void main(String[] args) {
IExcelRowReader rowReader = new ExcelRowReader();
try {
// ExcelReaderUtil.readExcel(rowReader,
// "E://2016-07-04-011940a.xls");
System.out.println("**********************************************");
ExcelReaderUtil.readExcel(rowReader, "F://test_two.xlsx");
} catch (Exception e) {
e.printStackTrace();
}
}
}
7.ExcelXlsxReader(如果6在解析字符串类型数字抛下标越界异常请使用这个试试)
package com.data.meta.platform.common.common.utils.ExcelUtil;
import com.data.meta.platform.common.common.utils.IExcelRowReader;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.*;
public class ExcelXlsxReader extends DefaultHandler {
/**
* 单元格中的数据可能的数据类型
*/
enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
/**
* 共享字符串表
*/
private SharedStringsTable sst;
/**
* 上一次的索引值
*/
private String lastIndex;
/**
* 工作表索引
*/
private int sheetIndex = -1;
/**
* 总行数
*/
private int totalRows=0;
/**
* 一行内cell集合
*/
private List<String> cellList = new ArrayList<String>();
/**
* 判断整行是否为空行的标记
*/
private boolean flag = false;
/**
* 当前行
*/
private int curRow = 0;
/**
* 当前列
*/
private int curCol = 0;
/**
* T元素标识
*/
private boolean isTElement;
/**
* 异常信息,如果为空则表示没有异常
*/
private String exceptionMessage;
/**
* 单元格数据类型,默认为字符串类型
*/
private CellDataType nextDataType = CellDataType.SSTINDEX;
private final DataFormatter formatter = new DataFormatter();
/**
* 单元格日期格式的索引
*/
private short formatIndex;
/**
* 日期格式字符串
*/
private String formatString;
//定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等
private String preRef = null, ref = null;
//定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格
private String maxRef = null;
/**
* 单元格
*/
private StylesTable stylesTable;
//数据处理接口
private IExcelRowReader rowReader;
public void setRowReader(IExcelRowReader rowReader) {
this.rowReader = rowReader;
}
/**
* 遍历工作簿中所有的电子表格
* 并缓存在mySheetList中
*
* @param file csv文件(路径+文件)
* @throws Exception
*/
public void process(File file) throws Exception {
OPCPackage pkg = OPCPackage.open(file);
XSSFReader xssfReader = new XSSFReader(pkg);
stylesTable = xssfReader.getStylesTable();
SharedStringsTable sst = xssfReader.getSharedStringsTable();
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (sheets.hasNext()) { //遍历sheet
curRow = 0; //标记初始行为第一行
sheetIndex++;
InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource); //解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行
sheet.close();
break;
}
}
/**
* 第一个执行
*
* @param uri
* @param localName
* @param name
* @param attributes
* @throws SAXException
*/
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
//c => 单元格
if ("c".equals(name)) {
//前一个单元格的位置
if (preRef == null) {
if (!("A"+(curRow+1)).equals(attributes.getValue("r"))){
for(int i=0;i<covertRowIdtoInt(attributes.getValue("r"))-1;i++){
cellList.add("");
curCol++;
}
}
}
preRef = (preRef == null?attributes.getValue("r"):ref);
//当前单元格的位置
ref = attributes.getValue("r");
if (!ref.equals(preRef)) {
int len = countNullCell(ref, preRef);
for (int i = 0; i < len; i++) {
cellList.add("");
curCol++;
}
}
cellList.add("");
curCol++;
//设定单元格类型
this.setNextDataType(attributes);
}
//当元素为t时
if ("t".equals(name)) {
isTElement = true;
} else {
isTElement = false;
}
//置空
lastIndex = "";
}
/**
* 第二个执行
* 得到单元格对应的索引值或是内容值
* 如果单元格类型是字符串、INLINESTR、数字、日期,lastIndex则是索引值
* 如果单元格类型是布尔值、错误、公式,lastIndex则是内容值
* @param ch
* @param start
* @param length
* @throws SAXException
*/
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
lastIndex += new String(ch, start, length);
}
/**
* 第三个执行
*
* @param uri
* @param localName
* @param name
* @throws SAXException
*/
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
//t元素也包含字符串
if (isTElement) {//这个程序没经过
//将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
String value = lastIndex.trim();
cellList.set(curCol-1, value);
isTElement = false;
//如果里面某个单元格含有值,则标识该行不为空行
if (value != null && !"".equals(value)) {
flag = true;
}
} else if ("v".equals(name)) {
//v => 单元格的值,如果单元格是字符串,则v标签的值为该字符串在SST中的索引
String value = this.getDataValue(lastIndex.trim(), "");//根据索引值获取对应的单元格值
if(StringUtils.isNotBlank(value)){
cellList.set(curCol-1, value);
}
//如果里面某个单元格含有值,则标识该行不为空行
if (value != null && !"".equals(value)) {
flag = true;
}
} else {
//如果标签名称为row,这说明已到行尾,调用optRows()方法
if ("row".equals(name)) {
//默认第一行为表头,以该行单元格数目为最大数目
if (curRow == 0) {
maxRef = ref;
}
//补全一行尾部可能缺失的单元格
if (maxRef != null) {
int len = countNullCell(maxRef, ref);
for (int i = 0; i <= len; i++) {
cellList.add("");
}
}
rowReader.getRows(sheetIndex, curRow, cellList);
cellList.clear();
curRow++;
curCol = 0;
preRef = null;
ref = null;
}
}
}
/**
* 处理数据类型
*
* @param attributes
*/
public void setNextDataType(Attributes attributes) {
nextDataType = CellDataType.NUMBER; //cellType为空,则表示该单元格类型为数字
formatIndex = -1;
formatString = null;
String cellType = attributes.getValue("t"); //单元格类型
String cellStyleStr = attributes.getValue("s"); //
if ("b".equals(cellType)) { //处理布尔值
nextDataType = CellDataType.BOOL;
} else if ("e".equals(cellType)) { //处理错误
nextDataType = CellDataType.ERROR;
} else if ("inlineStr".equals(cellType)) {
nextDataType = CellDataType.INLINESTR;
} else if ("s".equals(cellType)) { //处理字符串
nextDataType = CellDataType.SSTINDEX;
} else if ("str".equals(cellType)) {
nextDataType = CellDataType.FORMULA;
}
if (cellStyleStr != null) { //处理日期
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
formatIndex = style.getDataFormat();
formatString = style.getDataFormatString();
if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58){
nextDataType = CellDataType.DATE;
switch (formatIndex) {
case 14:
formatString = formatString.equals("m/d/yy")?"yyyy/MM/dd":"yyyy-MM-dd";
break;
case 31:
formatString = "yyyy年MM月dd日";
break;
case 57:
formatString = "yyyy年MM月";
break;
case 58:
formatString = "MM月dd日";
break;
default:
break;
}
} else if(StringUtils.isNotBlank(formatString)){
if(formatString.contains("m/d/yy")){
nextDataType = CellDataType.DATE;
formatString = "yyyy-MM-dd hh:mm:ss";
}
} else {
nextDataType = CellDataType.NULL;
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
}
}
/**
* 对解析出来的数据进行类型处理
* @param value 单元格的值,
* value代表解析:BOOL的为0或1, ERROR的为内容值,FORMULA的为内容值,INLINESTR的为索引值需转换为内容值,
* SSTINDEX的为索引值需转换为内容值, NUMBER为内容值,DATE为内容值
* @param thisStr 一个空字符串
* @return
*/
public String getDataValue(String value, String thisStr) {
switch (nextDataType) {
// 这几个的顺序不能随便交换,交换了很可能会导致数据错误
case BOOL: //布尔值
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR: //错误
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA: //公式
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
rtsi = null;
break;
case SSTINDEX: //字符串
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));//根据idx索引值获取内容值
thisStr = rtss.toString();
rtss = null;
} catch (NumberFormatException ex) {
thisStr = value.toString();
}
break;
case NUMBER: //数字
if (formatString != null) {
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();
} else {
thisStr = value.contains(".")?String.valueOf(Double.parseDouble(value)):value;
}
thisStr = thisStr.replace("_", "").trim();
break;
case DATE: //日期
SimpleDateFormat targetFormat = new SimpleDateFormat(formatString);
Calendar calendar = new GregorianCalendar(1900,0,-1);
calendar.add(Calendar.DATE, Integer.valueOf(value));
thisStr = targetFormat.format(calendar.getTime());
thisStr = thisStr.replace("T", " ");
break;
default:
thisStr = "";
break;
}
return thisStr;
}
public int countNullCell(String ref, String preRef) {
//excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD
String xfd = ref.replaceAll("\\d+", "");
String xfd_1 = preRef.replaceAll("\\d+", "");
xfd = fillChar(xfd, 3, '@', true);
xfd_1 = fillChar(xfd_1, 3, '@', true);
char[] letter = xfd.toCharArray();
char[] letter_1 = xfd_1.toCharArray();
int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
return res - 1;
}
public String fillChar(String str, int len, char let, boolean isPre) {
int len_1 = str.length();
if (len_1 < len) {
if (isPre) {
for (int i = 0; i < (len - len_1); i++) {
str = let + str;
}
} else {
for (int i = 0; i < (len - len_1); i++) {
str = str + let;
}
}
}
return str;
}
/**
* @return the exceptionMessage
*/
public String getExceptionMessage() {
return exceptionMessage;
}
/**
* 列号转数字 AB7-->28 第28列
* @param rowId
* @return
*/
public static int covertRowIdtoInt(String rowId){
int firstDigit = -1;
for (int c = 0; c < rowId.length(); ++c) {
if (Character.isDigit(rowId.charAt(c))) {
firstDigit = c;
break;
}
}
//AB7-->AB
//AB是列号, 7是行号
String newRowId = rowId.substring(0,firstDigit);
int num = 0;
int result = 0;
int length = newRowId.length();
for(int i = 0; i < length; i++) {
//先取最低位,B
char ch = newRowId.charAt(length - i - 1);
//B表示的十进制2,ascii码相减,以A的ascii码为基准,A表示1,B表示2
num = (int)(ch - 'A' + 1) ;
//列号转换相当于26进制数转10进制
num *= Math.pow(26, i);
result += num;
}
return result;
}
}