使用SAXReader方式解析excel
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
/**
* @author tkgup
* @since 2021/5/29
*/
public class MyHandler extends DefaultHandler {
private static final String empty = "";
// 样式数据
private final StylesTable stylesTable;
// 取SST的索引对应的值
private final ReadOnlySharedStringsTable sst;
// 解析结果保存
private final List<List<String>> container;
// 是否忽略中单的空行
private final boolean undividedData;
// 存储cell标签下v标签包裹的字符文本内容
// 在v标签开始后,解析器自动调用characters()保存到 cellValue
// 但当cell标签的属性 s是 t时, 表示取到的cellValue是SharedStringsTable 的index值
// 需要在v标签结束时根据 index(cellValue)获取一次真正的值
private String cellValue;
// 记录数据类型
private DataType nextDataType;
// 根据dimension得出最大列
private int maxColumn;
// 根据dimension得出总行数
private int maxRow;
// 需要解析的列数
private int parseColumn;
// 需要解析的行数
private int parseRow;
// 上个有内容的单元格id,判断空单元格
private Integer preCellIndex;
// 当前行id,判断空行
private Integer rowId;
// 判断单元格cell的c标签下是否有v,否则可能数据错位
private boolean hasV;
// 行数据保存
private List<String> currentRowData;
public MyHandler(ReadOnlySharedStringsTable sst, StylesTable stylesTable,
List<List<String>> container) {
this(sst, stylesTable, container, -1);
}
public MyHandler(ReadOnlySharedStringsTable sst, StylesTable stylesTable,
List<List<String>> container, int parseRow) {
this(sst, stylesTable, container, parseRow, -1);
}
public MyHandler(ReadOnlySharedStringsTable sst, StylesTable stylesTable,
List<List<String>> container, int parseRow, int parseColumn) {
this(sst, stylesTable, container, parseRow, parseColumn, false);
}
public MyHandler(ReadOnlySharedStringsTable sst, StylesTable stylesTable,
List<List<String>> container, int parseRow, int parseColumn, boolean undividedData) {
this.stylesTable = stylesTable;
this.sst = sst;
this.container = container;
this.parseRow = parseRow;
this.parseColumn = parseColumn;
this.undividedData = undividedData;
}
public static void main(String[] args) throws Exception {
String filePath = "日记.xlsx";
List<List<String>> sheet = parse(filePath);
System.out.println();
for (List<String> row : sheet) {
System.out.println(String.join(",", row));
}
System.out.println();
}
private static List<List<String>> parse(String filePath)
throws Exception {
//将文件以压缩包的形式读入
OPCPackage opcPackage = OPCPackage.open(filePath, PackageAccess.READ);
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcPackage, false);
//读入样式信息
XSSFReader xssfReader = new XSSFReader(opcPackage);
//这里只读入了第一个sheet,如果需要读取多个可以遍历这个迭代器
InputStream stream = xssfReader.getSheetsData().next();
//构造XLSXReader读取内容
List<List<String>> container = parse(xssfReader.getStylesTable(), strings, stream);
//关闭流
stream.close();
opcPackage.close();
return container;
}
private static List<List<String>> parse(StylesTable styles, ReadOnlySharedStringsTable strings,
InputStream sheetInputStream) throws Exception {
//将输入流包装成XML源
InputSource sheetSource = new InputSource(sheetInputStream);
//构造基于标签回调的XMLReader
SAXParserFactory factory = getSafeSaxParserFactory();
SAXParser saxParser = factory.newSAXParser();
XMLReader reader = saxParser.getXMLReader();
List<List<String>> container = new ArrayList<>();
//构造XLSXReader
MyHandler handler = new MyHandler(strings, styles, container, 3, 5, true);
//添加管理者
reader.setContentHandler(handler);
//读入XML源
try {
reader.parse(sheetSource);
} catch (ParseRuntimeException e) {
doNothing();
} catch (IOException | SAXException e) {
throw new RuntimeException("解析数据时发生异常");
}
return container;
}
private static SAXParserFactory getSafeSaxParserFactory()
throws ParserConfigurationException, SAXNotRecognizedException, SAXNotSupportedException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl",true);
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
return factory;
}
private static int getColumnNumByCellIndex(String cellId) {
return parseColumn(cellId.substring(0, getRowNumberIndex(cellId)));
}
private static int getRowNumByCellIndex(String cellId) {
return Integer.parseInt(cellId.substring(getRowNumberIndex(cellId)));
}
private static int parseColumn(String columnId) {
// 列号字符转数字
int result = 0;
for (char num : columnId.toCharArray()) {
result = result * 26 + ((num - 'A') + 1);
}
return result;
}
private static int getRowNumberIndex(String cellId) {
int firstDigit = -1;
for (int i = 0; i < cellId.length(); i++) {
if (Character.isDigit(cellId.charAt(i))) {
firstDigit = i;
break;
}
}
return firstDigit;
}
private static void doNothing() {
}
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) {
// 置空上一个单元格保存的内容,准备保存当前单元格的
cellValue = empty;
if ("dimension".equals(qName)) {
String dimension = attributes.getValue("ref");
String endCell = dimension.substring(dimension.indexOf(":") + 1);
maxColumn = getColumnNumByCellIndex(endCell);
maxRow = getRowNumByCellIndex(endCell);
parseColumn = parseColumn < 0 ? maxColumn : parseColumn;
parseRow = parseRow < 0 ? maxRow : parseRow;
}
// 行开始
if ("row".equals(qName)) {
preCellIndex = null;
int rowNum = Integer.parseInt(attributes.getValue("r"));
checkReadRow(rowNum);
addEmptyRow(rowNum);
currentRowData = new ArrayList<>();
}
// 单元格
if ("c".equals(qName)) {
int cellIndex = getColumnNumByCellIndex(attributes.getValue("r"));
fillRowMidEmpty(cellIndex);
preCellIndex = cellIndex;
nextDataType = DataType.getDataType(attributes, stylesTable);
}
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
cellValue += new String(ch, start, length);
}
@Override
public void endElement(String uri, String localName, String qName) {
if ("row".equals(qName)) {
fillRowTailEmpty();
checkAddRow();
}
if ("c".equals(qName)) {
if (!hasV) {
checkAdd(empty);
}
hasV = false;
}
if ("v".equals(qName)) {
hasV = true;
checkAdd(nextDataType.dealValue(sst, cellValue));
}
}
private void checkAddRow() {
if (undividedData) {
container.add(currentRowData);
return;
}
for (String cellV : currentRowData) {
if (Optional.ofNullable(cellV).orElse("").length() > 0) {
container.add(currentRowData);
return;
}
}
}
private void checkAdd(String cellValue) {
if (currentRowData.size() < parseColumn) {
currentRowData.add(cellValue);
}
}
private void checkReadRow(int rowNum) {
if (parseRow >= 0 && rowNum - 1 >= parseRow) {
// 用于达到指定行数时中断解析操作
if (undividedData || container.size() >= parseRow) {
throw new ParseRuntimeException();
}
}
}
private void addEmptyRow(int rowNum) {
if (undividedData && rowId != null) {
//与上一行相差2, 说明中间有空行
int gap = rowNum - rowId;
while (gap-- > 1) {
container.add(Collections.nCopies(parseColumn, empty));
}
}
rowId = rowNum;
}
private void fillRowMidEmpty(int cellIndex) {
// 空单元判断,填充空字符到list
int leftColumn = Math.min(cellIndex, parseColumn);
if (preCellIndex != null) {
// 解析的单元格 非 此行第一个有值单元格
int gap = leftColumn - preCellIndex;
// 为中间间隙填值
for (int i = 0; i < gap - 1; i++) {
currentRowData.add(empty);
}
} else {
// 解析的单元格 为 此行第一个有值单元格
// 非第一个单元格时为前面的补值
if (leftColumn != 1) {
for (int i = 0; i < leftColumn - 1; i++) {
currentRowData.add(empty);
}
}
}
}
private void fillRowTailEmpty() {
// 判断最后一个单元格是否在最后,补齐列数
// 有的单元格只修改单元格格式,而没有内容,会出现c标签下没有v标签,导致currentRow少
int dataSize = currentRowData.size();
for (int i = 0; i < parseColumn - dataSize; i++) {
currentRowData.add(empty);
}
}
enum DataType {
BOOL("b") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
return cellValue.charAt(0) == 'A' ? "false" : "true";
}
},
ERROR("e") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
return cellValue;
}
},
FORMULA("str") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
return cellValue;
}
},
INLINE_STR("inlineStr") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
return new XSSFRichTextString(cellValue).toString();
}
},
SST_INDEX("s") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
return sst.getItemAt(Integer.parseInt(cellValue)).toString();
}
},
NUMBER("d") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
return cellValue;
}
},
STYLE("") {
@Override
public String dealValue(ReadOnlySharedStringsTable sst, String cellValue) {
if (DateUtil.isADateFormat(formatIndex, formatString)) {
return dateFormat.format(DateUtil.getJavaDate(Double.parseDouble(cellValue)));
}
if (formatString != null) {
return formatter.formatRawCellContents(Double.parseDouble(cellValue), formatIndex, formatString);
}
return cellValue;
}
};
protected static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
protected static final DataFormatter formatter = new DataFormatter();
protected short formatIndex;
protected String formatString;
private final String desc;
DataType(String desc) {
this.desc = desc;
}
public static DataType getDataType(Attributes attributes, StylesTable stylesTable) {
String cellType = attributes.getValue("t");
for (DataType type : values()) {
if (type.desc.equals(cellType)) {
return type;
}
}
String cellStyleStr = attributes.getValue("s");
if (cellStyleStr != null) {
XSSFCellStyle style = stylesTable.getStyleAt(Integer.parseInt(cellStyleStr));
short formatIndex = style.getDataFormat();
STYLE.formatIndex = formatIndex;
STYLE.formatString = Optional.ofNullable(style.getDataFormatString())
.orElse(BuiltinFormats.getBuiltinFormat(formatIndex));
return STYLE;
}
return NUMBER;
}
public abstract String dealValue(ReadOnlySharedStringsTable sst, String cellValue);
}
static class ParseRuntimeException extends RuntimeException {
private static final long serialVersionUID = 3582523992040854628L;
}
}