1.解析类
package com.cetc.ExcelPoi;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
* Created by shea on 2018/10/12.
*/
public class BigExcelParse extends DefaultHandler {
enum xssfDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
}
/**
* Table with styles
*/
private StylesTable stylesTable;
//取SST 的索引对应的值
private SharedStringsTable sharedStringsTable;
/**
* 最大列数
*/
private final int minColumnCount;
//单元格内容是SST 的索引
private boolean vIsOpen;
private xssfDataType nextDataType;
private short formatIndex;
private String formatString;
private final DataFormatter formatter;
private int thisColumn = -1;
// The last column printed to the output stream
private int lastColumnNumber = -1;
// Gathers characters as they are seen.
private StringBuffer value;
private String[] record;//封装每一行的结果
private List<String[]> rows = new ArrayList<String[]>();//封装结果
private boolean isCellNull = false;
private SimpleDateFormat sdf = null;
private static DecimalFormat df = new DecimalFormat("###########");
//构造器
public BigExcelParse(StylesTable styles,
SharedStringsTable strings, int cols) {
this.stylesTable = styles;
this.sharedStringsTable = strings;
this.minColumnCount = cols;
this.value = new StringBuffer();
this.nextDataType = xssfDataType.NUMBER;
this.formatter = new DataFormatter();
record = new String[this.minColumnCount];
rows.clear();// 每次读取都清空行集合
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if ("inlineStr".equals(name) || "v".equals(name)) {
vIsOpen = true;
// Clear contents cache
value.setLength(0);
}
// c => cell
else if ("c".equals(name)) {
// Get the cell reference
String r = attributes.getValue("r");
int firstDigit = -1;
for (int c = 0; c < r.length(); ++c) {
if (Character.isDigit(r.charAt(c))) {
firstDigit = c;
break;
}
}
thisColumn = nameToColumn(r.substring(0, firstDigit));
// Set up defaults.
this.nextDataType = xssfDataType.NUMBER;
this.formatIndex = -1;
this.formatString = null;
String cellType = attributes.getValue("t");
String cellStyleStr = attributes.getValue("s");
if ("b".equals(cellType))
nextDataType = xssfDataType.BOOL;
else if ("e".equals(cellType))
nextDataType = xssfDataType.ERROR;
else if ("inlineStr".equals(cellType))
nextDataType = xssfDataType.INLINESTR;
else if ("s".equals(cellType))
nextDataType = xssfDataType.SSTINDEX;
else if ("str".equals(cellType))
nextDataType = xssfDataType.FORMULA;
else if (cellStyleStr != null) {
// It's a number, but almost certainly one
// with a special style or format
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
this.formatIndex = style.getDataFormat();
this.formatString = style.getDataFormatString();
if (this.formatString == null)
this.formatString = BuiltinFormats
.getBuiltinFormat(this.formatIndex);
}
}
}
public void endElement(String uri, String localName, String name)
throws SAXException {
String thisStr = null;
// v => contents of a cell
//单元格内容标签结束,characters方法会被调用处理内容
if ("v".equals(name)) {
// Process the value contents as required.
// Do now, as characters() may be called more than once
switch (nextDataType) {
case BOOL:
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR:
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA:
// A formula could result in a string value,
// so always add double-quote characters.
thisStr = value.toString();
break;
case INLINESTR:
// TODO: have seen an example of this, so it's untested.
XSSFRichTextString rtsi = new XSSFRichTextString(
value.toString());
thisStr =rtsi.toString();
break;
case SSTINDEX:
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(
sharedStringsTable.getEntryAt(idx));
thisStr = rtss.toString();
} catch (NumberFormatException ex) {
System.out.println("Failed to parse SST index '" + sstIndex
+ "': " + ex.toString());
}
break;
case NUMBER:
String n = value.toString();
// 判断是否是日期格式
if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58
|| (176<=formatIndex && formatIndex<=178) || (182<=formatIndex && formatIndex<=196)
|| (210<=formatIndex && formatIndex<=213) || (208==formatIndex ) ) {// 日期
sdf = new SimpleDateFormat("yyyy-MM-dd");
Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
thisStr=sdf.format(date);
} else if (formatIndex == 20 || formatIndex == 32 || formatIndex==183 || (200<=formatIndex && formatIndex<=209)) {//时间
sdf = new SimpleDateFormat("HH:mm");
Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
thisStr = sdf.format(date);
}
else{
if(n.contains("E")){//科学计数法
String[] split = n.split("\\+");
String e = split[0].replaceAll("E|e", "");
thisStr=e.replace(".","");
}else {
thisStr = n;
}
}
break;
default:
thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
break;
}
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
//判断单元格的值是否为空
if (thisStr == null || "".equals(isCellNull)) {
isCellNull = true;// 设置单元格是否为空值
}
record[thisColumn] = thisStr;
// Update column
if (thisColumn > -1)
lastColumnNumber = thisColumn;
//行结束,存储一行数据
} else if ("row".equals(name)) {
// Print out any missing commas if needed
if (minColumnCount > 0) {
// Columns are 0 based
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
// 判断是否空行
if(record!=null &&record.length!=0){
rows.add(record.clone());
isCellNull = false;
for (int i = 0; i < record.length; i++) {
record[i] = null;
}
}
}
lastColumnNumber = -1;
}
}
public List<String[]> getRows() {
return rows;
}
public void setRows(List<String[]> rows) {
this.rows = rows;
}
public void characters(char[] ch, int start, int length)
throws SAXException {
if (vIsOpen)
value.append(ch, start, length);
}
private int nameToColumn(String name) {
int column = -1;
for (int i = 0; i < name.length(); ++i) {
int c = name.charAt(i);
column = (column + 1) * 26 + c - 'A';
}
return column;
}
}
2.调用类
package com.cetc.ExcelPoi;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.*;
import java.util.HashMap;
import java.util.List;
/**
* Created by shea on 2018/10/12.
*/
public class TestBigExcelParse {
private OPCPackage xlsxPackage;
private int minColumns;
private PrintStream output;
private String sheetName;
//构造器
public TestBigExcelParse(OPCPackage pkg, PrintStream output,
String sheetName, int minColumns) {
this.xlsxPackage = pkg;
this.output = output;
this.minColumns = minColumns;
this.sheetName = sheetName;
}
/**
*
* @param path----文件路径
* @param sheetName---解析的sheet名称
* @param minColumns---excel的最大列数
* @return
* @throws Exception
*/
public static HashMap<String, Object> parseSheet(String path,String sheetName,int minColumns)throws Exception{
//文件地址
OPCPackage pkg = OPCPackage.open(path,PackageAccess.READ);
XSSFReader r = new XSSFReader( pkg );
//解析的sheet名称
// InputStream rId1 = r.getSheet("rId1");//第一个sheet表
XSSFReader.SheetIterator sheetsData = (XSSFReader.SheetIterator)r.getSheetsData();
SharedStringsTable sst = r.getSharedStringsTable();
StylesTable styles = r.getStylesTable();
XMLReader parser = XMLReaderFactory.createXMLReader();
BigExcelParse handler = new BigExcelParse(styles, sst, minColumns);
parser.setContentHandler(handler);
//遍历---获取指定的sheet名称
HashMap<String, Object> map = new HashMap<String, Object>();
while (sheetsData.hasNext()){
InputStream in = sheetsData.next();
if(sheetName.equals(sheetsData.getSheetName())){
InputSource inputSource = new InputSource(in);
parser.parse(inputSource);
List<String[]> rows = handler.getRows();//返回所有的封装结果
map.put("success", true);
map.put("msg","解析完成!");
map.put("rows",rows);
in.close();
}
}
if(map.isEmpty()){
map.put("success", false);
map.put("msg","解析失败,没有找到相应的sheet表!");
}
return map;
}
/**
* 测试方法调用
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception{
HashMap<String,Object> res = parseSheet("C:\\Users\\shea\\Desktop\\测试2.xlsx", "Sheet1", 285);
System.out.println((String)res.get("msg"));
List<String[]> rows = (List<String[]>)res.get("rows");
BufferedWriter out = new BufferedWriter(new FileWriter("C:\\Users\\shea\\Desktop\\测试_bigExcel.txt"));
for (String[] row:rows
) {
for (String cell:row
) {
out.write(cell+"\t");
}
out.newLine();
}
out.flush();
out.close();
}
}