百万级数据Excel导入工具–SAX
-
Office2007与Office Open XML
在Office 2007之前,Office一直都是以二进制位的方式存储,但这种方式不易用于使用;MicroSoft于2005年发布了基于XML的ooxml开放文档标准。ooxml的xml
schema强调减少load time,增快parsing speed,将child elements分开存储,而不是multiple
attributes一起存,这有点类似于HTML的结构。ooxml
使用XML和ZIP技术结合进行文件存储,因为XML是一个基于文本的格式,而且ZIP容器支持内容的压缩,这样可以减少了文件的存储空间。 -
poi导入的方式和问题
①HSSF:支持Excel2003,即文件后缀名为".xls",最大存储行数为65536行;
②XSSF:支持Excel2007及2007以上,文件后缀名".xlsx",单个Sheet最大存储行数1048576行;
③SXSSF:支持Excel2007及2007以上,文件后缀名"xlsx",采用SAX+XSSF结合的方式,避免导入文件在解析时,发生OOM异常,其处理效率与单纯XSSF处理对比,六列十万数据单sheet,文件为1220KB,SXSSF方式记录三次耗时分别为7138ms,7770ms,7984,XSSF方式的耗时分别为23614ms,23090ms,23055ms,列数增加到10列时,XSSF方式OOM异常,SXSSF方式正常导入;SXSSF测试单sheet104万行,26列,文件79713KB,解析生成数据耗时55319ms,XSSF导入OOM异常;该计算机配置为Intel®Core™i7-9700 CPU@3.00GHz,RAM 16.0GB; -
SAX方式解析XML
SAX全称是Simple API for XML,它是一个接口,也是一个工具包。它是一种XML解析的替代方 法,不同于DOM解析XML文档时把所有内容一次性加载到内存中的方式,SAX的解析方式是逐行扫描文档,即边扫描-边解析。所以那些只需要单遍读取内容的应用程序就可以从SAX解析中受益,这对大型文档的解析是个巨大优势。另外,SAX
“推" 模型可用于广播环境,能够同时注册多个ContentHandler,并行接收事件,而不是在一个管道中一个接一个地进行处理。一些支持
SAX 的语法分析器包括 Xerces,Apache parser(以前的 IBM 语法分析器)、MSXML(Microsoft
语法分析器)和 XDK(Oracle 语法分析器)。这些语法分析器是最灵活的,因为它们还支持
DOM的方式解析,本文采用Xerces分析器; -
所需pom.xml依赖
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.1</version>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
- FileDTO 导入的参数所需的DTO
import lombok.Data;
import lombok.EqualsAndHashCode;
@Data
@EqualsAndHashCode
/**
* @author Liuqs
* 入参DTO
*/
public class FileDTO {
private String filename;
}
- service层处理/此处贴出代码采用文件路径获取文件,并采用sax方式解析获取数据,其余方式/或者文件需要其他校验可自行校验
import lombok.extern.slf4j.Slf4j;
import org.apache.maven.surefire.shade.org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import com.example.demo.qyt.FileDTO;
import lombok.extern.slf4j.Slf4j;
import org.apache.maven.surefire.shade.org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
@Slf4j
@Service
public class ReaderService {
@Resource
private ExcelXlsxReader excelXlsxReader;
@Resource
private ExcelXlsReader excelXlsReader;
/**
* 根据本地服务器上文件路径导入,并获取sheetList数据
* @author Liuqs
* @param fileDTO
*/
public Object importTags(FileDTO fileDTO){
String filename = fileDTO.getFilename();
if (StringUtils.isBlank(filename)) {
log.error("importTags--> fail to get required parameters : {}", filename);
throw new RuntimeException("required parameters missing");
}
boolean contains = filename.contains(".xlsx") || filename.contains(".xls");
if (!contains){
log.error("importTags--> The file extension is not .xlsx or .xls : {}", filename);
throw new RuntimeException("The file extension must be .xlsx or .xls");
}
try {
if (filename.contains(".xlsx")) {
return excelXlsxReader.processMultiSheet(filename);
}else {
return excelXlsReader.processMultiSheet(filename);
}
} catch (Exception e) {
e.printStackTrace();
log.error("importTags--> fail to import tags where filename is : {}",filename);
throw new RuntimeException(e.getMessage());
}
}
}
- Util工具,定义抽象类用于扩展
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.xml.sax.helpers.DefaultHandler;
/**
*定义导入的处理方式接口
*@author Liuqs
*/
public abstract class ExcelReader extends DefaultHandler implements HSSFListener {
/**
* 根据文件路径解析单页sheet
* @param filename
* @return
* @throws Exception
*/
public abstract List<Map<String, String>> processSingleSheet(String filename) throws Exception ;
/**
* 根据文件流解析单页sheet
* @param is
* @return
* @throws Exception
*/
public abstract List<Map<String, String>> processSingleSheet(InputStream is) throws Exception ;
/**
* 根据文件路径解析所有sheet
* @param filename
* @return
* @throws Exception
*/
public abstract List<List<Map<String, String>>> processMultiSheet(String filename) throws Exception ;
/**
* 根据文件流解析所有sheet
* @param is
* @return
* @throws Exception
*/
public abstract List<List<Map<String, String>>> processMultiSheet(InputStream is) throws Exception ;
/**
* 根据文件路径解析,指定sheet
* @param filename
* @return
* @throws Exception
*/
public abstract List<Map<String, String>> processAppointSheet(String filename, Integer appointSheetIndex) throws Exception ;
/**
* 根据文件流解析,指定sheet
* @param is
* @return
* @throws Exception
*/
public abstract List<Map<String, String>> processAppointSheet(InputStream is, Integer appointSheetIndex) throws Exception ;
}
- xlsx后缀名文件导入方式
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.maven.surefire.shade.org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.springframework.stereotype.Service;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
@Service
public class ExcelXlsxReader extends ExcelReader {
/**
* 单元格中的数据可能的数据类型
*/
enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
/**
* 共享字符串表
*/
private SharedStringsTable sst;
/**
* 上一次的索引值
*/
private String lastIndex;
/**
* 工作表索引
*/
private int sheetIndex = 0;
/**
* sheet名
*/
// private String sheetName = "";
/**
* 总行数
*/
// private int totalRows=0;
/**
* 一行内cell集合
*/
private List<String> cellList = new ArrayList<String>();
/**
* 判断整行是否为空行的标记
*/
private boolean flag = false;
/**
* 当前行
*/
private int curRow = 1;
/**
* 当前列
*/
private int curCol = 0;
/**
* T元素标识
*/
private boolean isTElement;
/**
* 判断上一单元格是否为文本空单元格
*/
private boolean startElementFlag = true;
private boolean endElementFlag = false;
private boolean charactersFlag = false;
/**
* 异常信息,如果为空则表示没有异常
*/
private String exceptionMessage;
/**
* 单元格数据类型,默认为字符串类型
*/
private CellDataType nextDataType = CellDataType.SSTINDEX;
private final DataFormatter formatter = new DataFormatter();
/**
* 单元格日期格式的索引
*/
private short formatIndex;
/**
* 日期格式字符串
*/
private String formatString;
//定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等
private String prePreRef = "A", preRef = null, ref = null;
//定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格
private String maxRef = null;
//表头列
private List<String> headList ;
//单页sheet行头与值 行list
private List<Map<String, String>> rowsList = null ;
//多个sheet 集合
private List<List<Map<String, String>>> sheetsList = null ;
/**
* 单元格
*/
private StylesTable stylesTable;
/**
* 根据文件路径解析单页sheet
* @param filename
* @return
* @throws Exception
*/
public List<Map<String, String>> processSingleSheet(String filename) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(filename, null, 1);
return sheetList.get(0) ;
}
/**
* 根据文件流解析单页sheet
* @param is
* @return
* @throws Exception
*/
public List<Map<String, String>> processSingleSheet(InputStream is) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(null, is, 1) ;
return sheetList.get(0) ;
}
/**
* 根据文件路径解析所有sheet
* @param filename
* @return
* @throws Exception
*/
public List<List<Map<String, String>>> processMultiSheet(String filename) throws Exception {
long start = System.currentTimeMillis();
List<List<Map<String, String>>> sheetList = processMultiSheet(filename, null, null);
long end = System.currentTimeMillis();
long time = end - start;
System.out.println();
System.out.println("importData->-> 時間 "+time);
return sheetList ;
}
/**
* 根据文件流解析所有sheet
* @param is
* @return
* @throws Exception
*/
public List<List<Map<String, String>>> processMultiSheet(InputStream is) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(null, is, null) ;
return sheetList ;
}
/**
* 根据文件路径解析,指定sheet
* @param filename
* @return
* @throws Exception
*/
public List<Map<String, String>> processAppointSheet(String filename, Integer appointSheetIndex) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(filename, null, appointSheetIndex);
return sheetList.get(0) ;
}
/**
* 根据文件流解析,指定sheet
* @param is
* @return
* @throws Exception
*/
public List<Map<String, String>> processAppointSheet(InputStream is, Integer appointSheetIndex) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(null, is, appointSheetIndex) ;
return sheetList.get(0) ;
}
/***
* 遍历工作簿中所有的电子表格 sheet, 可指定第几个sheet,
* @param filename
* @param appointSheetIndex
* @return
* @throws Exception
*/
private List<List<Map<String, String>>> processMultiSheet(String filename, InputStream is, Integer appointSheetIndex) throws Exception {
OPCPackage pkg = null ;
if(StringUtils.isNotBlank(filename)) {
pkg = OPCPackage.open(filename);
}else if(is != null) {
pkg = OPCPackage.open(is);
}else {
throw new RuntimeException("文件参数类型非指定类型,请检查") ;
}
XSSFReader xssfReader = new XSSFReader(pkg);
stylesTable = xssfReader.getStylesTable();
SharedStringsTable sst = xssfReader.getSharedStringsTable();
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
sheetsList = new ArrayList<List<Map<String, String>>>() ;
while (sheets.hasNext()) { //遍历sheet
curRow = 1; //标记初始行为第一行
++sheetIndex ;
InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错
//sheetName = sheets.getSheetName();
if(appointSheetIndex != null && sheetIndex != appointSheetIndex) {
continue ;
}
rowsList = new ArrayList<Map<String, String>>() ;
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource); //解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行
sheet.close();
sheetsList.add(rowsList) ;
}
return sheetsList; //返回结果列
}
/**
* 第一个执行
*
* @param uri
* @param localName
* @param name
* @param attributes
* @throws SAXException
*/
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
//c => 单元格
if ("c".equals(name)) {
//前一个单元格的位置
if (preRef == null) {
preRef = attributes.getValue("r");
} else {
//中部文本空单元格标识 ‘endElementFlag’ 判断前一次是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串跳过把空字符串的位置赋予preRef
if (endElementFlag){
preRef = ref;
}
}
//当前单元格的位置
ref = attributes.getValue("r");
//首部文本空单元格标识 ‘startElementFlag’ 判断前一次,即首部是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串, 且已知当前格,即第二格带“B”标志,则ref赋予preRef
if (!startElementFlag && !flag){ //上一个单元格为文本空单元格,执行下面的,使ref=preRef;flag为true表明该单元格之前有数据值,即该单元格不是首部空单元格,则跳过
// 这里只有上一个单元格为文本空单元格,且之前的几个单元格都没有值才会执行
preRef = ref;
}
//设定单元格类型
this.setNextDataType(attributes);
endElementFlag = false;
charactersFlag = false;
startElementFlag = false;
}
//当元素为t时
if ("t".equals(name)) {
isTElement = true;
} else {
isTElement = false;
}
//置空
lastIndex = "";
}
/**
* 第二个执行
* 得到单元格对应的索引值或是内容值
* 如果单元格类型是字符串、INLINESTR、数字、日期,lastIndex则是索引值
* 如果单元格类型是布尔值、错误、公式,lastIndex则是内容值
* @param ch
* @param start
* @param length
* @throws SAXException
*/
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
startElementFlag = true;
charactersFlag = true;
lastIndex += new String(ch, start, length);
}
/**
* 第三个执行
*
* @param uri
* @param localName
* @param name
* @throws SAXException
*/
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
//t元素也包含字符串
if (isTElement) {//这个程序没经过
//将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
String value = lastIndex.trim();
cellList.add(curCol, value);
endElementFlag = true;
curCol++;
isTElement = false;
//如果里面某个单元格含有值,则标识该行不为空行
if (value != null && !"".equals(value)) {
flag = true;
}
} else if ("v".equals(name)) {
//v => 单元格的值,如果单元格是字符串,则v标签的值为该字符串在SST中的索引
String value = this.getDataValue(lastIndex.trim(), "");//根据索引值获取对应的单元格值
//补全单元格之间的空单元格
if (!ref.equals(preRef)) {
int len = countNullCell(ref, preRef);
for (int i = 0; i < len; i++) {
cellList.add(curCol, "");
curCol++;
}
} else if (ref.equals(preRef) && !ref.startsWith("A")){ //ref等于preRef,且以B或者C...开头,表明首部为空格
int len = countNullCell(ref, "A");
for (int i = 0; i <= len; i++) {
cellList.add(curCol, "");
curCol++;
}
}
cellList.add(curCol, value);
curCol++;
endElementFlag = true;
//如果里面某个单元格含有值,则标识该行不为空行
if (value != null && !"".equals(value)) {
flag = true;
}
} else {
//如果标签名称为row,这说明已到行尾,调用optRows()方法
if ("row".equals(name)) {
//默认第一行为表头,以该行单元格数目为最大数目
if (curRow == 1) {
maxRef = ref;
headList = new ArrayList<String>() ;
headList.addAll(cellList) ;
}
//补全一行尾部可能缺失的单元格
if (maxRef != null) {
int len = -1;
//前一单元格,true则不是文本空字符串,false则是文本空字符串
if (charactersFlag){
len = countNullCell(maxRef, ref);
}else {
len = countNullCell(maxRef, preRef);
}
for (int i = 0; i <= len; i++) {
cellList.add(curCol, "");
curCol++;
}
}
if (flag&&curRow!=1){ //该行不为空行且该行不是第一行,则发送(第一行为列名,不需要)
Map<String, String> rowsMap = new HashMap<String, String>() ;
for(int i = 0 ; i < headList.size(); i ++) {
String colsKey = headList.get(i) ;
String colsValue = cellList.get(i) ;
rowsMap.put(colsKey, colsValue) ;
}
rowsList.add(rowsMap) ;
//totalRows++;
}
cellList.clear();
curRow++;
curCol = 0;
preRef = null;
prePreRef = null;
ref = null;
flag=false;
}
}
}
/**
* 处理数据类型
*
* @param attributes
*/
public void setNextDataType(Attributes attributes) {
nextDataType = CellDataType.NUMBER; //cellType为空,则表示该单元格类型为数字
formatIndex = -1;
formatString = null;
String cellType = attributes.getValue("t"); //判断字符串单元格类型(值为内容值)
String cellStyleStr = attributes.getValue("s"); //判断日期和数字类型单元格(值为索引值)
String columnData = attributes.getValue("r"); //获取单元格的位置,如A1,B1
if ("b".equals(cellType)) { //处理布尔值
nextDataType = CellDataType.BOOL;
} else if ("e".equals(cellType)) { //处理错误
nextDataType = CellDataType.ERROR;
} else if ("inlineStr".equals(cellType)) {
nextDataType = CellDataType.INLINESTR;
} else if ("s".equals(cellType)) { //处理字符串
nextDataType = CellDataType.SSTINDEX;
} else if ("str".equals(cellType)) {
nextDataType = CellDataType.FORMULA;
}
if (cellStyleStr != null) { //处理日期
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
formatIndex = style.getDataFormat();
formatString = style.getDataFormatString();
if (formatString.contains("m/d/yyyy") || formatString.contains("yyyy/mm/dd")|| formatString.contains("yyyy/m/d")||formatString.contains("m/d/yy") ) {
nextDataType = CellDataType.DATE;
formatString = "yyyy-MM-dd hh:mm:ss";
}
if (formatString == null) {
nextDataType = CellDataType.NULL;
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
}
}
/**
* 对解析出来的数据进行类型处理
* @param value 单元格的值,
* value代表解析:BOOL的为0或1, ERROR的为内容值,FORMULA的为内容值,INLINESTR的为索引值需转换为内容值,
* SSTINDEX的为索引值需转换为内容值, NUMBER为内容值,DATE为内容值
* @param thisStr 一个空字符串
* @return
*/
public String getDataValue(String value, String thisStr) {
switch (nextDataType) {
// 这几个的顺序不能随便交换,交换了很可能会导致数据错误
case BOOL: //布尔值
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR: //错误
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA: //公式
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
rtsi = null;
break;
case SSTINDEX: //字符串
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));//根据idx索引值获取内容值
thisStr = rtss.toString();
//有些字符串是文本格式的,但内容却是日期
rtss = null;
} catch (NumberFormatException ex) {
thisStr = value.toString();
}
break;
case NUMBER: //数字
if (formatString != null) {
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();
} else {
thisStr = value;
}
thisStr = thisStr.replace("_", "").trim();
break;
case DATE: //日期
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString);
// 对日期字符串作特殊处理,去掉T
thisStr = thisStr.replace("T", " ");
break;
default:
thisStr = " ";
break;
}
return thisStr;
}
public int countNullCell(String ref, String preRef) {
//excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD
String xfd = ref.replaceAll("\\d+", "");
String xfd_1 = preRef.replaceAll("\\d+", "");
xfd = fillChar(xfd, 3, '@', true);
xfd_1 = fillChar(xfd_1, 3, '@', true);
char[] letter = xfd.toCharArray();
char[] letter_1 = xfd_1.toCharArray();
int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
return res - 1;
}
public String fillChar(String str, int len, char let, boolean isPre) {
int len_1 = str.length();
if (len_1 < len) {
if (isPre) {
for (int i = 0; i < (len - len_1); i++) {
str = let + str;
}
} else {
for (int i = 0; i < (len - len_1); i++) {
str = str + let;
}
}
}
return str;
}
/**
* @return the exceptionMessage
*/
public String getExceptionMessage() {
return exceptionMessage;
}
@Override
public void processRecord(Record record) {
return ;
}
}
- xls后缀名文件导入处理
import org.apache.maven.surefire.shade.org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.springframework.stereotype.Service;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Service
public class ExcelXlsReader extends ExcelReader {
private int minColums = -1;
private POIFSFileSystem fs;
/**
* 总行数
*/
private int totalRows = 0;
/**
* 上一行row的序号
*/
private int lastRowNumber;
/**
* 上一单元格的序号
*/
private int lastColumnNumber;
/**
* 是否输出formula,还是它对应的值
*/
private boolean outputFormulaValues = true;
/**
* 用于转换formulas
*/
private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
// excel2003工作簿
private HSSFWorkbook stubWorkbook;
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
private final HSSFDataFormatter formatter = new HSSFDataFormatter();
// 表索引
private int sheetIndex = 0;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
// 当前行
private int curRow = 0;
// 存储一行记录所有单元格的容器
private List<String> cellList = new ArrayList<String>() ;
/**
* 判断整行是否为空行的标记
*/
private boolean flag = false;
private String sheetName;
// 表头列
private List<String> headList;
// 单页sheet行头与值 行list
private List<Map<String, String>> rowsList = null;
// 多个sheet 集合
private List<List<Map<String, String>>> sheetsList = null;
/**
* 根据文件路径解析单页sheet
* @param filename
* @return
* @throws Exception
*/
public List<Map<String, String>> processSingleSheet(String filename) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(filename, null);
return sheetList.get(0) ;
}
/**
* 根据文件流解析单页sheet
* @param is
* @return
* @throws Exception
*/
public List<Map<String, String>> processSingleSheet(InputStream is) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(null, is) ;
return sheetList.get(0) ;
}
/**
* 根据文件路径解析所有sheet
* @param filename
* @return
* @throws Exception
*/
public List<List<Map<String, String>>> processMultiSheet(String filename) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(filename, null);
return sheetList ;
}
/**
* 根据文件流解析所有sheet
* @param is
* @return
* @throws Exception
*/
public List<List<Map<String, String>>> processMultiSheet(InputStream is) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(null, is) ;
return sheetList ;
}
/**
* 根据文件路径解析,指定sheet
* @param filename
* @return
* @throws Exception
*/
public List<Map<String, String>> processAppointSheet(String filename, Integer appointSheetIndex) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(filename, null);
return sheetList.get(appointSheetIndex - 1) ;
}
/**
* 根据文件流解析,指定sheet
* @param is
* @return
* @throws Exception
*/
public List<Map<String, String>> processAppointSheet(InputStream is, Integer appointSheetIndex) throws Exception {
List<List<Map<String, String>>> sheetList = processMultiSheet(null, is) ;
return sheetList.get(appointSheetIndex - 1) ;
}
/**
* 遍历excel下所有的sheet
*
* @param fileName
* @throws Exception
*/
public List<List<Map<String, String>>> processMultiSheet(String fileName, InputStream is) throws Exception {
sheetsList = new ArrayList<List<Map<String, String>>>() ;
if(StringUtils.isNotBlank(fileName)) {
this.fs = new POIFSFileSystem(new FileInputStream(fileName));
}else if(is != null) {
this.fs = new POIFSFileSystem(is);
}else {
throw new RuntimeException("文件参数类型非指定类型,请检查") ;
}
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
return sheetsList;
}
/**
* HSSFListener 监听方法,处理Record 处理每个单元格
*
* @param record
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid: // 开始处理每个sheet
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// 如果有需要,则建立子工作簿
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
rowsList = new ArrayList<Map<String, String>>() ;
++ sheetIndex;
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid: // 单元格为空白
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
cellList.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: // 单元格为布尔类型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue() + "";
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); // 如果里面某个单元格含有值,则标识该行不为空行
break;
case FormulaRecord.sid:// 单元格为公式类型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); // 如果里面某个单元格含有值,则标识该行不为空行
break;
case StringRecord.sid: // 单元格中公式的字符串
if (outputNextStringRecord) {
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("") ? "" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); // 如果里面某个单元格含有值,则标识该行不为空行
break;
case LabelSSTRecord.sid: // 单元格为字符串类型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
cellList.add(thisColumn, "");
} else {
value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("") ? "" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); // 如果里面某个单元格含有值,则标识该行不为空行
}
break;
case NumberRecord.sid: // 单元格为数字类型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
// 第一种方式
// value = formatListener.formatNumberDateCell(numrec).trim();//这个被写死,采用的m/d/yy
// h:mm格式,不符合要求
// 第二种方式,参照formatNumberDateCell里面的实现方法编写
Double valueDouble = ((NumberRecord) numrec).getValue();
String formatString = formatListener.getFormatString(numrec);
if (formatString.contains("m/d/yy")) {
formatString = "yyyy-MM-dd hh:mm:ss";
}
int formatIndex = formatListener.getFormatIndex(numrec);
value = formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();
value = value.equals("") ? "" : value;
// 向容器加入列值
cellList.add(thisColumn, value);
checkRowIsNull(value); // 如果里面某个单元格含有值,则标识该行不为空行
break;
default:
break;
}
// 遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// 空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
cellList.add(thisColumn, "");
}
// 更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// 行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColums > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
lastColumnNumber = -1;
if (curRow == 0) {
headList = new ArrayList<String>();
headList.addAll(cellList);
}
if (flag && curRow != 0) { // 该行不为空行且该行不是第一行
Map<String, String> rowsMap = new HashMap<String, String>();
for (int i = 0; i < headList.size(); i++) {
String colsKey = headList.get(i);
String colsValue = cellList.get(i);
rowsMap.put(colsKey, colsValue);
}
rowsList.add(rowsMap);
if(sheetsList.size() == sheetIndex - 1) {
sheetsList.add(rowsList) ;
}else {
sheetsList.remove(sheetIndex - 1) ;
sheetsList.add(sheetIndex - 1, rowsList);
}
totalRows++;
}
// 清空容器
cellList.clear();
flag = false;
}
}
/**
* 如果里面某个单元格含有值,则标识该行不为空行
*
* @param value
*/
public void checkRowIsNull(String value) {
if (value != null && !"".equals(value)) {
flag = true;
}
}
}
- controller层测试
import com.example.demo.qyt.FileDTO;
import com.example.demo.qyt.old.Import;
import com.example.demo.qyt.service.ReaderService;
import org.springframework.web.bind.annotation.*;
import javax.annotation.Resource;
/**
* 导入数据
*/
@RestController
@RequestMapping("/import")
public class ExcelReaderController {
@Resource
private ReaderService reader;
/**
*导入
*/
@PostMapping("/datas")
public Object importTags(@RequestBody FileDTO fileDTO){
return reader.importTags(fileDTO);
}
}