1、excel其实就是一个压缩的xml,将文件的后缀改为zip之后解压可以看到excel的真正面貌。
2、解析excel文件依赖的jar.
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.9</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.9</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.9</version> </dependency>
3、解析工具类,需继承DefaultHandler
import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import java.io.InputStream; import java.text.SimpleDateFormat; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @ClassName ExcelHandel * @Description * @Author xiangnan.xu * @DATE 2017/10/11 15:07 */ public class ExcelHandel extends DefaultHandler { private static final Logger logger = LoggerFactory.getLogger(ExcelHandel.class); //共享字符串表 private SharedStringsTable sst; //上一次的内容 private String lastContents; private boolean nextIsString; private Map<String,String> rowMap; private List<Map<String,String>> sucRowList = new ArrayList<>(); private List<Map<String,String>> failRowList = new ArrayList<>(); //当前行 private int curRow = 0; //当前列 private int curCol = 0; // 当前遍历的Excel单元格列索引 protected int thisColumnIndex = -1; private String defaultStr = ""; private boolean listIsNull = true; protected int total = 0; //总行数 private int totalCol = 0;//总列数 protected String[] headers; private HandleCallback callBack; private int successNum = 0;//验证通过条数 private StylesTable stylesTable; private boolean nextIsDate; private SimpleDateFormat simpleDateFormat; /** * 根据sheetid 解析sheet * @param sheetId * @throws Exception */ public ParseResult processOneSheetByIndex(InputStream inputStream,int sheetId,String[] headers,HandleCallback callBack) throws Exception { logger.info("-------------数据导入处理开始--------------"); Long start = System.currentTimeMillis(); OPCPackage pkg = OPCPackage.open(inputStream); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); this.headers = headers; this.callBack = callBack; this.stylesTable = r.getStylesTable(); // 根据 rId# 查找sheet InputStream sheet2 = r.getSheet("rId"+sheetId); InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); logger.info("-------------数据导入处理结束--------------"); String time = DateUtils.formatTime(System.currentTimeMillis() - start); logger.info("数据导入处理时间 :" + time); return new ParseResult(successNum,total); } public XMLReader fetchSheetParser(SharedStringsTable sst)throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; } public void startElement(String uri, String localName, String name,Attributes attributes) throws SAXException { // c => 单元格 if ("c".equals(name)) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue("t"); if ("s".equals(cellType)) { nextIsString = true; } else { nextIsString = false; } // 日期格式 String cellTypeS = attributes.getValue("s"); nextIsDate = false;// 每次初始化 if(org.apache.commons.lang3.StringUtils.isNotEmpty(cellTypeS)){ int styleIndex = Integer.parseInt(cellTypeS); // Short formatIndex = style.getDataFormat(); String formatString = stylesTable.getStyleAt(styleIndex).getDataFormatString(); if ("m/d/yy".equals(formatString) || "yyyy/m/d;@".equals(formatString)) { nextIsDate = true; simpleDateFormat = DateUtils.SDF1(); }else if("yyyy/m/d\\ h:mm;@".equals(formatString)) { nextIsDate = true; simpleDateFormat = DateUtils.SDF3(); }else { nextIsDate = false; } } String r = attributes.getValue("r"); int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } thisColumnIndex = nameToColumn(r.substring(0, firstDigit)); } else if (name.equals("row")) { rowMap = new LinkedHashMap<>(); // 设置行号 if(listIsNull){ totalCol = getColumns(attributes.getValue("spans")); totalCol = headers.length <= totalCol ? headers.length : 0; listIsNull = false; } }else if (name.equals("dimension")){ //获得总计录数 String d = attributes.getValue("ref"); total = getNumber(d.substring(d.indexOf(":")+1,d.length())); } // 置空 lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); } catch (Exception e) { // e.printStackTrace(); } } if ("v".equals(name)) { paddingNullCell(); String value = lastContents.trim(); if(nextIsDate && StringUtils.isNumer(value)){ Date javaDate = HSSFDateUtil.getJavaDate(Double.valueOf(value)); value = simpleDateFormat.format(javaDate); } value = value.equals("")?" ":value; if(curCol < headers.length){ rowMap.put(headers[curCol],value); } curCol++; }else { //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals("row") || "worksheet".equals(name)) { boolean flag = true; // 到达文件最后 if("worksheet".equals(name)){ total = curRow; flag = false; } if(flag){ // 第一行不作处理 if(curRow == 0){ curRow++ ; curCol = 0; thisColumnIndex = 0; return; } //对最后的空列做填充 for(int i=rowMap.size();i<totalCol;i++){ rowMap.put(headers[i],defaultStr); } // 空行处理 boolean blankRow = checkBlankRow(rowMap); // 空行处理 if(blankRow){ total = total - 1; }else{ curRow = curRow + 1; // 数据验证 boolean validate = callBack.validate(rowMap); if(!validate){ failRowList.add(rowMap); }else{ successNum ++ ; sucRowList.add(rowMap); } } } // 2000一批处理 if((sucRowList.size() > 0 && sucRowList.size() == 2000) || (sucRowList.size() > 0 && curRow == total)){ logger.info("【文件导入】,当前正在处理一批成功数据为2000条,当前处理到第{}条,总共{}条",curRow,total); List<Map<String, String>> faileList = callBack.handleSuccessData(sucRowList); if(faileList != null && faileList.size() > 0){ failRowList.addAll(faileList); successNum = successNum - faileList.size(); } sucRowList.clear(); } if(failRowList.size() > 0 && (failRowList.size() >= 2000 || curRow == total)){ logger.info("【文件导入】,当前正在处理一批失败数据为2000条,当前处理到第{}条,总共{}条",curRow,total); callBack.handleFaileData(failRowList); failRowList.clear(); } curCol = 0; thisColumnIndex = 0; } } } private boolean checkBlankRow(Map<String, String> rowMap) { if(rowMap == null){ return true; } for (Map.Entry<String, String> row : rowMap.entrySet()) { String value = row.getValue(); // 有一个不为空即不是空行 if(value != null && !"".equals(replaceBlank(value))){ return false; } } return true; } /** * 空格替换 * @param str * @return */ private String replaceBlank(String str) { String dest = ""; if (str!=null) { Pattern p = Pattern.compile("\\s*|\t|\r|\n"); Matcher m = p.matcher(str); dest = m.replaceAll(""); } return dest; } private static int getNumber(String column) { String c = column.toUpperCase().replaceAll("[A-Z]", ""); return Integer.parseInt(c); } public void characters(char[] ch, int start, int length) throws SAXException { //得到单元格内容的值 lastContents += new String(ch, start, length); } private static int getColumns(String spans) { String number = spans.substring(spans.lastIndexOf(':') + 1, spans.length()); return Integer.parseInt(number); } /** * 空的单元个填充 */ private void paddingNullCell() { int index = curCol; if(thisColumnIndex > index && thisColumnIndex <= headers.length){ for(int i = index; i < thisColumnIndex; i++){ // rowlist.add(curCol, ""); rowMap.put(headers[curCol],""); curCol++; } } } /** * 从列名转换为列索引 * @param name * @return */ private static int nameToColumn(String name) { int column = -1; for (int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) * 26 + c - 'A'; } return column; } }
4、解析返回
/** * @ClassName ParseResult * @Description * @Author xiangnan.xu * @DATE 2017/10/12 13:58 */ public class ParseResult { private int successNum; private int totalNum; public ParseResult() { } public ParseResult(int successNum, int totalNum) { this.successNum = successNum; this.totalNum = totalNum; } public int getSuccessNum() { return successNum; } public void setSuccessNum(int successNum) { this.successNum = successNum; } public int getTotalNum() { return totalNum; } public void setTotalNum(int totalNum) { this.totalNum = totalNum; } }
5、解析回调
import java.util.List; import java.util.Map; /** * @ClassName HandleCallback * @Description excel 文件解析回调 * @Author xiangnan.xu * @DATE 2017/6/19 21:03 */ public interface HandleCallback { /** * 数据校验 * @param dataRow * @return */ boolean validate(Map<String,String> dataRow); /** * 成功数据处理 * @param dataRows * @return 返回失败的集合 */ List<Map<String,String>> handleSuccessData(List<Map<String, String>> dataRows); /** * 失败数据处理 * @param dataRows * @return 返回失败的集合 */ void handleFaileData(List<Map<String, String>> dataRows); }
6、内存溢出原因。层尝试改写源码,但是不太方便。如有好方法请告知。