excel 导入(二)

1、excel其实就是一个压缩的xml,将文件的后缀改为zip之后解压可以看到excel的真正面貌。

211103_LGVx_2282993.png

211131_m97w_2282993.png

2、解析excel文件依赖的jar.

<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi</artifactId>
   <version>3.9</version>
</dependency>
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-ooxml</artifactId>
   <version>3.9</version>
</dependency>
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-ooxml-schemas</artifactId>
   <version>3.9</version>
</dependency>

3、解析工具类,需继承DefaultHandler

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @ClassName ExcelHandel
 * @Description
 * @Author xiangnan.xu
 * @DATE 2017/10/11 15:07
 */
public class ExcelHandel extends DefaultHandler {
    private static final Logger logger = LoggerFactory.getLogger(ExcelHandel.class);
    //共享字符串表
    private SharedStringsTable sst;
    //上一次的内容
    private String lastContents;
    private boolean nextIsString;
    private Map<String,String> rowMap;
    private List<Map<String,String>> sucRowList = new ArrayList<>();
    private List<Map<String,String>> failRowList = new ArrayList<>();
    //当前行
    private int curRow = 0;
    //当前列
    private int curCol = 0;
    // 当前遍历的Excel单元格列索引
    protected int thisColumnIndex = -1;
    private String defaultStr = "";
    private boolean listIsNull = true;
    protected int total = 0;  //总行数
    private int totalCol = 0;//总列数
    protected String[] headers;
    private HandleCallback callBack;
    private int successNum = 0;//验证通过条数
    private StylesTable stylesTable;
    private boolean nextIsDate;
    private SimpleDateFormat simpleDateFormat;
    /**
     * 根据sheetid 解析sheet
     * @param sheetId
     * @throws Exception
     */
    public ParseResult processOneSheetByIndex(InputStream inputStream,int sheetId,String[] headers,HandleCallback callBack) throws Exception {
        logger.info("-------------数据导入处理开始--------------");
        Long start = System.currentTimeMillis();
        OPCPackage pkg = OPCPackage.open(inputStream);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);

        this.headers = headers;
        this.callBack = callBack;
        this.stylesTable = r.getStylesTable();
        // 根据 rId# 查找sheet
        InputStream sheet2 = r.getSheet("rId"+sheetId);
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();
        logger.info("-------------数据导入处理结束--------------");
        String time = DateUtils.formatTime(System.currentTimeMillis() - start);
        logger.info("数据导入处理时间 :" + time);
        return new ParseResult(successNum,total);
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst)throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    public void startElement(String uri, String localName, String name,Attributes attributes) throws SAXException {
        // c => 单元格
        if ("c".equals(name)) {
            // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
            String cellType = attributes.getValue("t");
            if ("s".equals(cellType)) {
                nextIsString = true;
            } else {
                nextIsString = false;
            }
            // 日期格式
            String cellTypeS = attributes.getValue("s");
            nextIsDate = false;// 每次初始化
            if(org.apache.commons.lang3.StringUtils.isNotEmpty(cellTypeS)){
                int styleIndex = Integer.parseInt(cellTypeS);
    //            Short formatIndex = style.getDataFormat();
                String formatString = stylesTable.getStyleAt(styleIndex).getDataFormatString();
                if ("m/d/yy".equals(formatString) || "yyyy/m/d;@".equals(formatString)) {
                    nextIsDate = true;
                    simpleDateFormat = DateUtils.SDF1();
                }else if("yyyy/m/d\\ h:mm;@".equals(formatString)) {
                    nextIsDate = true;
                    simpleDateFormat = DateUtils.SDF3();
                }else {
                    nextIsDate = false;
                }
            }
            String r = attributes.getValue("r");
            int firstDigit = -1;
            for (int c = 0; c < r.length(); ++c) {
                if (Character.isDigit(r.charAt(c))) {
                    firstDigit = c;
                    break;
                }
            }
            thisColumnIndex = nameToColumn(r.substring(0, firstDigit));
        } else if (name.equals("row")) {
            rowMap = new LinkedHashMap<>();
            // 设置行号
            if(listIsNull){
                totalCol = getColumns(attributes.getValue("spans"));
                totalCol = headers.length <= totalCol ? headers.length : 0;
                listIsNull = false;
            }
        }else if (name.equals("dimension")){
            //获得总计录数
            String d = attributes.getValue("ref");
            total = getNumber(d.substring(d.indexOf(":")+1,d.length()));
        }

        // 置空
        lastContents = "";
    }

    public void endElement(String uri, String localName, String name) throws SAXException {
        // 根据SST的索引值的到单元格的真正要存储的字符串
        // 这时characters()方法可能会被调用多次
        if (nextIsString) {
            try {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            } catch (Exception e) {
//              e.printStackTrace();
            }
        }
        if ("v".equals(name)) {
            paddingNullCell();
            String value = lastContents.trim();
            if(nextIsDate && StringUtils.isNumer(value)){
                Date javaDate = HSSFDateUtil.getJavaDate(Double.valueOf(value));
                value = simpleDateFormat.format(javaDate);
            }
            value = value.equals("")?" ":value;
            if(curCol < headers.length){
                rowMap.put(headers[curCol],value);
            }
            curCol++;
        }else {
            //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
            if (name.equals("row") || "worksheet".equals(name)) {
                boolean flag = true;
                // 到达文件最后
                if("worksheet".equals(name)){
                    total = curRow;
                    flag = false;
                }
                if(flag){
                    // 第一行不作处理
                    if(curRow == 0){
                        curRow++ ;
                        curCol = 0;
                        thisColumnIndex = 0;
                        return;
                    }
                    //对最后的空列做填充
                    for(int i=rowMap.size();i<totalCol;i++){
                        rowMap.put(headers[i],defaultStr);
                    }
                    // 空行处理
                    boolean blankRow = checkBlankRow(rowMap);
                    // 空行处理
                    if(blankRow){
                        total = total - 1;
                    }else{
                        curRow = curRow + 1;
                        // 数据验证
                        boolean validate = callBack.validate(rowMap);
                        if(!validate){
                            failRowList.add(rowMap);
                        }else{
                            successNum ++ ;
                            sucRowList.add(rowMap);
                        }
                    }
                }
                // 2000一批处理
                if((sucRowList.size() > 0 && sucRowList.size() == 2000) || (sucRowList.size() > 0 && curRow == total)){
                    logger.info("【文件导入】,当前正在处理一批成功数据为2000条,当前处理到第{}条,总共{}条",curRow,total);
                    List<Map<String, String>> faileList = callBack.handleSuccessData(sucRowList);
                    if(faileList != null && faileList.size() > 0){
                        failRowList.addAll(faileList);
                        successNum = successNum - faileList.size();
                    }
                    sucRowList.clear();
                }
                if(failRowList.size() > 0 && (failRowList.size() >= 2000 || curRow == total)){
                    logger.info("【文件导入】,当前正在处理一批失败数据为2000条,当前处理到第{}条,总共{}条",curRow,total);
                    callBack.handleFaileData(failRowList);
                    failRowList.clear();
                }
                curCol = 0;
                thisColumnIndex = 0;
            }
        }
    }

    private boolean checkBlankRow(Map<String, String> rowMap) {
        if(rowMap == null){
            return true;
        }
        for (Map.Entry<String, String> row : rowMap.entrySet()) {
            String value = row.getValue();
            // 有一个不为空即不是空行
            if(value != null && !"".equals(replaceBlank(value))){
                return false;
            }
        }
        return true;
    }

    /**
     * 空格替换
     * @param str
     * @return
     */
    private String replaceBlank(String str) {
        String dest = "";
        if (str!=null) {
            Pattern p = Pattern.compile("\\s*|\t|\r|\n");
            Matcher m = p.matcher(str);
            dest = m.replaceAll("");
        }
        return dest;
    }

    private static int getNumber(String column) {
        String c = column.toUpperCase().replaceAll("[A-Z]", "");
        return Integer.parseInt(c);
    }

    public void characters(char[] ch, int start, int length) throws SAXException {
        //得到单元格内容的值
        lastContents += new String(ch, start, length);
    }


    private static int getColumns(String spans) {
        String number = spans.substring(spans.lastIndexOf(':') + 1, spans.length());
        return Integer.parseInt(number);
    }

    /**
     * 空的单元个填充
     */
    private void paddingNullCell() {
        int index = curCol;
        if(thisColumnIndex > index && thisColumnIndex <= headers.length){
            for(int i = index; i < thisColumnIndex; i++){
//                rowlist.add(curCol, "");
                rowMap.put(headers[curCol],"");
                curCol++;
            }
        }
    }
    /**
     * 从列名转换为列索引
     * @param name
     * @return
     */
    private static int nameToColumn(String name) {
        int column = -1;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            column = (column + 1) * 26 + c - 'A';
        }
        return column;
    }
}

4、解析返回

/**
 * @ClassName ParseResult
 * @Description
 * @Author xiangnan.xu
 * @DATE 2017/10/12 13:58
 */
public class ParseResult {
    private int successNum;
    private int totalNum;

    public ParseResult() {
    }

    public ParseResult(int successNum, int totalNum) {
        this.successNum = successNum;
        this.totalNum = totalNum;
    }

    public int getSuccessNum() {
        return successNum;
    }

    public void setSuccessNum(int successNum) {
        this.successNum = successNum;
    }

    public int getTotalNum() {
        return totalNum;
    }

    public void setTotalNum(int totalNum) {
        this.totalNum = totalNum;
    }
}

5、解析回调

import java.util.List;
import java.util.Map;

/**
 * @ClassName HandleCallback
 * @Description excel 文件解析回调
 * @Author xiangnan.xu
 * @DATE 2017/6/19 21:03
 */
public interface HandleCallback {
   /**
    * 数据校验
    * @param dataRow
    * @return
    */
   boolean validate(Map<String,String> dataRow);
   /**
    * 成功数据处理
    * @param dataRows
    * @return 返回失败的集合
    */
   List<Map<String,String>> handleSuccessData(List<Map<String, String>> dataRows);
   /**
    * 失败数据处理
    * @param dataRows
    * @return 返回失败的集合
    */
   void handleFaileData(List<Map<String, String>> dataRows);
}

6、内存溢出原因。层尝试改写源码,但是不太方便。如有好方法请告知。

210449_trfj_2282993.png

转载于:https://my.oschina.net/u/2282993/blog/1811090

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值