POI组件事件读入

项目结构

依赖如下

<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>1.7.25</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>1.7.25</version>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.0.1</version>
</dependency>

log4j.properties

### 设置###
log4j.rootLogger = debug,stdout

### 输出信息到控制抬 ###
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target = System.out
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n

如何使用

# 实用组件
## components-office 
添加依赖
\<dependency\>
\<groupId\>com.blueorigin.common\</groupId\>
\<artifactId\>components-office\</artifactId\>
\<version\>latest\</version\>
\</dependency\>
### 版本信息
>- v_1.0.2 提供对Excel2007以后的.xlsx的读取功能
### quick-start
对应包:com.blueorigin.common.components.office
xlsx读取:
1. 创建model extends UserRow, model使用annotation包下的注解标注列的别名
2. 创建handler implements UserRowHandler<? extends UserRow>
3. 基于1、2实例化组件提供的默认事件监听器 DefaultEventListener
4. 构建XlsxExcelReader 并调用其parse方法解析文件流
5. 获取解析结果
Excel每行数据的处理行为都在第2步中的handler中处理。另外components-office 支持客户端传入一个阻塞队列将读取到的UserRow put进队列,客户端从该阻塞队列获取数据自行处理。
用例参考:

 

package: com.blueorigin.common.components.office.annotation

package com.blueorigin.common.components.office.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * 表头列的别名,alais对应Excel表的中文别名
 * 当设置了别名,解析器将该列下的值注入到对应的别名字段,如果没有设置别名,则依据字段变量名设置
 * 当字段被注解 @ColumnReadIgnore 标注,则该字段不被解析注入
 * 当字段被注解 @ColumnWriteIgnore 标注,则该字段不被生成器写入Excel
 *
 * @see ColumnReadIgnore
 * @see ColumnWriteIgnore
 */
@Target({ ElementType.FIELD })
@Retention(RetentionPolicy.RUNTIME)
public @interface ColumnAlias {
    String value();
}
package com.blueorigin.common.components.office.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * 标明该字段不会被注入器所识别,
 * Excel应避免使用该列名,参考ColumnAlias
 */
@Target({ ElementType.FIELD })
@Retention(RetentionPolicy.RUNTIME)
public @interface ColumnReadIgnore {
}
package com.blueorigin.common.components.office.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * 标明该字段不会被注入器所识别,
 * Excel应避免使用该列名,参考ColumnAlias
 */
@Target({ ElementType.FIELD })
@Retention(RetentionPolicy.RUNTIME)
public @interface ColumnWriteIgnore {
}

package: com.blueorigin.common.components.office.exceptions

package com.blueorigin.common.components.office.exceptions;

import org.xml.sax.SAXException;

public class OfficeReflectionException extends SAXException {
    public OfficeReflectionException(String message) {
        super("office reflection exception: " + message);
    }
}
package com.blueorigin.common.components.office.exceptions;

import org.xml.sax.SAXException;

/**
 * 解析Excel的model如果不满足字段类型要求,则抛出此异常
 */
public class UnsupportedTypeException extends SAXException {

    public UnsupportedTypeException(String type) {
        super("office reader not support type of " + type);
    }
}

com.blueorigin.common.components.office.support

package com.blueorigin.common.components.office.support;

public class SheetResult {
    private int    sheetRows;      // 工作表总行数
    private int    sheetBlankRows; // 工作表空行数
    private int    sheetIndex;     // 工作表索引
    private String sheetName;      // 工作表名

    public SheetResult(int sheetRows, int sheetBlankRows, int sheetIndex, String sheetName) {
        this.sheetRows = sheetRows;
        this.sheetBlankRows = sheetBlankRows;
        this.sheetIndex = sheetIndex;
        this.sheetName = sheetName;
    }

    public int getSheetRows() {
        return sheetRows;
    }

    public void setSheetRows(int sheetRows) {
        this.sheetRows = sheetRows;
    }

    public int getSheetBlankRows() {
        return sheetBlankRows;
    }

    public void setSheetBlankRows(int sheetBlankRows) {
        this.sheetBlankRows = sheetBlankRows;
    }

    public int getSheetIndex() {
        return sheetIndex;
    }

    public void setSheetIndex(int sheetIndex) {
        this.sheetIndex = sheetIndex;
    }

    public String getSheetName() {
        return sheetName;
    }

    public void setSheetName(String sheetName) {
        this.sheetName = sheetName;
    }

    @Override
    public String toString() {
        return "SheetResult{" +
                "sheetRows=" + sheetRows +
                ", sheetBlankRows=" + sheetBlankRows +
                ", sheetIndex=" + sheetIndex +
                ", sheetName='" + sheetName + '\'' +
                '}';
    }
}
package com.blueorigin.common.components.office.support;

/**
 * 所有的Excel映射类都需要继承这个类,并且映射类必须是最终类,否者可导致字段注入失败
 */
public class UserRow extends UserSheet {

    private int rowNum;

    public int getRowNum() {
        return rowNum;
    }

    public void setRowNum(int rowNum) {
        this.rowNum = rowNum;
    }
}
package com.blueorigin.common.components.office.support;

/**
 * 用户编程接口,实现该类对象并将其注入到UserSheetEventListener中去
 * UserSheetEventListener#endRow会调用该方法并将从Excel row中解析到的对象作为参数调用该方法
 *
 * @param <T> row 映射的pojo
 */
public interface UserRowHandler<T extends UserRow> {
    void handle(T t) throws Exception;

    Class<T> getSubClass();
}
package com.blueorigin.common.components.office.support;

public class UserSheet {

    private String sheetName;

    public String getSheetName() {
        return sheetName;
    }

    public void setSheetName(String sheetName) {
        this.sheetName = sheetName;
    }
}
package com.blueorigin.common.components.office.support;

import org.xml.sax.SAXException;

/**
 * 用户编程接口
 */
public interface UserSheetEventListener {
    /**
     * A row with the (one based) row number has started
     * 需要创建要给单元格值的容器,通常是一个对象
     */
    void startRow(int rowNum, String sheetName) throws SAXException;

    /**
     * A row with the (one based) row number has ended
     * 处理创建好的对象
     */
    void endRow(int rowNum, String sheetName, boolean isRowBlank);

    /**
     * 如果要获取单元格的值,那么需要实现这个方法。一般单元格的值对应对象中的字段。
     *
     * @param rowNum 行号
     * @param colPrefix 列码
     * @param cellReference 单元格引用 = 列表+行号  如 A1 B2
     * @param formattedValue 格式化的单元格的值
     */
    void cell(int rowNum, String colPrefix, String cellReference,
              String formattedValue) throws SAXException;

    /**
     * Reader iterate the excel workbook and parse each sheet one time
     */
    default void startSheet(String sheetName, int sheetIndex) {

    }

    /**
     * @param sheetName  工作表名称
     * @param sheetIndex 工作表索引 min=1
     * @param sheetRows  工作表总行数
     * @param sheetBlankRows 工作表空行数
     */
    default void endSheet(String sheetName, int sheetIndex, int sheetRows, int sheetBlankRows) {
    }
}
package com.blueorigin.common.components.office.support;

import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * @desc POI读取excel有两种模式,一种是用户模式,一种是事件驱动模式
 * 采用SAX事件驱动模式解决XLSX文件,可以有效解决用户模式内存溢出的问题,
 * 该模式是POI官方推荐的读取大数据的模式,
 * 在用户模式下,数据量较大,Sheet较多,或者是有很多无用的空行的情况下,容易出现内存溢出
 * <p>
 * 用于解决.xlsx2007版本大数据量问题
 *
 *
 * example:
 *
 * DefaultEventListener<ColumnModel> handler = new DefaultEventListener<>(
 * ColumnModel.class, columnModel -> System.out.println(columnModel));
 * XlsxExcelReader xlsxExcelHandler = new XlsxExcelReader(handler, patten);
 * xlsxExcelHandler.parse(in);
 *
 **/
public class XlsxExcelReader extends DefaultHandler {

    Logger logger = LoggerFactory.getLogger(this.getClass());

    /**
     * 单元格中的数据可能的数据类型
     */
    enum CellDataType {
                       BOOLEAN, ERROR, FORMULA, INLINE_STRING, SST_STRING, NUMBER, DATE
    }

    private int                          sheetIndex   = 0;                              // 工作表索引
    private String                       sheetName    = "";                             // 工作表名
    private int                          sheetRows;                                     // 工作表总行数
    private int                          sheetBlankRows;                                // 工作表空行数

    private final DataFormatter          formatter    = new DataFormatter(Locale.CHINA);

    private short                        formatIndex;                                   // 单元格格式的索引
    private String                       formatString;                                  // 单元格格式字符串
    private String                       dateFormatString;                              // 自定义的日期格式字符串

    private StylesTable                  stylesTable;                                   // 单元格
    private SharedStringsTable           sst;                                           // 共享字符串表

    private final UserSheetEventListener output;                                        // 回调

    // Gathers characters as they are seen.
    private StringBuilder                value        = new StringBuilder(64);

    private String                       colPrefix;                                     // cellRef = colPrefix + rowNum
    private int                          preRowNum;                                     // 用于解决有些Excel读取时自动过滤空行的问题
    private int                          rowNum;
    private int                          nextRowNum;                                    // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
    private boolean                      isRowBlank;                                    // 标记当前行是否是空行

    private String                       cellRef;                                       // 当前位置

    private boolean                      isIsOpen;
    private CellDataType                 nextDataType = CellDataType.NUMBER;            // 单元格数据类型

    public XlsxExcelReader(UserSheetEventListener sheetContentsHandler) {
        this.output = sheetContentsHandler;
    }

    /**
     * @param sheetContentsHandler
     * @param dateFormatString 如果需要指定日期格式,为null时,使用Excel指定的格式。注意有些中文日期格式无法正确解析
     */
    public XlsxExcelReader(UserSheetEventListener sheetContentsHandler, String dateFormatString) {
        this.output = sheetContentsHandler;
        this.dateFormatString = dateFormatString;
    }

    /**
     * 解析excel, 由于采用的是事件模式,Excel被转换成xml,并被依次读取。
     * 读到相应的标签或内容则产生对应的事件通知 @link http://www.saxproject.org/event.html
     */
    public Map<Integer, SheetResult> parse(InputStream in) throws IOException, OpenXML4JException,
                                                           ParserConfigurationException,
                                                           SAXException {
        OPCPackage pkg = OPCPackage.open(in);
        XSSFReader xssfReader = new XSSFReader(pkg);
        this.stylesTable = xssfReader.getStylesTable();
        this.sst = xssfReader.getSharedStringsTable();
        XMLReader parser = SAXHelper.newXMLReader();
        parser.setContentHandler(this);
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

        Map<Integer, SheetResult> resultMap = new HashMap<>();

        while (sheets.hasNext()) {
            preRowNum = 0;
            rowNum = 0;
            nextRowNum = 0;
            sheetRows = 0;
            sheetBlankRows = 0;
            sheetIndex++;
            InputStream sheet = sheets.next();
            sheetName = sheets.getSheetName();
            InputSource sheetSource = new InputSource(sheet);

            //解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行
            parser.parse(sheetSource);
            sheet.close();

            resultMap.put(sheetIndex,
                new SheetResult(sheetRows - 1, sheetBlankRows, sheetIndex, sheetName));
        }

        return resultMap;
    }

    private boolean isTextTag(String name) {
        if ("v".equals(name)) {
            // Easy, normal v text tag
            return true;
        } else if ("inlineStr".equals(name)) {
            // Easy inline string
            return true;
        } else if ("t".equals(name) && isIsOpen) {
            // Inline string <is><t>...</t></is> pair
            return true;
        } else {
            // It isn't a text tag
            return false;
        }
    }

    /**
     * 标签属性在开始标签中,因此需要将这些属性记录下来,供其他方法使用
     */
    @Override
    public void startElement(String uri, String localName, String qName,
                             Attributes attributes) throws SAXException {

        if (isTextTag(localName)) {
            // Clear contents cache
            value.setLength(0);
            isRowBlank = false;
        } else if ("is".equals(localName)) {
            // Inline string outer tag
            isIsOpen = true;
        } else if ("row".equals(localName)) { // 获取行标签
            String rowNumStr = attributes.getValue("r");
            preRowNum = rowNum;
            if (rowNumStr != null) {
                rowNum = Integer.parseInt(rowNumStr); // 从1开始计数
            } else {
                rowNum = nextRowNum;
            }
            isRowBlank = true; // reset flag
            output.startRow(rowNum, sheetName);
        } else if ("c".equals(localName)) { // 获取列标签
            // Set up defaults.
            this.nextDataType = CellDataType.NUMBER;
            this.formatIndex = -1;
            this.formatString = null;

            // A1 A2 B1
            cellRef = attributes.getValue("r");

            if (cellRef != null) {
                colPrefix = cellRef.substring(0, 1);
            }

            // 类型
            // 不是字符串,t都没有
            String cellType = attributes.getValue("t");

            // 单元格格式
            // 比如 '123 带’开头的字符串 是s="6", 日期格式有1,2,5,6等
            String cellStyleStr = attributes.getValue("s");

            if ("b".equals(cellType))
                nextDataType = CellDataType.BOOLEAN;
            else if ("e".equals(cellType))
                nextDataType = CellDataType.ERROR;
            else if ("inlineStr".equals(cellType))
                nextDataType = CellDataType.INLINE_STRING;
            else if ("s".equals(cellType))
                nextDataType = CellDataType.SST_STRING;
            else if ("str".equals(cellType))
                nextDataType = CellDataType.FORMULA;
            else {
                // Number, but almost certainly with a special style or format
                XSSFCellStyle style = null;
                if (stylesTable != null) {
                    if (cellStyleStr != null) {
                        int styleIndex = Integer.parseInt(cellStyleStr);
                        style = stylesTable.getStyleAt(styleIndex);
                    } else if (stylesTable.getNumCellStyles() > 0) {
                        style = stylesTable.getStyleAt(0);
                    }
                }
                if (style != null) {
                    this.formatIndex = style.getDataFormat();
                    this.formatString = style.getDataFormatString();
                    if (this.formatString == null)
                        this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
                }

                if (dateFormatString != null && formatString != null) {
                    if (formatString.contains("yyyy") || formatString.contains("mmm")) {
                        nextDataType = CellDataType.DATE;
                        if (!formatString.equals(dateFormatString))
                            formatString = dateFormatString;
                    }
                }
            }
        } else if ("sheetData".equals(localName)) {
            output.startSheet(sheetName, sheetIndex);
        }
    }

    /**
     * 第二个执行
     * 得到单元格对应的索引值或是内容值
     * 如果单元格类型是字符串、INLINE_STRING、数字、日期,lastIndex则是索引值
     * 如果单元格类型是布尔值、错误、公式,lastIndex则是内容值
     * <p>
     * SAX:Note that a SAX driver is free to chunk the character data any way it wants,
     * so you cannot count on all of the character data content of an element arriving in a single characters event.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        value.append(ch, start, length);
    }

    /**
     * 第三个执行,取数逻辑都在这里
     *
     * @param uri
     * @param localName
     * @param name
     * @throws SAXException
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {

        if (uri != null && !uri.equals(NS_SPREADSHEETML)) {
            return;
        }

        String thisStr = null;

        // v => contents of a cell
        if (isTextTag(localName)) {
            switch (nextDataType) {
                case BOOLEAN:
                    char first = value.charAt(0);
                    thisStr = first == '0' ? "FALSE" : "TRUE";
                    break;
                case ERROR:
                    thisStr = "ERROR:" + value;
                    break;
                case INLINE_STRING:
                    XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                    thisStr = rtsi.toString();
                    break;
                case SST_STRING:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        RichTextString rtss = sst.getItemAt(idx);
                        thisStr = rtss.toString();
                    } catch (NumberFormatException ex) {
                        logger.error("Failed to parse SST index ()", sstIndex, ex);
                    }
                    break;

                case NUMBER:
                case DATE:
                    String n = value.toString();
                    if (this.formatString != null && n.length() > 0)
                        thisStr = formatter.formatRawCellContents(Double.parseDouble(n),
                            this.formatIndex, this.formatString);
                    else
                        thisStr = n;
                    break;

                default:
                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                    break;
            }

            // Output
            output.cell(rowNum, colPrefix, cellRef, thisStr);
        } else if ("row".equals(localName)) {
            // Finish up the row
            output.endRow(rowNum, sheetName, isRowBlank);
            // some sheets do not have rowNum set in the XML,
            // Excel can read them so we should try to read them as well
            nextRowNum = rowNum + 1;
            sheetRows = rowNum;
            if (isRowBlank) {
                sheetBlankRows++;
            } else if (preRowNum + 1 < rowNum) {
                sheetBlankRows += rowNum - preRowNum - 1;
            }
        } else if ("sheetData".equals(localName)) {
            // indicate that this sheet is now done
            output.endSheet(sheetName, sheetIndex, sheetRows - 1, sheetBlankRows);
        }
    }

}

com.blueorigin.common.components.office

package com.blueorigin.common.components.office;

import java.util.Collection;
import java.util.HashSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;

import com.blueorigin.common.components.office.support.UserRow;
import com.blueorigin.common.components.office.support.UserRowHandler;

/**
 * 这是框架提供的并发处理的封装类,实际使用时,只需要将真正的执行器应用到包装器即可
 */
public class ConcurrentUserRowHandler<T extends UserRow> extends GenericEventListener<T>
                                     implements Callable<Void> {

    /**
     * 阻塞队列大小,建议与 threadPoolSize 相匹配
     */
    private static final int  DEFAULT_BLOCKING_QUEUE_SIZE = 1024;

    /**
     * 并发线程大小
     */
    private int               threadPoolSize;

    /**
     * 创建阻塞队列,阻塞队列是Excel解析程序和解析数据处理程序的桥梁
     */
    private BlockingQueue<T>  blockQueue;

    /**
     * 真正的处理类
     */
    private UserRowHandler<T> realHandler;

    /**
     * 文件解析完成标记
     * */
    private AtomicBoolean     isComplete;

    /**
     * 解析器已经完成所有解析, 调用此方法通知执行器, 这是为了释放线程
     */
    public void setComplete() {
        this.isComplete.set(true);
    }

    public ConcurrentUserRowHandler(UserRowHandler<T> realHandler, int threadPoolSize) {
        this(realHandler, DEFAULT_BLOCKING_QUEUE_SIZE, threadPoolSize);
    }

    /**
     * @param capacity 大于0的整数
     * @param threadPoolSize
     */
    public ConcurrentUserRowHandler(UserRowHandler<T> realHandler, int capacity,
                                    int threadPoolSize) {
        this.blockQueue = new ArrayBlockingQueue<>(capacity);
        this.isComplete = new AtomicBoolean(false);
        this.realHandler = realHandler;
        this.threadPoolSize = threadPoolSize;

        Executors.newSingleThreadExecutor().execute(this::concurrentConsume);
    }
    // 并发执行

    private void concurrentConsume() {
        try {
            Collection<Callable<Void>> tasks = new HashSet<>();
            for (int i = 0; i < threadPoolSize; i++) {
                // 这里后续可以根据TASK类型进行替换实现,记得抽取基类
                tasks.add(this);
            }
            ExecutorService executorService = Executors.newFixedThreadPool(threadPoolSize);
            executorService.invokeAll(tasks);
            executorService.shutdown();
            logger.info("ExecutorService shut down now");
        } catch (InterruptedException e) {
            logger.error("consume fail", e);
        }
    }

    @Override
    public void handle(T t) throws InterruptedException {
        blockQueue.put(t);
    }

    /**
     * stubun
     */
    @Override
    public Class<T> getSubClass() {
        return realHandler.getSubClass();
    }

    @Override
    public Void call() throws Exception {
        assert this.blockQueue != null;
        assert this.isComplete != null;

        while (true) {
            if (this.isComplete.get()) {
                if (blockQueue.isEmpty())
                    return null;
            }

            T poll = blockQueue.poll();
            if (poll != null) {
                realHandler.handle(poll);
            }

        }
    }

}
package com.blueorigin.common.components.office;

import java.lang.reflect.Field;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.blueorigin.common.components.office.annotation.ColumnAlias;
import com.blueorigin.common.components.office.annotation.ColumnReadIgnore;
import com.blueorigin.common.components.office.exceptions.OfficeReflectionException;
import com.blueorigin.common.components.office.exceptions.UnsupportedTypeException;
import com.blueorigin.common.components.office.support.UserRow;
import com.blueorigin.common.components.office.support.UserRowHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

import com.blueorigin.common.components.office.support.UserSheetEventListener;

/**
 * 不需要并发? 直接实现这个抽象类即可
 */
public abstract class GenericEventListener<T extends UserRow>
                                          implements UserSheetEventListener, UserRowHandler<T> {

    protected final Logger           logger         = LoggerFactory.getLogger(this.getClass());

    private T                        current;

    /**
     * kv of colPrefix,fieldName
     */
    private final Map<String, Field> fieldHeaderMap = new HashMap<>();

    /**
     * kv of field,Type
     */
    private final Map<Field, Type>   fieldTypeMap   = new HashMap<>();

    /**
     * model所支持的类型必须是 int boolean double 及其包装类型 和 String
     */
    private final List<Type>         supportedTypes = new ArrayList<>();

    private long                     processStartTime;

    {
        supportedTypes.add(String.class);
        supportedTypes.add(Double.class);
        supportedTypes.add(Integer.class);
        supportedTypes.add(Boolean.class);
        supportedTypes.add(double.class);
        supportedTypes.add(int.class);
        supportedTypes.add(boolean.class);
    }

    public GenericEventListener() {
    }

    private void initReflect(String colPrefix, String headerValue) throws UnsupportedTypeException {

        headerValue = headerValue.trim();

        Field[] fields = getSubClass().getDeclaredFields();
        for (Field field : fields) {
            Type fieldType = field.getType();
            if (!supportedTypes.contains(fieldType)) {
                throw new UnsupportedTypeException(fieldType.getTypeName());
            }

            boolean correctSet = false;

            ColumnReadIgnore annotation1 = field.getAnnotation(ColumnReadIgnore.class);
            if (annotation1 != null) {
                continue;
            }

            ColumnAlias annotation = field.getAnnotation(ColumnAlias.class);
            if (annotation != null) {
                String value = annotation.value().trim();
                if (value.equals(headerValue)) {
                    fieldHeaderMap.put(colPrefix, field);
                    correctSet = true;
                }

            }

            if (!correctSet) {
                String name = field.getName();
                if (name.equals(headerValue)) {
                    fieldHeaderMap.put(colPrefix, field);
                    correctSet = true;
                }
            }

            if (correctSet) {
                field.setAccessible(true);
                fieldTypeMap.put(field, fieldType);
                return;
            }

        }
    }

    @Override
    public void startRow(int rowNum, String sheetName) throws SAXException {
        // 构建model
        try {
            if (rowNum > 1) { // 反射解析字段,注解并与Excel表头对应
                current = getSubClass().newInstance();
            }
        } catch (Exception e) {
            throw new OfficeReflectionException(e.getMessage());
        }
    }

    @Override
    public void endRow(int rowNum, String sheetName, boolean isRowBlank) {
        if (!isRowBlank && current != null) {
            current.setSheetName(sheetName);
            current.setRowNum(rowNum);
            try {
                handle(current);
            } catch (Exception e) {
                logger.error("handle model exception, rowNum = {}", rowNum, e);
            }
        }
    }

    @Override
    public void cell(int rowNum, String colPrefix, String cellReference,
                     String formattedValue) throws SAXException {
        if (rowNum == 1) { // 反射解析字段,注解并与Excel表头对应
            initReflect(colPrefix, formattedValue);
            return;
        }

        String typeName = "";
        try {
            Field field = fieldHeaderMap.get(colPrefix);
            if (field != null) {
                Type type = fieldTypeMap.get(field);
                typeName = type.getTypeName();
                if (Integer.class.equals(type) || int.class.equals(type)) {
                    field.set(current, Integer.valueOf(formattedValue));
                } else if (Boolean.class.equals(type) || boolean.class.equals(type)) {
                    field.set(current, Boolean.valueOf(formattedValue));
                } else if (Double.class.equals(type) || double.class.equals(type)) {
                    field.set(current, Double.valueOf(formattedValue));
                } else if (String.class.equals(type)) {
                    field.set(current, formattedValue);
                } else {
                    throw new UnsupportedTypeException(typeName);
                }
            }
        } catch (UnsupportedTypeException e) {
            throw new UnsupportedTypeException(typeName);
        } catch (IllegalAccessException e) {
            throw new OfficeReflectionException(e.getMessage());
        }
    }

    @Override
    public void startSheet(String sheetName, int sheetIndex) {
        logger.info("sheet parse started for {}", sheetName);
        processStartTime = System.currentTimeMillis();
    }

    @Override
    public void endSheet(String sheetName, int sheetIndex, int sheetRows, int sheetBlankRows) {
        long processEndTime = System.currentTimeMillis();
        String userTime = "";
        if (processEndTime > 0 && processStartTime > 0) {
            long useTime = processEndTime - processStartTime;
            long mm = useTime % 1000;
            long seconds = useTime / 1000;//除以1000是为了转换成秒
            long minutes = seconds / 60; // 多少分
            long hours = minutes / 60 / 60; //  多少小时

            userTime = "毫秒" + userTime;
            userTime = mm + userTime;
            if (seconds > 0) {
                userTime = "秒" + userTime;
                userTime = seconds + userTime;
                if (minutes > 0) {
                    userTime = "分钟" + userTime;
                    userTime = minutes + userTime;
                    if (hours > 0) {
                        userTime = "小时" + userTime;
                        userTime = hours + userTime;
                    }
                }
            }

        }

        logger.info("sheet parse end for {}, total rows = {}, blank rows = {}, use {}", sheetName,
            sheetRows, sheetBlankRows, userTime);
    }
}
package com.blueorigin.common.components.office;

import java.io.InputStream;
import java.util.Map;

import com.blueorigin.common.components.office.support.SheetResult;
import com.blueorigin.common.components.office.support.UserSheetEventListener;
import com.blueorigin.common.components.office.support.XlsxExcelReader;

/**
 * 核心开发类,参考如下代码:
 *
 * <pre>
 *     {@code
 *
 *         // Excel内部使用的日期格式
 *         String patten = "yyyy-MM-dd HH:mm:ss";
 *
 *         UserRowHandler<ColumnModel> realHandler = userRow -> System.out.println(userRow);
 *
 *         ConcurrentUserRowHandler<ColumnModel> handler = new ConcurrentUserRowHandler(
 *             ColumnModel.class, realHandler, 5);
 *
 *         XlsxExcelParser xlsxExcelParser = new XlsxExcelParser(handler, patten);
 *         try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
 *             Map<Integer, SheetResult> parse = xlsxExcelParser.parse(in);
 *             SheetResult sheetResult = parse.get(1);
 *             assert sheetResult != null;
 *             assert sheetResult.getSheetRows() == sheetRows;
 *             assert sheetResult.getSheetBlankRows() == sheetBlankRows;
 *         } catch (Exception e) {
 *             logger.error("test fail", e);
 *         }
 *
 *     }
 * </pre>
 */
public class XlsxExcelParser {

    private final XlsxExcelReader        reader;

    private final UserSheetEventListener output;

    /**
     * 没有日期字段调用这个构造
     */
    public XlsxExcelParser(UserSheetEventListener sheetContentsHandler) {
        output = sheetContentsHandler;
        reader = new XlsxExcelReader(sheetContentsHandler);
    }

    /**
     * Excel中有日期时可以指定日期格式
     *
     * @param sheetContentsHandler
     * @param dateFormatString 如果需要指定日期格式,为null时,使用Excel指定的格式。注意有些中文日期格式无法正确解析
     */
    public XlsxExcelParser(UserSheetEventListener sheetContentsHandler, String dateFormatString) {
        output = sheetContentsHandler;
        reader = new XlsxExcelReader(sheetContentsHandler, dateFormatString);
    }

    /**
     * @param in  不负责关闭流,请客户端自己处理
     * @return
     * @throws Exception
     */
    public Map<Integer, SheetResult> parse(InputStream in) throws Exception {
        Map<Integer, SheetResult> parse = reader.parse(in);

        if (output instanceof ConcurrentUserRowHandler) {
            ConcurrentUserRowHandler concurrentUserRowHandler = (ConcurrentUserRowHandler) output;
            concurrentUserRowHandler.setComplete();
        }

        return parse;
    }

}

 

test

package com.blueorigin.common.components.office.sax;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import com.blueorigin.common.components.office.XlsxExcelParser;
import com.blueorigin.common.components.office.support.SheetResult;
import com.blueorigin.common.components.office.support.UserRowHandler;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.io.Resources;
import com.blueorigin.common.components.office.ConcurrentUserRowHandler;
import com.blueorigin.common.components.office.support.XlsxExcelReader;

public class XlsxExcelReaderTest {

    private final Logger logger         = LoggerFactory.getLogger(this.getClass());

    private final int    sheetRows      = 6640;
    private final int    sheetBlankRows = 24;

    /**
     * 重构时更改并跑通这个test
     */
    @Test
    public void mainTest() {
        String patten = "yyyy-MM-dd HH:mm:ss";

        ColumnModelEventListener handler = new ColumnModelEventListener();

        XlsxExcelParser xlsxExcelParser = new XlsxExcelParser(handler, patten);
        try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
            Map<Integer, SheetResult> parse = xlsxExcelParser.parse(in);
            SheetResult sheetResult = parse.get(1);
            assert sheetResult != null;
            assert sheetResult.getSheetRows() == sheetRows;
            assert sheetResult.getSheetBlankRows() == sheetBlankRows;
        } catch (Exception e) {
            logger.error("test fail", e);
        }
    }

    @Test
    public void mainTest2() {
        String patten = "yyyy-MM-dd HH:mm:ss";

        UserRowHandler<ColumnModel> realHandler = new UserRowHandler<ColumnModel>() {
            @Override
            public void handle(ColumnModel columnModel) throws Exception {
                System.out.println(columnModel);
            }

            @Override
            public Class<ColumnModel> getSubClass() {
                return ColumnModel.class;
            }
        };

        ConcurrentUserRowHandler<ColumnModel> handler = new ConcurrentUserRowHandler<>(realHandler,
            5);

        XlsxExcelParser xlsxExcelParser = new XlsxExcelParser(handler, patten);
        try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
            Map<Integer, SheetResult> parse = xlsxExcelParser.parse(in);
            SheetResult sheetResult = parse.get(1);
            assert sheetResult != null;
            assert sheetResult.getSheetRows() == sheetRows;
            assert sheetResult.getSheetBlankRows() == sheetBlankRows;
        } catch (Exception e) {
            logger.error("test fail", e);
        }
    }

    @Test
    public void testType() {
        final List<Class> supportedTypes = new ArrayList<>();
        supportedTypes.add(String.class);
        supportedTypes.add(Double.class);
        supportedTypes.add(Integer.class);
        supportedTypes.add(Boolean.class);
        supportedTypes.add(double.class);
        supportedTypes.add(int.class);
        supportedTypes.add(boolean.class);

        System.out.println(supportedTypes.contains(String.class));
        System.out.println(supportedTypes.contains(Double.class));
        System.out.println(supportedTypes.contains(Integer.class));
        System.out.println(supportedTypes.contains(Boolean.class));
        System.out.println(supportedTypes.contains(int.class));
        System.out.println(supportedTypes.contains(double.class));
        System.out.println(supportedTypes.contains(boolean.class));
    }

    /**
     * 打印EXCEL的API内容, 开发该组件时使用的。
     */
    @Test
    public void testUserSheetEventListener() {
        String patten = "yyyy-MM-dd HH:mm:ss";
        TestSheetContenctHandler testSheetContenctHandler = new TestSheetContenctHandler();
        XlsxExcelReader xlsxExcelHandler = new XlsxExcelReader(testSheetContenctHandler, patten);
        try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
            xlsxExcelHandler.parse(in);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
package com.blueorigin.common.components.office.sax;

import com.blueorigin.common.components.office.annotation.ColumnAlias;
import com.blueorigin.common.components.office.annotation.ColumnReadIgnore;
import com.blueorigin.common.components.office.support.UserRow;

public final class ColumnModel extends UserRow {
    @ColumnAlias("姓名")
    private String  name;
    @ColumnAlias("手机号")
    private String  mobile;
    @ColumnAlias("性别")
    private Boolean gender;
    @ColumnAlias("年龄")
    private Integer age;
    @ColumnAlias("身高")
    @ColumnReadIgnore
    private Double  height;

    private String  testField = "testField";

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getMobile() {
        return mobile;
    }

    public void setMobile(String mobile) {
        this.mobile = mobile;
    }

    public Boolean getGender() {
        return gender;
    }

    public void setGender(Boolean gender) {
        this.gender = gender;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public Double getHeight() {
        return height;
    }

    public void setHeight(Double height) {
        this.height = height;
    }

    @Override
    public String toString() {
        return "ColumnModel{" + "name='" + name + '\'' + ", mobile='" + mobile + '\'' + ", gender="
               + gender + ", age=" + age + ", height=" + height + ", testField='" + testField + '\''
               + "} ";
    }
}
package com.blueorigin.common.components.office.sax;

import com.blueorigin.common.components.office.GenericEventListener;

/**
 * 一行一行执行
 */
public class ColumnModelEventListener extends GenericEventListener<ColumnModel> {

    @Override
    public void handle(ColumnModel columnModel) {
        System.out.println(columnModel);
    }

    @Override
    public Class<ColumnModel> getSubClass() {
        return ColumnModel.class;
    }
}

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值