项目结构
依赖如下
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.1</version>
</dependency>
log4j.properties
### 设置###
log4j.rootLogger = debug,stdout
### 输出信息到控制抬 ###
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target = System.out
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
如何使用
# 实用组件
## components-office
添加依赖
\<dependency\>
\<groupId\>com.blueorigin.common\</groupId\>
\<artifactId\>components-office\</artifactId\>
\<version\>latest\</version\>
\</dependency\>
### 版本信息
>- v_1.0.2 提供对Excel2007以后的.xlsx的读取功能
### quick-start
对应包:com.blueorigin.common.components.office
xlsx读取:
1. 创建model extends UserRow, model使用annotation包下的注解标注列的别名
2. 创建handler implements UserRowHandler<? extends UserRow>
3. 基于1、2实例化组件提供的默认事件监听器 DefaultEventListener
4. 构建XlsxExcelReader 并调用其parse方法解析文件流
5. 获取解析结果
Excel每行数据的处理行为都在第2步中的handler中处理。另外components-office 支持客户端传入一个阻塞队列将读取到的UserRow put进队列,客户端从该阻塞队列获取数据自行处理。
用例参考:
package: com.blueorigin.common.components.office.annotation
package com.blueorigin.common.components.office.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* 表头列的别名,alais对应Excel表的中文别名
* 当设置了别名,解析器将该列下的值注入到对应的别名字段,如果没有设置别名,则依据字段变量名设置
* 当字段被注解 @ColumnReadIgnore 标注,则该字段不被解析注入
* 当字段被注解 @ColumnWriteIgnore 标注,则该字段不被生成器写入Excel
*
* @see ColumnReadIgnore
* @see ColumnWriteIgnore
*/
@Target({ ElementType.FIELD })
@Retention(RetentionPolicy.RUNTIME)
public @interface ColumnAlias {
String value();
}
package com.blueorigin.common.components.office.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* 标明该字段不会被注入器所识别,
* Excel应避免使用该列名,参考ColumnAlias
*/
@Target({ ElementType.FIELD })
@Retention(RetentionPolicy.RUNTIME)
public @interface ColumnReadIgnore {
}
package com.blueorigin.common.components.office.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* 标明该字段不会被注入器所识别,
* Excel应避免使用该列名,参考ColumnAlias
*/
@Target({ ElementType.FIELD })
@Retention(RetentionPolicy.RUNTIME)
public @interface ColumnWriteIgnore {
}
package: com.blueorigin.common.components.office.exceptions
package com.blueorigin.common.components.office.exceptions;
import org.xml.sax.SAXException;
public class OfficeReflectionException extends SAXException {
public OfficeReflectionException(String message) {
super("office reflection exception: " + message);
}
}
package com.blueorigin.common.components.office.exceptions;
import org.xml.sax.SAXException;
/**
* 解析Excel的model如果不满足字段类型要求,则抛出此异常
*/
public class UnsupportedTypeException extends SAXException {
public UnsupportedTypeException(String type) {
super("office reader not support type of " + type);
}
}
com.blueorigin.common.components.office.support
package com.blueorigin.common.components.office.support;
public class SheetResult {
private int sheetRows; // 工作表总行数
private int sheetBlankRows; // 工作表空行数
private int sheetIndex; // 工作表索引
private String sheetName; // 工作表名
public SheetResult(int sheetRows, int sheetBlankRows, int sheetIndex, String sheetName) {
this.sheetRows = sheetRows;
this.sheetBlankRows = sheetBlankRows;
this.sheetIndex = sheetIndex;
this.sheetName = sheetName;
}
public int getSheetRows() {
return sheetRows;
}
public void setSheetRows(int sheetRows) {
this.sheetRows = sheetRows;
}
public int getSheetBlankRows() {
return sheetBlankRows;
}
public void setSheetBlankRows(int sheetBlankRows) {
this.sheetBlankRows = sheetBlankRows;
}
public int getSheetIndex() {
return sheetIndex;
}
public void setSheetIndex(int sheetIndex) {
this.sheetIndex = sheetIndex;
}
public String getSheetName() {
return sheetName;
}
public void setSheetName(String sheetName) {
this.sheetName = sheetName;
}
@Override
public String toString() {
return "SheetResult{" +
"sheetRows=" + sheetRows +
", sheetBlankRows=" + sheetBlankRows +
", sheetIndex=" + sheetIndex +
", sheetName='" + sheetName + '\'' +
'}';
}
}
package com.blueorigin.common.components.office.support;
/**
* 所有的Excel映射类都需要继承这个类,并且映射类必须是最终类,否者可导致字段注入失败
*/
public class UserRow extends UserSheet {
private int rowNum;
public int getRowNum() {
return rowNum;
}
public void setRowNum(int rowNum) {
this.rowNum = rowNum;
}
}
package com.blueorigin.common.components.office.support;
/**
* 用户编程接口,实现该类对象并将其注入到UserSheetEventListener中去
* UserSheetEventListener#endRow会调用该方法并将从Excel row中解析到的对象作为参数调用该方法
*
* @param <T> row 映射的pojo
*/
public interface UserRowHandler<T extends UserRow> {
void handle(T t) throws Exception;
Class<T> getSubClass();
}
package com.blueorigin.common.components.office.support;
public class UserSheet {
private String sheetName;
public String getSheetName() {
return sheetName;
}
public void setSheetName(String sheetName) {
this.sheetName = sheetName;
}
}
package com.blueorigin.common.components.office.support;
import org.xml.sax.SAXException;
/**
* 用户编程接口
*/
public interface UserSheetEventListener {
/**
* A row with the (one based) row number has started
* 需要创建要给单元格值的容器,通常是一个对象
*/
void startRow(int rowNum, String sheetName) throws SAXException;
/**
* A row with the (one based) row number has ended
* 处理创建好的对象
*/
void endRow(int rowNum, String sheetName, boolean isRowBlank);
/**
* 如果要获取单元格的值,那么需要实现这个方法。一般单元格的值对应对象中的字段。
*
* @param rowNum 行号
* @param colPrefix 列码
* @param cellReference 单元格引用 = 列表+行号 如 A1 B2
* @param formattedValue 格式化的单元格的值
*/
void cell(int rowNum, String colPrefix, String cellReference,
String formattedValue) throws SAXException;
/**
* Reader iterate the excel workbook and parse each sheet one time
*/
default void startSheet(String sheetName, int sheetIndex) {
}
/**
* @param sheetName 工作表名称
* @param sheetIndex 工作表索引 min=1
* @param sheetRows 工作表总行数
* @param sheetBlankRows 工作表空行数
*/
default void endSheet(String sheetName, int sheetIndex, int sheetRows, int sheetBlankRows) {
}
}
package com.blueorigin.common.components.office.support;
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
* @desc POI读取excel有两种模式,一种是用户模式,一种是事件驱动模式
* 采用SAX事件驱动模式解决XLSX文件,可以有效解决用户模式内存溢出的问题,
* 该模式是POI官方推荐的读取大数据的模式,
* 在用户模式下,数据量较大,Sheet较多,或者是有很多无用的空行的情况下,容易出现内存溢出
* <p>
* 用于解决.xlsx2007版本大数据量问题
*
*
* example:
*
* DefaultEventListener<ColumnModel> handler = new DefaultEventListener<>(
* ColumnModel.class, columnModel -> System.out.println(columnModel));
* XlsxExcelReader xlsxExcelHandler = new XlsxExcelReader(handler, patten);
* xlsxExcelHandler.parse(in);
*
**/
public class XlsxExcelReader extends DefaultHandler {
Logger logger = LoggerFactory.getLogger(this.getClass());
/**
* 单元格中的数据可能的数据类型
*/
enum CellDataType {
BOOLEAN, ERROR, FORMULA, INLINE_STRING, SST_STRING, NUMBER, DATE
}
private int sheetIndex = 0; // 工作表索引
private String sheetName = ""; // 工作表名
private int sheetRows; // 工作表总行数
private int sheetBlankRows; // 工作表空行数
private final DataFormatter formatter = new DataFormatter(Locale.CHINA);
private short formatIndex; // 单元格格式的索引
private String formatString; // 单元格格式字符串
private String dateFormatString; // 自定义的日期格式字符串
private StylesTable stylesTable; // 单元格
private SharedStringsTable sst; // 共享字符串表
private final UserSheetEventListener output; // 回调
// Gathers characters as they are seen.
private StringBuilder value = new StringBuilder(64);
private String colPrefix; // cellRef = colPrefix + rowNum
private int preRowNum; // 用于解决有些Excel读取时自动过滤空行的问题
private int rowNum;
private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
private boolean isRowBlank; // 标记当前行是否是空行
private String cellRef; // 当前位置
private boolean isIsOpen;
private CellDataType nextDataType = CellDataType.NUMBER; // 单元格数据类型
public XlsxExcelReader(UserSheetEventListener sheetContentsHandler) {
this.output = sheetContentsHandler;
}
/**
* @param sheetContentsHandler
* @param dateFormatString 如果需要指定日期格式,为null时,使用Excel指定的格式。注意有些中文日期格式无法正确解析
*/
public XlsxExcelReader(UserSheetEventListener sheetContentsHandler, String dateFormatString) {
this.output = sheetContentsHandler;
this.dateFormatString = dateFormatString;
}
/**
* 解析excel, 由于采用的是事件模式,Excel被转换成xml,并被依次读取。
* 读到相应的标签或内容则产生对应的事件通知 @link http://www.saxproject.org/event.html
*/
public Map<Integer, SheetResult> parse(InputStream in) throws IOException, OpenXML4JException,
ParserConfigurationException,
SAXException {
OPCPackage pkg = OPCPackage.open(in);
XSSFReader xssfReader = new XSSFReader(pkg);
this.stylesTable = xssfReader.getStylesTable();
this.sst = xssfReader.getSharedStringsTable();
XMLReader parser = SAXHelper.newXMLReader();
parser.setContentHandler(this);
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
Map<Integer, SheetResult> resultMap = new HashMap<>();
while (sheets.hasNext()) {
preRowNum = 0;
rowNum = 0;
nextRowNum = 0;
sheetRows = 0;
sheetBlankRows = 0;
sheetIndex++;
InputStream sheet = sheets.next();
sheetName = sheets.getSheetName();
InputSource sheetSource = new InputSource(sheet);
//解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行
parser.parse(sheetSource);
sheet.close();
resultMap.put(sheetIndex,
new SheetResult(sheetRows - 1, sheetBlankRows, sheetIndex, sheetName));
}
return resultMap;
}
private boolean isTextTag(String name) {
if ("v".equals(name)) {
// Easy, normal v text tag
return true;
} else if ("inlineStr".equals(name)) {
// Easy inline string
return true;
} else if ("t".equals(name) && isIsOpen) {
// Inline string <is><t>...</t></is> pair
return true;
} else {
// It isn't a text tag
return false;
}
}
/**
* 标签属性在开始标签中,因此需要将这些属性记录下来,供其他方法使用
*/
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (isTextTag(localName)) {
// Clear contents cache
value.setLength(0);
isRowBlank = false;
} else if ("is".equals(localName)) {
// Inline string outer tag
isIsOpen = true;
} else if ("row".equals(localName)) { // 获取行标签
String rowNumStr = attributes.getValue("r");
preRowNum = rowNum;
if (rowNumStr != null) {
rowNum = Integer.parseInt(rowNumStr); // 从1开始计数
} else {
rowNum = nextRowNum;
}
isRowBlank = true; // reset flag
output.startRow(rowNum, sheetName);
} else if ("c".equals(localName)) { // 获取列标签
// Set up defaults.
this.nextDataType = CellDataType.NUMBER;
this.formatIndex = -1;
this.formatString = null;
// A1 A2 B1
cellRef = attributes.getValue("r");
if (cellRef != null) {
colPrefix = cellRef.substring(0, 1);
}
// 类型
// 不是字符串,t都没有
String cellType = attributes.getValue("t");
// 单元格格式
// 比如 '123 带’开头的字符串 是s="6", 日期格式有1,2,5,6等
String cellStyleStr = attributes.getValue("s");
if ("b".equals(cellType))
nextDataType = CellDataType.BOOLEAN;
else if ("e".equals(cellType))
nextDataType = CellDataType.ERROR;
else if ("inlineStr".equals(cellType))
nextDataType = CellDataType.INLINE_STRING;
else if ("s".equals(cellType))
nextDataType = CellDataType.SST_STRING;
else if ("str".equals(cellType))
nextDataType = CellDataType.FORMULA;
else {
// Number, but almost certainly with a special style or format
XSSFCellStyle style = null;
if (stylesTable != null) {
if (cellStyleStr != null) {
int styleIndex = Integer.parseInt(cellStyleStr);
style = stylesTable.getStyleAt(styleIndex);
} else if (stylesTable.getNumCellStyles() > 0) {
style = stylesTable.getStyleAt(0);
}
}
if (style != null) {
this.formatIndex = style.getDataFormat();
this.formatString = style.getDataFormatString();
if (this.formatString == null)
this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
}
if (dateFormatString != null && formatString != null) {
if (formatString.contains("yyyy") || formatString.contains("mmm")) {
nextDataType = CellDataType.DATE;
if (!formatString.equals(dateFormatString))
formatString = dateFormatString;
}
}
}
} else if ("sheetData".equals(localName)) {
output.startSheet(sheetName, sheetIndex);
}
}
/**
* 第二个执行
* 得到单元格对应的索引值或是内容值
* 如果单元格类型是字符串、INLINE_STRING、数字、日期,lastIndex则是索引值
* 如果单元格类型是布尔值、错误、公式,lastIndex则是内容值
* <p>
* SAX:Note that a SAX driver is free to chunk the character data any way it wants,
* so you cannot count on all of the character data content of an element arriving in a single characters event.
*/
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
value.append(ch, start, length);
}
/**
* 第三个执行,取数逻辑都在这里
*
* @param uri
* @param localName
* @param name
* @throws SAXException
*/
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
if (uri != null && !uri.equals(NS_SPREADSHEETML)) {
return;
}
String thisStr = null;
// v => contents of a cell
if (isTextTag(localName)) {
switch (nextDataType) {
case BOOLEAN:
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR:
thisStr = "ERROR:" + value;
break;
case INLINE_STRING:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
break;
case SST_STRING:
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
RichTextString rtss = sst.getItemAt(idx);
thisStr = rtss.toString();
} catch (NumberFormatException ex) {
logger.error("Failed to parse SST index ()", sstIndex, ex);
}
break;
case NUMBER:
case DATE:
String n = value.toString();
if (this.formatString != null && n.length() > 0)
thisStr = formatter.formatRawCellContents(Double.parseDouble(n),
this.formatIndex, this.formatString);
else
thisStr = n;
break;
default:
thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
break;
}
// Output
output.cell(rowNum, colPrefix, cellRef, thisStr);
} else if ("row".equals(localName)) {
// Finish up the row
output.endRow(rowNum, sheetName, isRowBlank);
// some sheets do not have rowNum set in the XML,
// Excel can read them so we should try to read them as well
nextRowNum = rowNum + 1;
sheetRows = rowNum;
if (isRowBlank) {
sheetBlankRows++;
} else if (preRowNum + 1 < rowNum) {
sheetBlankRows += rowNum - preRowNum - 1;
}
} else if ("sheetData".equals(localName)) {
// indicate that this sheet is now done
output.endSheet(sheetName, sheetIndex, sheetRows - 1, sheetBlankRows);
}
}
}
com.blueorigin.common.components.office
package com.blueorigin.common.components.office;
import java.util.Collection;
import java.util.HashSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import com.blueorigin.common.components.office.support.UserRow;
import com.blueorigin.common.components.office.support.UserRowHandler;
/**
* 这是框架提供的并发处理的封装类,实际使用时,只需要将真正的执行器应用到包装器即可
*/
public class ConcurrentUserRowHandler<T extends UserRow> extends GenericEventListener<T>
implements Callable<Void> {
/**
* 阻塞队列大小,建议与 threadPoolSize 相匹配
*/
private static final int DEFAULT_BLOCKING_QUEUE_SIZE = 1024;
/**
* 并发线程大小
*/
private int threadPoolSize;
/**
* 创建阻塞队列,阻塞队列是Excel解析程序和解析数据处理程序的桥梁
*/
private BlockingQueue<T> blockQueue;
/**
* 真正的处理类
*/
private UserRowHandler<T> realHandler;
/**
* 文件解析完成标记
* */
private AtomicBoolean isComplete;
/**
* 解析器已经完成所有解析, 调用此方法通知执行器, 这是为了释放线程
*/
public void setComplete() {
this.isComplete.set(true);
}
public ConcurrentUserRowHandler(UserRowHandler<T> realHandler, int threadPoolSize) {
this(realHandler, DEFAULT_BLOCKING_QUEUE_SIZE, threadPoolSize);
}
/**
* @param capacity 大于0的整数
* @param threadPoolSize
*/
public ConcurrentUserRowHandler(UserRowHandler<T> realHandler, int capacity,
int threadPoolSize) {
this.blockQueue = new ArrayBlockingQueue<>(capacity);
this.isComplete = new AtomicBoolean(false);
this.realHandler = realHandler;
this.threadPoolSize = threadPoolSize;
Executors.newSingleThreadExecutor().execute(this::concurrentConsume);
}
// 并发执行
private void concurrentConsume() {
try {
Collection<Callable<Void>> tasks = new HashSet<>();
for (int i = 0; i < threadPoolSize; i++) {
// 这里后续可以根据TASK类型进行替换实现,记得抽取基类
tasks.add(this);
}
ExecutorService executorService = Executors.newFixedThreadPool(threadPoolSize);
executorService.invokeAll(tasks);
executorService.shutdown();
logger.info("ExecutorService shut down now");
} catch (InterruptedException e) {
logger.error("consume fail", e);
}
}
@Override
public void handle(T t) throws InterruptedException {
blockQueue.put(t);
}
/**
* stubun
*/
@Override
public Class<T> getSubClass() {
return realHandler.getSubClass();
}
@Override
public Void call() throws Exception {
assert this.blockQueue != null;
assert this.isComplete != null;
while (true) {
if (this.isComplete.get()) {
if (blockQueue.isEmpty())
return null;
}
T poll = blockQueue.poll();
if (poll != null) {
realHandler.handle(poll);
}
}
}
}
package com.blueorigin.common.components.office;
import java.lang.reflect.Field;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.blueorigin.common.components.office.annotation.ColumnAlias;
import com.blueorigin.common.components.office.annotation.ColumnReadIgnore;
import com.blueorigin.common.components.office.exceptions.OfficeReflectionException;
import com.blueorigin.common.components.office.exceptions.UnsupportedTypeException;
import com.blueorigin.common.components.office.support.UserRow;
import com.blueorigin.common.components.office.support.UserRowHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import com.blueorigin.common.components.office.support.UserSheetEventListener;
/**
* 不需要并发? 直接实现这个抽象类即可
*/
public abstract class GenericEventListener<T extends UserRow>
implements UserSheetEventListener, UserRowHandler<T> {
protected final Logger logger = LoggerFactory.getLogger(this.getClass());
private T current;
/**
* kv of colPrefix,fieldName
*/
private final Map<String, Field> fieldHeaderMap = new HashMap<>();
/**
* kv of field,Type
*/
private final Map<Field, Type> fieldTypeMap = new HashMap<>();
/**
* model所支持的类型必须是 int boolean double 及其包装类型 和 String
*/
private final List<Type> supportedTypes = new ArrayList<>();
private long processStartTime;
{
supportedTypes.add(String.class);
supportedTypes.add(Double.class);
supportedTypes.add(Integer.class);
supportedTypes.add(Boolean.class);
supportedTypes.add(double.class);
supportedTypes.add(int.class);
supportedTypes.add(boolean.class);
}
public GenericEventListener() {
}
private void initReflect(String colPrefix, String headerValue) throws UnsupportedTypeException {
headerValue = headerValue.trim();
Field[] fields = getSubClass().getDeclaredFields();
for (Field field : fields) {
Type fieldType = field.getType();
if (!supportedTypes.contains(fieldType)) {
throw new UnsupportedTypeException(fieldType.getTypeName());
}
boolean correctSet = false;
ColumnReadIgnore annotation1 = field.getAnnotation(ColumnReadIgnore.class);
if (annotation1 != null) {
continue;
}
ColumnAlias annotation = field.getAnnotation(ColumnAlias.class);
if (annotation != null) {
String value = annotation.value().trim();
if (value.equals(headerValue)) {
fieldHeaderMap.put(colPrefix, field);
correctSet = true;
}
}
if (!correctSet) {
String name = field.getName();
if (name.equals(headerValue)) {
fieldHeaderMap.put(colPrefix, field);
correctSet = true;
}
}
if (correctSet) {
field.setAccessible(true);
fieldTypeMap.put(field, fieldType);
return;
}
}
}
@Override
public void startRow(int rowNum, String sheetName) throws SAXException {
// 构建model
try {
if (rowNum > 1) { // 反射解析字段,注解并与Excel表头对应
current = getSubClass().newInstance();
}
} catch (Exception e) {
throw new OfficeReflectionException(e.getMessage());
}
}
@Override
public void endRow(int rowNum, String sheetName, boolean isRowBlank) {
if (!isRowBlank && current != null) {
current.setSheetName(sheetName);
current.setRowNum(rowNum);
try {
handle(current);
} catch (Exception e) {
logger.error("handle model exception, rowNum = {}", rowNum, e);
}
}
}
@Override
public void cell(int rowNum, String colPrefix, String cellReference,
String formattedValue) throws SAXException {
if (rowNum == 1) { // 反射解析字段,注解并与Excel表头对应
initReflect(colPrefix, formattedValue);
return;
}
String typeName = "";
try {
Field field = fieldHeaderMap.get(colPrefix);
if (field != null) {
Type type = fieldTypeMap.get(field);
typeName = type.getTypeName();
if (Integer.class.equals(type) || int.class.equals(type)) {
field.set(current, Integer.valueOf(formattedValue));
} else if (Boolean.class.equals(type) || boolean.class.equals(type)) {
field.set(current, Boolean.valueOf(formattedValue));
} else if (Double.class.equals(type) || double.class.equals(type)) {
field.set(current, Double.valueOf(formattedValue));
} else if (String.class.equals(type)) {
field.set(current, formattedValue);
} else {
throw new UnsupportedTypeException(typeName);
}
}
} catch (UnsupportedTypeException e) {
throw new UnsupportedTypeException(typeName);
} catch (IllegalAccessException e) {
throw new OfficeReflectionException(e.getMessage());
}
}
@Override
public void startSheet(String sheetName, int sheetIndex) {
logger.info("sheet parse started for {}", sheetName);
processStartTime = System.currentTimeMillis();
}
@Override
public void endSheet(String sheetName, int sheetIndex, int sheetRows, int sheetBlankRows) {
long processEndTime = System.currentTimeMillis();
String userTime = "";
if (processEndTime > 0 && processStartTime > 0) {
long useTime = processEndTime - processStartTime;
long mm = useTime % 1000;
long seconds = useTime / 1000;//除以1000是为了转换成秒
long minutes = seconds / 60; // 多少分
long hours = minutes / 60 / 60; // 多少小时
userTime = "毫秒" + userTime;
userTime = mm + userTime;
if (seconds > 0) {
userTime = "秒" + userTime;
userTime = seconds + userTime;
if (minutes > 0) {
userTime = "分钟" + userTime;
userTime = minutes + userTime;
if (hours > 0) {
userTime = "小时" + userTime;
userTime = hours + userTime;
}
}
}
}
logger.info("sheet parse end for {}, total rows = {}, blank rows = {}, use {}", sheetName,
sheetRows, sheetBlankRows, userTime);
}
}
package com.blueorigin.common.components.office;
import java.io.InputStream;
import java.util.Map;
import com.blueorigin.common.components.office.support.SheetResult;
import com.blueorigin.common.components.office.support.UserSheetEventListener;
import com.blueorigin.common.components.office.support.XlsxExcelReader;
/**
* 核心开发类,参考如下代码:
*
* <pre>
* {@code
*
* // Excel内部使用的日期格式
* String patten = "yyyy-MM-dd HH:mm:ss";
*
* UserRowHandler<ColumnModel> realHandler = userRow -> System.out.println(userRow);
*
* ConcurrentUserRowHandler<ColumnModel> handler = new ConcurrentUserRowHandler(
* ColumnModel.class, realHandler, 5);
*
* XlsxExcelParser xlsxExcelParser = new XlsxExcelParser(handler, patten);
* try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
* Map<Integer, SheetResult> parse = xlsxExcelParser.parse(in);
* SheetResult sheetResult = parse.get(1);
* assert sheetResult != null;
* assert sheetResult.getSheetRows() == sheetRows;
* assert sheetResult.getSheetBlankRows() == sheetBlankRows;
* } catch (Exception e) {
* logger.error("test fail", e);
* }
*
* }
* </pre>
*/
public class XlsxExcelParser {
private final XlsxExcelReader reader;
private final UserSheetEventListener output;
/**
* 没有日期字段调用这个构造
*/
public XlsxExcelParser(UserSheetEventListener sheetContentsHandler) {
output = sheetContentsHandler;
reader = new XlsxExcelReader(sheetContentsHandler);
}
/**
* Excel中有日期时可以指定日期格式
*
* @param sheetContentsHandler
* @param dateFormatString 如果需要指定日期格式,为null时,使用Excel指定的格式。注意有些中文日期格式无法正确解析
*/
public XlsxExcelParser(UserSheetEventListener sheetContentsHandler, String dateFormatString) {
output = sheetContentsHandler;
reader = new XlsxExcelReader(sheetContentsHandler, dateFormatString);
}
/**
* @param in 不负责关闭流,请客户端自己处理
* @return
* @throws Exception
*/
public Map<Integer, SheetResult> parse(InputStream in) throws Exception {
Map<Integer, SheetResult> parse = reader.parse(in);
if (output instanceof ConcurrentUserRowHandler) {
ConcurrentUserRowHandler concurrentUserRowHandler = (ConcurrentUserRowHandler) output;
concurrentUserRowHandler.setComplete();
}
return parse;
}
}
test
package com.blueorigin.common.components.office.sax;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.blueorigin.common.components.office.XlsxExcelParser;
import com.blueorigin.common.components.office.support.SheetResult;
import com.blueorigin.common.components.office.support.UserRowHandler;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.io.Resources;
import com.blueorigin.common.components.office.ConcurrentUserRowHandler;
import com.blueorigin.common.components.office.support.XlsxExcelReader;
public class XlsxExcelReaderTest {
private final Logger logger = LoggerFactory.getLogger(this.getClass());
private final int sheetRows = 6640;
private final int sheetBlankRows = 24;
/**
* 重构时更改并跑通这个test
*/
@Test
public void mainTest() {
String patten = "yyyy-MM-dd HH:mm:ss";
ColumnModelEventListener handler = new ColumnModelEventListener();
XlsxExcelParser xlsxExcelParser = new XlsxExcelParser(handler, patten);
try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
Map<Integer, SheetResult> parse = xlsxExcelParser.parse(in);
SheetResult sheetResult = parse.get(1);
assert sheetResult != null;
assert sheetResult.getSheetRows() == sheetRows;
assert sheetResult.getSheetBlankRows() == sheetBlankRows;
} catch (Exception e) {
logger.error("test fail", e);
}
}
@Test
public void mainTest2() {
String patten = "yyyy-MM-dd HH:mm:ss";
UserRowHandler<ColumnModel> realHandler = new UserRowHandler<ColumnModel>() {
@Override
public void handle(ColumnModel columnModel) throws Exception {
System.out.println(columnModel);
}
@Override
public Class<ColumnModel> getSubClass() {
return ColumnModel.class;
}
};
ConcurrentUserRowHandler<ColumnModel> handler = new ConcurrentUserRowHandler<>(realHandler,
5);
XlsxExcelParser xlsxExcelParser = new XlsxExcelParser(handler, patten);
try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
Map<Integer, SheetResult> parse = xlsxExcelParser.parse(in);
SheetResult sheetResult = parse.get(1);
assert sheetResult != null;
assert sheetResult.getSheetRows() == sheetRows;
assert sheetResult.getSheetBlankRows() == sheetBlankRows;
} catch (Exception e) {
logger.error("test fail", e);
}
}
@Test
public void testType() {
final List<Class> supportedTypes = new ArrayList<>();
supportedTypes.add(String.class);
supportedTypes.add(Double.class);
supportedTypes.add(Integer.class);
supportedTypes.add(Boolean.class);
supportedTypes.add(double.class);
supportedTypes.add(int.class);
supportedTypes.add(boolean.class);
System.out.println(supportedTypes.contains(String.class));
System.out.println(supportedTypes.contains(Double.class));
System.out.println(supportedTypes.contains(Integer.class));
System.out.println(supportedTypes.contains(Boolean.class));
System.out.println(supportedTypes.contains(int.class));
System.out.println(supportedTypes.contains(double.class));
System.out.println(supportedTypes.contains(boolean.class));
}
/**
* 打印EXCEL的API内容, 开发该组件时使用的。
*/
@Test
public void testUserSheetEventListener() {
String patten = "yyyy-MM-dd HH:mm:ss";
TestSheetContenctHandler testSheetContenctHandler = new TestSheetContenctHandler();
XlsxExcelReader xlsxExcelHandler = new XlsxExcelReader(testSheetContenctHandler, patten);
try (InputStream in = Resources.getResource("whiteList.xlsx").openStream()) {
xlsxExcelHandler.parse(in);
} catch (IOException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
}
package com.blueorigin.common.components.office.sax;
import com.blueorigin.common.components.office.annotation.ColumnAlias;
import com.blueorigin.common.components.office.annotation.ColumnReadIgnore;
import com.blueorigin.common.components.office.support.UserRow;
public final class ColumnModel extends UserRow {
@ColumnAlias("姓名")
private String name;
@ColumnAlias("手机号")
private String mobile;
@ColumnAlias("性别")
private Boolean gender;
@ColumnAlias("年龄")
private Integer age;
@ColumnAlias("身高")
@ColumnReadIgnore
private Double height;
private String testField = "testField";
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getMobile() {
return mobile;
}
public void setMobile(String mobile) {
this.mobile = mobile;
}
public Boolean getGender() {
return gender;
}
public void setGender(Boolean gender) {
this.gender = gender;
}
public Integer getAge() {
return age;
}
public void setAge(Integer age) {
this.age = age;
}
public Double getHeight() {
return height;
}
public void setHeight(Double height) {
this.height = height;
}
@Override
public String toString() {
return "ColumnModel{" + "name='" + name + '\'' + ", mobile='" + mobile + '\'' + ", gender="
+ gender + ", age=" + age + ", height=" + height + ", testField='" + testField + '\''
+ "} ";
}
}
package com.blueorigin.common.components.office.sax;
import com.blueorigin.common.components.office.GenericEventListener;
/**
* 一行一行执行
*/
public class ColumnModelEventListener extends GenericEventListener<ColumnModel> {
@Override
public void handle(ColumnModel columnModel) {
System.out.println(columnModel);
}
@Override
public Class<ColumnModel> getSubClass() {
return ColumnModel.class;
}
}