问题描述
在我们的项目中经常会遇到需要导入一定规格的excel文档,然后来解析里面的内容转换为数据导入到数据库或者进行其他操作。这样解析的问题就来了,解析excle从文件后缀名上分为.xls和.xlsx两种,从数据的格式上分为行数据和列数据。
解决方案
直接上代码,解析行数据基础类
package com.test.excel;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
/**
* @author chinwer
* @created 2014-5-20
*/
public abstract class ExcelTransfer<T extends BaseEntity> {
public static final String OFFICE_EXCEL_2003_POSTFIX = "xls";
public static final String OFFICE_EXCEL_2010_POSTFIX = "xlsx";
public static final String NOT_EXCEL_FILE = " : Not the Excel file!";
public static final String PROCESSING = "Processing...";
public static final String EMPTY = "";
public static final String POINT = ".";
protected abstract T getModel();
private List<String> parseErrorList = new ArrayList<String>();
public String getParseErrorInfo() {
if (!parseErrorList.isEmpty()) {
return ArrayUtils.toString(parseErrorList, "\n");
}
return "";
}
/**
* @return the parseErrorList
*/
public List<String> getParseErrorList() {
return parseErrorList;
}
/**
* 读取xls或者xlsx文件
* @param is excel文件输入流
* @param startRow 开始行 传null值默认取1
* @param endRow 结束行 传null值默认取现有数据行
* @param startColumn 开始列
* @param endColumn 结束列
* @return 每行数据以list返回,每个元素对应一列
* @throws Exception
*/
public List<T> readExcel(String fileName, InputStream is, Integer startRow, Integer endRow, Integer startColumn,
Integer endColumn) throws Exception {
if (is == null) {
throw new Exception("is参数为必传");
}
if (startColumn == null || endColumn == null) {
throw new Exception("startColumn和endColumn参数为必传");
} else {
String postfix = getPostfix(fileName);
if (StringUtils.isNotBlank(postfix)) {
if (OFFICE_EXCEL_2003_POSTFIX.equals(postfix)) {
return readXls(is, startRow, endRow, startColumn, endColumn);
} else if (OFFICE_EXCEL_2010_POSTFIX.equals(postfix)) {
return readXlsx(is, startRow, endRow, startColumn, endColumn);
}
} else {
System.out.println(fileName + NOT_EXCEL_FILE);
}
}
return null;
}
/**
* Read the Excel 2010
* @param is of the excel file
* @return
* @throws IOException
*/
public List<T> readXlsx(InputStream is, Integer startRow, Integer endRow, Integer startColumn, Integer endColumn)
throws IOException {
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(is);
List<T> retlist = new ArrayList<T>();
for (int numSheet = 0; numSheet < xssfWorkbook.getNumberOfSheets(); numSheet++) {
XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(numSheet);
if (xssfSheet == null) {
continue;
}
if (startRow == null)
startRow = 1;
if (endRow == null)
endRow = xssfSheet.getLastRowNum() + 1;
for (int rowNum = startRow - 1; rowNum < endRow; rowNum++) {
XSSFRow xssfRow = xssfSheet.getRow(rowNum);
if (xssfRow != null) {
List<String> errorList = new ArrayList<String>();
T model = getModel();
for (int colNum = startColumn; colNum <= endColumn; colNum++) {
try {
setCellValue(model, xssfRow, colNum);
} catch (Exception e) {
errorList.add(String.valueOf(colNum));
}
}
if (!errorList.isEmpty()) {
parseErrorList.add("第" + String.valueOf(xssfRow.getRowNum() + 1) + "行的第"
+ ArrayUtils.toString(errorList, ",") + "列存在格式错误!");
} else if (retlist.contains(model)) {
parseErrorList.add("第" + String.valueOf(xssfRow.getRowNum() + 1) + "行的数据重复!");
} else {
model.setRowNum(String.valueOf(xssfRow.getRowNum() + 1));
retlist.add(model);
}
}
}
break;
}
return retlist;
}
/**
* Read the Excel 2003-2007
* @param is of the Excel
* @return
* @throws IOException
*/
public List<T> readXls(InputStream is, Integer startRow, Integer endRow, Integer startColumn, Integer endColumn)
throws IOException {
HSSFWorkbook hssfWorkbook = new HSSFWorkbook(is);
List<T> retlist = new ArrayList<T>();
for (int numSheet = 0; numSheet < hssfWorkbook.getNumberOfSheets(); numSheet++) {
System.out.println(numSheet);
HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(numSheet);
if (hssfSheet == null) {
continue;
}
if (startRow == null)
startRow = 1;
if (endRow == null)
endRow = hssfSheet.getLastRowNum() + 1;
for (int rowNum = startRow - 1; rowNum < endRow; rowNum++) {
HSSFRow hssfRow = hssfSheet.getRow(rowNum);
if (hssfRow != null) {
List<String> errorList = new ArrayList<String>();
T model = getModel();
for (int colNum = startColumn; colNum <= endColumn; colNum++) {
try {
setCellValue(model, hssfRow, colNum);
} catch (Exception e) {
errorList.add(String.valueOf(colNum));
}
}
if (!errorList.isEmpty()) {
parseErrorList.add("第" + String.valueOf(hssfRow.getRowNum() + 1) + "行的第"
+ ArrayUtils.toString(errorList, ",") + "列存在格式错误!");
} else if (retlist.contains(model)) {
parseErrorList.add("第" + String.valueOf(hssfRow.getRowNum() + 1) + "行的数据重复!");
} else {
model.setRowNum(String.valueOf(hssfRow.getRowNum() + 1));
retlist.add(model);
}
}
}
break;
}
return retlist;
}
@SuppressWarnings("static-access")
protected String getValue(Cell cell, DecimalFormat df) {
if (cell == null) {
return EMPTY;
}
if (cell.getCellType() == cell.CELL_TYPE_BOOLEAN) {
return String.valueOf(cell.getBooleanCellValue());
} else if (cell.getCellType() == cell.CELL_TYPE_NUMERIC) {
return df.format(cell.getNumericCellValue());
} else {
return String.valueOf(cell.getStringCellValue());
}
}
private static String getPostfix(String path) {
if (path == null || StringUtils.isEmpty(path.trim())) {
return EMPTY;
}
if (path.contains(POINT)) {
return path.substring(path.lastIndexOf(POINT) + 1, path.length());
}
return EMPTY;
}
/**
* 设置每个单元格对应模型的属性<br>
* (主要注意一下对存数字的单元格和即可能存字符又可能存数字的单元格的格式化)
* @param model
* @param row
* @param colNum
*/
protected abstract void setCellValue(T model, Row row, int colNum) throws Exception;
}
解析行数据测试类,demo.java
package com.test.excel;
public class Demo extends BaseEntity {
private String id;
private String orgName;
private String orgCode;
private String contactName;
private String phone;
private String email;
private String productName;
private String productCode;
private String date;
private String price;
private String pice;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getOrgName() {
return orgName;
}
public void setOrgName(String orgName) {
this.orgName = orgName;
}
public String getOrgCode() {
return orgCode;
}
public void setOrgCode(String orgCode) {
this.orgCode = orgCode;
}
public String getContactName() {
return contactName;
}
public void setContactName(String contactName) {
this.contactName = contactName;
}
public String getPhone() {
return phone;
}
public void setPhone(String phone) {
this.phone = phone;
}
public String getEmail() {
return email;
}
public void setEmail(String email) {
this.email = email;
}
public String getProductName() {
return productName;
}
public void setProductName(String productName) {
this.productName = productName;
}
public String getProductCode() {
return productCode;
}
public void setProductCode(String productCode) {
this.productCode = productCode;
}
public String getDate() {
return date;
}
public void setDate(String date) {
this.date = date;
}
public String getPrice() {
return price;
}
public void setPrice(String price) {
this.price = price;
}
public String getPice() {
return pice;
}
public void setPice(String pice) {
this.pice = pice;
}
@Override
public String toString() {
return "Demo [id=" + id + ", orgName=" + orgName + ", orgCode=" + orgCode + ", contactName=" + contactName
+ ", phone=" + phone + ", email=" + email + ", productName=" + productName + ", productCode="
+ productCode + ", date=" + date + ", price=" + price + ", pice=" + pice + "]";
}
}
BaseEntity .java
package com.test.excel;
import java.io.Serializable;
public class BaseEntity implements Serializable {
/**
* 域的描述:
* 合法值范围:
* 空值说明:
*/
private static final long serialVersionUID = 2047950479396700672L;
private String rowNum;
public String getRowNum() {
return rowNum;
}
public void setRowNum(String rowNum) {
this.rowNum = rowNum;
}
}
ProductTypeDictEnum.java
package com.test.excel;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
/*********************************************************************************
//* Copyright (C) 2015 Pingan Haoche (PAHAOCHE). All Rights Reserved.
//*
//* Filename: ProductDictEnum.java
//* Revision: 1.0
//* Author: <gao yunqi>
//* Created On: 2016年2月18日
//* Modified by:
//* Modified On:
//*
//* Description: <产品类型相关字典枚举类>
/********************************************************************************/
public enum ProductTypeDictEnum {
IMSP_PRODUCT_TYPE_10("10", "见证业务"), IMSP_PRODUCT_TYPE_3("3", "净值型"), IMSP_PRODUCT_TYPE_4("4", "预期收益率型"), IMSP_PRODUCT_TYPE_5(
"5", "资产挂牌转让"), IMSP_PRODUCT_TYPE_6("6", "非标债权型"), IMSP_PRODUCT_TYPE_7("7", "智能宝"), IMSP_PRODUCT_TYPE_8(
"8", "ABS"), IMSP_PRODUCT_TYPE_9("9", "F2F"),
;
private String code;//状态code
private String des;//状态描述
private static Map<String, String> desShowMap = new HashMap<String, String>();
static {
ProductTypeDictEnum[] rs = ProductTypeDictEnum.values();
for (ProductTypeDictEnum productEnum : rs) {
desShowMap.put(productEnum.getDes(), productEnum.getCode());
}
}
/**
* @param code
* @param des
*/
private ProductTypeDictEnum(String code, String des) {
this.code = code;
this.des = des;
}
/**
* @return the code
*/
public String getCode() {
return code;
}
/**
* @param code the code to set
*/
public void setCode(String code) {
this.code = code;
}
/**
* @return the des
*/
public String getDes() {
return des;
}
/**
* @param des the des to set
*/
public void setDes(String des) {
this.des = des;
}
/**
*
* @param getCodeByDes
* @return
*/
public static String getCodeByDes(String des) {
if (StringUtils.isNotBlank(des)) {
return ProductTypeDictEnum.desShowMap.get(des);
} else {
return null;
}
}
}
demo转换测试类
package com.test.excel;
import java.io.File;
import java.io.FileInputStream;
import java.text.DecimalFormat;
import java.util.List;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
public class DemoExcelTransfer extends ExcelTransfer<Demo> {
@Override
protected Demo getModel() {
return new Demo();
}
@Override
public void setCellValue(Demo demo, Row row, int colNum) {
Cell cell = row.getCell(colNum - 1);
DecimalFormat df = new DecimalFormat("#");
switch (colNum) {
case 1:
demo.setId(getValue(cell, df));
break;
case 2:
demo.setOrgName(getValue(cell, df));
break;
case 3:
demo.setOrgCode(getValue(cell, df));
break;
case 4:
demo.setContactName(getValue(cell, df));
break;
case 5:
demo.setPhone(getValue(cell, df));
break;
case 6:
demo.setEmail(getValue(cell, df));
break;
case 7:
demo.setProductName(getValue(cell, df));
break;
case 8:
demo.setProductCode(getValue(cell, df));
break;
case 9:
demo.setDate(getValue(cell, df));
break;
case 10:
DecimalFormat df1 = new DecimalFormat("####.00");
demo.setPrice(getValue(cell, df1));
break;
case 11:
demo.setPice(getValue(cell, df));
break;
}
}
public static void main(String[] args) throws Exception {
String excel2010 = "D:\\testDemo.xlsx";
FileInputStream inputStream = new FileInputStream(new File(excel2010));
// read the 2003-2007 excel
List<Demo> list = new DemoExcelTransfer().readExcel(excel2010, inputStream, 2, 3, 1, 11);
for (Demo demo : list) {
System.out.println(demo.toString());
}
}
}
excel原始数据截图
解析结果输出如下
读取列数据的工具类如下,操作参考读取行数据的操作
package com.test.excel;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public abstract class ExcelColumnTransfer<T extends BaseEntity> {
public static final String OFFICE_EXCEL_2003_POSTFIX = "xls";
public static final String OFFICE_EXCEL_2010_POSTFIX = "xlsx";
public static final String NOT_EXCEL_FILE = " : Not the Excel file!";
public static final String PROCESSING = "Processing...";
public static final String EMPTY = "";
public static final String POINT = ".";
protected abstract T getModel();
private List<String> parseErrorList = new ArrayList<String>();
private String parseMessage; /* 解析信息。总共解析条数,成功和失败条数 */
public String getParseErrorInfo() {
if (!parseErrorList.isEmpty()) {
return ArrayUtils.toString(parseErrorList, "\n");
}
return "";
}
/**
* @return the parseErrorList
*/
public List<String> getParseErrorList() {
return parseErrorList;
}
/**
* 读取xls或者xlsx文件
* @param path 文件路径,包括文件名
* @param startRow 开始行 传null值默认取1
* @param endRow 结束行 传null值默认取现有数据行
* @param startColumn 开始列
* @param endColumn 结束列
* @return 每行数据以list返回,每个元素对应一列
* @throws Exception
*/
public List<T> readExcel(String path, Integer startRow, Integer endRow, Integer startColumn, Integer endColumn)
throws Exception {
if (path == null || StringUtils.isBlank(path)) {
throw new Exception("path参数为必传");
}
if (startColumn == null) {
throw new Exception("startColumn和endColumn参数为必传");
} else {
String postfix = getPostfix(path);
if (StringUtils.isNotBlank(postfix)) {
if (OFFICE_EXCEL_2003_POSTFIX.equals(postfix)) {
return readXls(path, startRow, endRow, startColumn, endColumn);
} else if (OFFICE_EXCEL_2010_POSTFIX.equals(postfix)) {
return readXlsx(path, startRow, endRow, startColumn, endColumn);
}
} else {
System.out.println(path + NOT_EXCEL_FILE);
}
}
return null;
}
/**
* Read the Excel 2010
* @param path the path of the excel file
* @return
* @throws IOException
*/
public List<T> readXlsx(String path, Integer startRow, Integer endRow, Integer startColumn, Integer endColumn)
throws IOException {
int totalRecord = 0;
int successRecord = 0;
int failureRecord = 0;
System.out.println(PROCESSING + path);
InputStream is = new FileInputStream(path);
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(is);
List<T> retlist = new ArrayList<T>();
DecimalFormat df = new DecimalFormat("#");
for (int numSheet = 0; numSheet < xssfWorkbook.getNumberOfSheets(); numSheet++) {
XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(numSheet);
List<String> oneSheetErrorList = new ArrayList<String>();
if (xssfSheet == null) {
continue;
}
if (startRow == null)
startRow = 1;
endRow = xssfSheet.getLastRowNum() + 1;
int maxColumn = 0;
for (int i = 0; i < endRow.intValue(); i++) {
if (null != xssfSheet.getRow(i)) {
int rowColumn = xssfSheet.getRow(i).getLastCellNum();
if (maxColumn < rowColumn) {
Cell cell = xssfSheet.getRow(i).getCell(rowColumn - 1);
String param = getValue(cell, df);
if (null != param && !"".equals(param)) {
maxColumn = rowColumn;
}
}
}
}
for (int coloumNum = startColumn; coloumNum <= maxColumn; coloumNum++) {
T model = getModel();
String productTypeName = xssfSheet.getSheetName();
String productType = ProductTypeDictEnum.getCodeByDes(productTypeName);
List<String> errorList = new ArrayList<String>();
for (int rowNum = startRow - 1; rowNum < endRow; rowNum++) {
totalRecord += 1;
XSSFRow xssfRows = xssfSheet.getRow(rowNum);
try {
setCellValue(model, xssfRows, coloumNum, numSheet + 1, productType);
successRecord += 1;
} catch (Exception e) {
e.printStackTrace();
errorList.add(String.valueOf(xssfRows.getRowNum() + 1));
failureRecord += 1;
}
}
if (!errorList.isEmpty()) {
oneSheetErrorList.add("第" + String.valueOf(coloumNum) + "列的第" + ArrayUtils.toString(errorList, ",")
+ "行存在格式错误!");
} else {
model.setRowNum(String.valueOf(coloumNum));
retlist.add(model);
}
}
if (!oneSheetErrorList.isEmpty()) {
parseErrorList.add("第" + (numSheet + 1) + "个sheet页存在错误:" + oneSheetErrorList.toString());
oneSheetErrorList.clear();
}
parseMessage = "总共导入" + totalRecord + "条数据,成功" + successRecord + ",失败" + failureRecord;
}
return retlist;
}
/**
* Read the Excel 2003-2007
* @param path the path of the Excel
* @return
* @throws IOException
*/
public List<T> readXls(String path, Integer startRow, Integer endRow, Integer startColumn, Integer endColumn)
throws IOException {
System.out.println(PROCESSING + path);
InputStream is = new FileInputStream(path);
HSSFWorkbook hssfWorkbook = new HSSFWorkbook(is);
List<T> retlist = new ArrayList<T>();
List<String> oneSheetErrorList = new ArrayList<String>();
DecimalFormat df = new DecimalFormat("#");
for (int numSheet = 0; numSheet < hssfWorkbook.getNumberOfSheets(); numSheet++) {
HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(numSheet);
if (hssfSheet == null) {
continue;
}
if (startRow == null)
startRow = 1;
if (endRow == null)
endRow = hssfSheet.getLastRowNum() + 1;
int maxColumn = 0;
for (int i = 0; i < endRow.intValue(); i++) {
if (null != hssfSheet.getRow(i)) {
int rowColumn = hssfSheet.getRow(i).getLastCellNum();
if (maxColumn < rowColumn) {
Cell cell = hssfSheet.getRow(i).getCell(rowColumn - 1);
String param = getValue(cell, df);
if (null != param && !"".equals(param)) {
maxColumn = rowColumn;
}
}
}
}
for (int coloumNum = startColumn; coloumNum <= maxColumn; coloumNum++) {
T model = getModel();
List<String> errorList = new ArrayList<String>();
String productTypeName = hssfSheet.getSheetName();
String productType = ProductTypeDictEnum.getCodeByDes(productTypeName);
for (int rowNum = startRow - 1; rowNum < endRow; rowNum++) {
HSSFRow hssfRows = hssfSheet.getRow(rowNum);
try {
setCellValue(model, hssfRows, coloumNum, numSheet + 1, productType);
} catch (Exception e) {
errorList.add(String.valueOf(hssfRows.getRowNum()));
}
}
if (!errorList.isEmpty()) {
oneSheetErrorList.add("第" + String.valueOf(coloumNum) + "列的第" + ArrayUtils.toString(errorList, ",")
+ "行存在格式错误!");
} else {
model.setRowNum(String.valueOf(coloumNum));
retlist.add(model);
}
retlist.add(model);
}
if (!oneSheetErrorList.isEmpty()) {
parseErrorList.add("第" + (numSheet + 1) + "个sheet页存在错误:" + oneSheetErrorList.toString());
oneSheetErrorList.clear();
}
}
return retlist;
}
@SuppressWarnings("static-access")
protected String getValue(Cell cell, DecimalFormat df) {
if (cell == null) {
return EMPTY;
}
if (cell.getCellType() == cell.CELL_TYPE_BOOLEAN) {
return String.valueOf(cell.getBooleanCellValue());
} else if (cell.getCellType() == cell.CELL_TYPE_NUMERIC) {
return df.format(cell.getNumericCellValue());
} else {
return String.valueOf(cell.getStringCellValue());
}
}
private static String getPostfix(String path) {
if (path == null || StringUtils.isEmpty(path.trim())) {
return EMPTY;
}
if (path.contains(POINT)) {
return path.substring(path.lastIndexOf(POINT) + 1, path.length());
}
return EMPTY;
}
/**
* 设置每个单元格对应模型的属性<br>
* (主要注意一下对存数字的单元格和即可能存字符又可能存数字的单元格的格式化)
* @param model
* @param row
* @param colNum
*/
protected abstract void setCellValue(T model, Row row, int colNum, int type, String productType) throws Exception;
public String getParseMessage() {
return parseMessage;
}
public void setParseMessage(String parseMessage) {
this.parseMessage = parseMessage;
}
}
注:
以上代码所需jar包地址如下:
http://download.csdn.net/download/gao36951/10032742