使用POI相关API来读取Excel,代码如下:
package com.pan.utils;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.WorkbookUtil;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import com.excel.entity.SheetResult;
/**
* 读取Excel的工具类
* <br/>
* 需要做的几件事:<br/>
* 1. 获取Excel文件的Workbook对象;<br/>
* 2. 读取指定Sheet页的所有内容;<br/>
* 3. 读取指定Sheet页中某些有用数据;<br/>
* 4. 读取Sheet页数据封装成对象;<br/>
*
* @since Excel Study 1.0
*/
public class ReadExcelUtil {
public final static int START = 0;
/**
* 获取Excel文件的Workbook对象
* @param fileName
* @return
* @throws Exception
*/
public static Workbook getWorkBook(String fileName) throws Exception {
long startTime = System.currentTimeMillis();
// Workbook wb = WorkbookFactory.create(new File(fileName));
// Workbook wb = WorkbookFactory.create(new FileInputStream(fileName));
// NPOIFSFileSystem fs = new NPOIFSFileSystem(new FileInputStream(fileName));
// HSSFWorkbook wb = new HSSFWorkbook(fs.getRoot(), true);
Workbook wb = null;
try {
// NPOIFSFileSystem fs = new NPOIFSFileSystem(new FileInputStream(fileName));
// wb = new HSSFWorkbook(fs.getRoot(), true);
wb = new HSSFWorkbook(new FileInputStream(fileName));
} catch (Exception e) {
wb = new XSSFWorkbook(new FileInputStream(fileName));
// wb = WorkbookFactory.create(new FileInputStream(fileName));
}
// OPCPackage pkg = OPCPackage.open(new FileInputStream(fileName));
// XSSFWorkbook wb = new XSSFWorkbook(pkg);
System.out.println(System.currentTimeMillis() - startTime);
return wb;
}
/**
* 读取指定Sheet页的所有内容
* @param sheet
* @return
*/
public static List<List<String>> readFromSheet(Sheet sheet) {
if(sheet == null) {
throw new NullPointerException("sheet not be null");
}
//保存所有读取到的数据
List<List<String>> dataList = new ArrayList<List<String>>();
//遍历行
for (Row row : sheet) {
List<String> rowDataList = new ArrayList<String>();
//遍历列
for (Cell cell : row) {
rowDataList.add(getCellValue(cell));
}
dataList.add(rowDataList);
}
return dataList;
}
/**
* 读取指定Sheet页的所有内容
* @param wb
* @param sheetName
* @return
*/
public static List<List<String>> readFromSheet(Workbook wb, String sheetName) {
Sheet sheet = wb.getSheet(sheetName);
return readFromSheet(sheet);
}
/**
* 读取指定Sheet页中某些有用数据
* @param sheet
* @param includeColNameSet 有用数据开始的列名集合,用于有用数据所在位置的可信度判断
* @return
*/
public static List<List<String>> readFromSheet(Sheet sheet, Set<String> includeColNameSet) {
if(sheet == null) {
throw new NullPointerException("sheet not be null");
}
//保存所有读取到的数据
List<List<String>> dataList = new ArrayList<List<String>>();
//存储临时数据的列表,用于定位有用数据所在的位置
List<String> tempValueList = new ArrayList<String>();
String tempValue = null;
int startCellNum = START;
boolean isStart = false;
//可能是有用数据,可能为true,否则为false,可能的依据是:包含可用数据
boolean isPossible = false;
int rowIndex = 0;
//遍历行
for(Row row : sheet) {
isPossible = false;
List<String> rowDataList = null;
if(startCellNum == START) {
tempValueList.clear();
} else {
rowDataList = new ArrayList<String>();
}
//遍历列
for (int j = startCellNum, cellLen = row.getLastCellNum() ; j < cellLen; j++) {
Cell cell = row.getCell(j);
tempValue = getCellValue(cell);
if(!isStart || startCellNum == START) {
tempValueList.add(tempValue);
} else {
rowDataList.add(tempValue);
}
if(!isStart && includeColNameSet.contains(tempValue)) {
isPossible = true;
}
}
if(startCellNum == START && isPossible) {
//获取可用数据所在的列
startCellNum = getColIndex(tempValueList, includeColNameSet);
}
//可信位置存在,则判断其可信度
if(!isStart && startCellNum != START) {
rowDataList = new ArrayList<String>();
//添加有用数据
for(int k = startCellNum, cellLen = row.getLastCellNum(); k < cellLen; k++) {
rowDataList.add(tempValueList.get(k));
}
dataList.add(rowDataList);
rowIndex++;
isStart = true;
} else if(startCellNum != START) {
if(isEquals(dataList.get(rowIndex-1), rowDataList)) {
break;
} else {
dataList.add(rowDataList);
rowIndex++;
}
}
}
return dataList;
}
/**
* 读取指定Sheet页中某些有用数据
* @param wb
* @param sheetName
* @param includeColNameSet
* @return
*/
public static List<List<String>> readFromSheet(Workbook wb,
String sheetName, Set<String> includeColNameSet) {
Sheet sheet = wb.getSheet(sheetName);
return readFromSheet(sheet, includeColNameSet);
}
/**
* 读取Sheet页数据封装成对象
* @param sheet
* @param includeColNameSet
* @param colLineNum 列名行数
* @return
*/
public static SheetResult readFromSheet(Sheet sheet, Set<String> includeColNameSet,
int colLineNum) {
if(sheet == null) {
throw new NullPointerException("sheet not be null");
}
//保存所有读取到的数据
List<List<String>> dataList = new ArrayList<List<String>>();
//记录列名与数据索引
Map<String, Integer> colNameMap = null;
//存储临时数据的列表,用于定位有用数据所在的位置
List<String> tempValueList = new ArrayList<String>(sheet.getLastRowNum() / 2);
String tempValue = null;
int maxCellNum = Integer.MAX_VALUE;
int cellNum = Integer.MAX_VALUE;
int startCellNum = START;
boolean isStart = false;
//可能是有用数据,可能为true,否则为false,可能的依据是:包含可用数据
boolean isPossible = false;
int rowIndex = 0;
int index = 0;
//遍历行
for(Row row : sheet) {
isPossible = false;
List<String> rowDataList = null;
if(startCellNum == START) {
tempValueList.clear();
cellNum = Math.min(row.getLastCellNum(), maxCellNum);
} else {
rowDataList = new ArrayList<String>(maxCellNum - startCellNum);
cellNum = maxCellNum;
}
//遍历列
for (int j = startCellNum; j < cellNum; j++) {
Cell cell = row.getCell(j);
tempValue = getCellValue(cell);
if(!isStart || startCellNum == START) {
tempValueList.add(tempValue);
} else {
rowDataList.add(tempValue);
}
if(!isStart && includeColNameSet.contains(tempValue)) {
isPossible = true;
}
}
if(startCellNum == START && isPossible) {
//获取可用数据所在的列
startCellNum = getColIndex(tempValueList, includeColNameSet);
}
//可信位置存在,则判断其可信度
if(!isStart && startCellNum != START) {
rowDataList = new ArrayList<String>();
colNameMap = getColNameMap(sheet, index, startCellNum, colLineNum);
maxCellNum = startCellNum + colNameMap.size();
//添加有用数据
for(int k = startCellNum; k < maxCellNum; k++) {
rowDataList.add(tempValueList.get(k));
}
dataList.add(rowDataList);
rowIndex++;
isStart = true;
} else if(startCellNum != START) {
if(isEquals(dataList.get(rowIndex-1), rowDataList)) {
break;
} else {
dataList.add(rowDataList);
rowIndex++;
}
}
index++;
}
SheetResult sheetResult = new SheetResult();
sheetResult.setColNameMap(colNameMap);
sheetResult.setDataList(dataList);
return sheetResult;
}
/**
* 读取Sheet页数据封装成对象
* @param wb
* @param sheetName
* @param includeColNameSet
* @param colLineNum 列名行数
* @return
*/
public static SheetResult readFromSheet(Workbook wb, String sheetName,
Set<String> includeColNameSet, int colLineNum) {
Sheet sheet = wb.getSheet(sheetName);
return readFromSheet(sheet, includeColNameSet, colLineNum);
}
/**
* 获取列名与数据索引映射
* @param sheet
* @param rowNo
* @param cellNo
* @param colLineNum
* @return
*/
private static Map<String, Integer> getColNameMap(Sheet sheet, int rowNo,
int cellNo, int colLineNum) {
//TODO 获取列名信息
Map<String, Integer> colNameMap = new TreeMap<String, Integer>();
colNameMap.put("1", 1);
colNameMap.put("2", 1);
colNameMap.put("3", 1);
colNameMap.put("4", 1);
colNameMap.put("5", 1);
colNameMap.put("6", 1);
return colNameMap;
}
/**
* 获取Cell对象表示的值
* @param cell
* @return
*/
private static String getCellValue(Cell cell) {
if(cell == null) {
return "";
}
String tempValue = null;
//判断Cell的类型
switch (cell.getCellType()) {
case Cell.CELL_TYPE_STRING:
tempValue = cell.getRichStringCellValue().getString();
break;
case Cell.CELL_TYPE_NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
tempValue = cell.getDateCellValue().toString();
} else {
tempValue = String.valueOf(cell.getNumericCellValue());
}
break;
case Cell.CELL_TYPE_BOOLEAN:
tempValue = String.valueOf(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_FORMULA:
tempValue = cell.getCellFormula();
break;
default:
tempValue = "";
break;
}
return tempValue;
}
/**
* 获取有用数据所在的列
* @param tempValueList
* @param includeColNameSet
* @return
*/
private static int getColIndex(List<String> tempValueList,
Set<String> includeColNameSet) {
for(int i = 0, len = tempValueList.size(); i < len; i++) {
//如果包含指定的列名
if(includeColNameSet.contains(tempValueList.get(i))) {
if(isTruePosition(tempValueList, i, includeColNameSet)) {
return i;
}
}
}
return START;
}
/**
* 判断当前行所在数据是否是有用数据所在的行
* @param tempValueList
* @param start
* @param includeColNameSet
* @return
*/
private static boolean isTruePosition(List<String> tempValueList, int start,
Set<String> includeColNameSet) {
boolean isPosition = true;
if(tempValueList.size() < start + includeColNameSet.size()) {
isPosition = false;
return isPosition;
}
for(int i = start, len = start + includeColNameSet.size(); i < len; i++) {
if(!includeColNameSet.contains(tempValueList.get(i))) {
isPosition = false;
break;
}
}
return isPosition;
}
/**
* 判断两行数据是否相同
* @param valueList
* @param compareValueList
* @return
*/
private static boolean isEquals(List<String> valueList, List<String> compareValueList) {
boolean isEnd = true;
int len = valueList.size() > compareValueList.size() ? compareValueList.size() : valueList.size();
for(int i = 0; i < len; i++) {
if(!compareValueList.get(i).equals("")
&& !valueList.get(i).equals(compareValueList.get(i))) {
isEnd = false;
}
}
return isEnd;
}
public static void main(String[] args) throws Exception {
long startTime = System.currentTimeMillis();
//Name: /xl/workbook.xml - Content Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml
Workbook wb = getWorkBook("J:\\MyEclipse2014\\studyworkspace\\MicroftOffice\\temp\\test.xlsx");
//Name: /xl/workbook.xml - Content Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml
//org.apache.poi.hssf.usermodel.HSSFWorkbook@77d0bef0
// wb = getWorkBook("J:\\MyEclipse2014\\studyworkspace\\MicroftOffice\\temp\\test2007.xls");
System.out.println(System.currentTimeMillis() - startTime);
System.out.println(wb);
readFromSheet(wb, "type");
Set<String> includeColNameSet = new HashSet<String>();
includeColNameSet.add("START");
includeColNameSet.add("VOL");
includeColNameSet.add("VOH");
includeColNameSet.add("DFS");
includeColNameSet.add("FG");
readFromSheet(wb, "type", includeColNameSet);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
readFromSheet(wb, "type", includeColNameSet, 1);
long endTime = System.currentTimeMillis();
System.out.println(endTime - startTime);
}
}
发现主要的时间还是在获取Workbook对象上,而对于97-03的文件的读取速度会比07的文件快很多
excel内容如下图所示: