Author: kagula
Initial Date: 2016-8-2
Last Update Date: 2017-06-16
Environment:
maven eclipse mars.2 jdk1.8
Introduce:
How to extract the specified column data by title name from the excel 2003/2007 file demo.
the excel file first row is header for distinguish column, after that is body will be extract.
Content:
Part1: Maven
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16</version>
</dependency>
Part2: Source
package com.nuoke.escortcashbox.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FilenameUtils;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class ExcelHelper {
private static Logger logger = LoggerFactory.getLogger(ExcelHelper.class);
private final static String XLS = "xls";
private final static String XLSX = "xlsx";
/*
* file: a full path in the storage.
* sheetNum: sheet index in the file, base is 0.
* listKyes: list key you want to extract from the excel file, must be in left to right order.
*/
public static List<List<String>> fromExcel2List(File file,
int sheetNum,
List<String> listKeys)
throws IOException {
return fromExcel2List(new FileInputStream(file),
FilenameUtils.getExtension(file.getName()), sheetNum,listKeys);
}
public static List<List<String>> fromExcel2List(InputStream is,
String extensionName, int sheetNum, List<String> listKeys) throws IOException {
Workbook workbook = null;
if (extensionName.toLowerCase().equals(XLS)) {
workbook = new HSSFWorkbook(is);
} else if (extensionName.toLowerCase().equals(XLSX)) {
workbook = new XSSFWorkbook(is);
}
return fromExcel2List(workbook, sheetNum,listKeys);
}
private static List<List<String>> fromExcel2List(Workbook workbook,
int sheetNum, List<String> listKeys) {
Sheet sheet = workbook.getSheetAt(sheetNum);
// 解析公式结果
FormulaEvaluator evaluator = workbook.getCreationHelper()
.createFormulaEvaluator();
List<List<String>> list = new ArrayList<List<String>>();
int minRowIx = sheet.getFirstRowNum();
int maxRowIx = sheet.getLastRowNum();
if( maxRowIx - minRowIx <= 0 )
{
logger.warn("Rows is not enough!");
return null;
}
//Extract index in excel corresponding keys.
Row rowHeader = sheet.getRow(minRowIx);
Map<String,Integer> mapSI = new HashMap<String,Integer>();
short minColIx = rowHeader.getFirstCellNum();
short maxColIx = rowHeader.getLastCellNum();
for(int i=minColIx; i<=maxColIx; i++)
{
Cell cell = rowHeader.getCell(new Integer(i));
CellValue cellValue = evaluator.evaluate(cell);
if (cellValue == null) {
continue;
}
if(cellValue.getCellTypeEnum()!=CellType.STRING)
continue;
String t = cellValue.getStringValue().trim();
mapSI.put(t, i);
}
//User must assure key order.
List<Integer> listKeyInIndex = new ArrayList<Integer>();
for(int i=0; i<listKeys.size(); i++)
{
String key = listKeys.get(i);
if(mapSI.containsKey(key) == false)
{
logger.warn("Can not find key in the excel file!");
return null;
}
listKeyInIndex.add(mapSI.get(key));
}
//Extract content
for (int rowIx = minRowIx+1; rowIx <= maxRowIx; rowIx++) {
Row row = sheet.getRow(rowIx);
Map<Integer,String> mapRow = new HashMap<Integer,String>();
List<String> listRow = new ArrayList<String>();
minColIx = row.getFirstCellNum();
maxColIx = row.getLastCellNum();
for (int colIx = minColIx; colIx <= maxColIx; colIx++) {
Cell cell = row.getCell(new Integer(colIx));
CellValue cellValue = evaluator.evaluate(cell);
if (cellValue == null) {
continue;
}
// Here, only exist Boolean、Numeric and String three types except Error type.
// Other types, According official site can be ignore http://poi.apache.org/spreadsheet/eval.html
switch (cellValue.getCellTypeEnum()) {
case BOOLEAN:
mapRow.put(colIx, cellValue.getBooleanValue()+"");
break;
case NUMERIC:
//data type will be convert to numeric type, so should be distinguish.
/*
if (DateUtil.isCellDateFormatted(cell)) {
mapRow.put(colIx, cell.getDateCellValue()+"");
} else {
mapRow.put(colIx, cellValue.getNumberValue()+"");
}
*/
//下面这行代码解决了“123”变成“123.0”问题。
mapRow.put(colIx, new BigDecimal(cell.getNumericCellValue()).toString());
break;
case STRING:
mapRow.put(colIx, cellValue.getStringValue());
break;
case FORMULA:
break;
case BLANK:
break;
case ERROR:
break;
default:
break;
}//end case
}//end for
for(int i=0;i<listKeyInIndex.size();i++)
{
String strColVal = "";
if(mapRow.containsKey(listKeyInIndex.get(i)))
{
strColVal = mapRow.get(listKeyInIndex.get(i));
}
listRow.add(strColVal);
}
list.add(listRow);
}//end for
return list;
}
}
Last Part: Test
@Test
public void TestParseExcelFile() throws IOException
{
String path = "C:\\Users\\kagula\\20160705.xls";
List<List<String>> list = null;
List<String> parameters = new ArrayList<String>();
parameters.add("地市");
parameters.add("加油站");
parameters.add("加油站(描述)");
list = ExcelHelper.fromExcel2List(new File(path), 0, parameters);
for(int i=0;i<list.size();i++)
{
System.out.println(list.get(i));
}//end for
}//end function
Reference
[1]《Java操作Excel文件导入》