import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
* 解析excel
*/
public class ExcelFileParser {
public static Workbook getWb(String path) {
try {
return WorkbookFactory.create(new File(path));
} catch (Exception e) {
throw new RuntimeException("读取EXCEL文件出错", e);
}
}
public static Sheet getSheet(Workbook wb, int sheetIndex) {
if (wb == null) {
throw new RuntimeException("工作簿对象为空");
}
int sheetSize = wb.getNumberOfSheets();
if (sheetIndex < 0 || sheetIndex > sheetSize - 1) {
throw new RuntimeException("工作表获取错误");
}
return wb.getSheetAt(sheetIndex);
}
public static List<List<String>> getExcelRows(Sheet sheet, int startLine, int endLine) {
List<List<String>> list = new ArrayList<List<String>>();
// 如果开始行号和结束行号都是-1的话,则全表读取
if (startLine == -1)
startLine = 0;
if (endLine == -1) {
endLine = sheet.getLastRowNum() + 1;
} else {
endLine += 1;
}
for (int i = startLine; i < endLine; i++) {
Row row = sheet.getRow(i);
if (row == null) { //该行为空,直接跳过
continue;
}
int rowSize = row.getLastCellNum();
List<String> rowList = new ArrayList<String>();
for (int j = 0; j < rowSize; j++) {
Cell cell = row.getCell(j);
String temp = "";
if (cell == null) { //该列为空,赋值双引号
temp = "";
} else {
int cellType = cell.getCellType();
switch (cellType) {
case Cell.CELL_TYPE_STRING:
temp = cell.getStringCellValue().trim();
temp = StringUtils.isEmpty(temp) ? "NULL" : temp;
break;
case Cell.CELL_TYPE_BOOLEAN:
temp = String.valueOf(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_FORMULA:
temp = String.valueOf(cell.getCellFormula().trim());
break;
case Cell.CELL_TYPE_NUMERIC:
if (HSSFDateUtil.isCellDateFormatted(cell)) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:hh:ss");
temp = sdf.format(cell.getDateCellValue());
} else {
temp = new DecimalFormat("#.######").format(cell.getNumericCellValue());
}
break;
case Cell.CELL_TYPE_BLANK:
temp = "";
break;
case Cell.CELL_TYPE_ERROR:
temp = "ERROR";
break;
default:
temp = cell.toString().trim();
break;
}
}
rowList.add(temp);
}
list.add(rowList);
}
return list;
}
public static void main(String a[]) {
String path = "E:\\123.xlsx";
Workbook wb = getWb(path);
List<List<String>> list = getExcelRows(getSheet(wb, 0), -1, -1);
for (int i = 0; i < list.size(); i++) {
List<String> row = list.get(i);
for (int j = 0; j < row.size(); j++) {
System.out.print(row.get(j) + "\t");
}
System.out.println();
}
}
}
代码流程:
首先根据文件路径去创建一个工作簿对象Workbook
然后通过工作簿对象获取一个工作表对象Sheet
最后对工作表的row进行遍历
注意获取每行的列数,我用的row.getLastCellNum();而不是row.getPhysicalNumberOfCells()
这个方法就可以处理不规则的excel单元格内容了,即某一行3列,某一行4列,5列之类的
然后对行循环内存的单元列也要进行空判断,防止异常
代码提供的方法非常通用,只需传进去一个文件路径,和几个必要的参数即可,有其它需求的话,在此代码上二次开发非常简单
借鉴原贴 点击打开链接