以下内容均来自实际项目需求,记录下,网上找的读取Excel会出现不少问题,下面代码是经过好多次测试改进的符合现在项目的代码,如有不同要求,请自行修改,代码均只去掉了包名。
注:我们的Excel 第一行是表头,其他行是数据
1、第一行遇到空列,后面的内容自动忽略掉,
2、如果数据中间有一行空白行,继续读,只有连续两行活着以上是空白行,下面的就忽略掉不读取了。
完整代码如下
WDWUtil.java
/**
* 判断Excel 文件的版本
* Created by Administrator on 2018/7/4.
*/
public class WDWUtil {
// @描述:是否是2003的excel,返回true是2003
public static boolean isExcel2003(String filePath) {
return filePath.matches("^.+\\.(?i)(xls)$");
}
//@描述:是否是2007的excel,返回true是2007
public static boolean isExcel2007(String filePath) {
return filePath.matches("^.+\\.(?i)(xlsx)$");
}
}
ExcelUtils
此类适用于预览数据和真正上传数据(预览数据时读取前一百条数据,正常上传读取全部数据)
其中包含空行空列的处理逻辑,代码中均加了注释
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Excel 文件的处理
* Created by Administrator on 2018/7/4.
*/
public class ExcelUtils {
//总行数
private int totalRows = 0;
//总条数
private int totalCells = 0;
//错误信息接收器
private String errorMsg;
// 是否是预览
private boolean isPriview = true;
//构造方法
public ExcelUtils() {
}
//构造方法
public ExcelUtils(boolean isPriview) {
this.isPriview=isPriview;
}
//获取总行数
public int getTotalRows() {
return totalRows;
}
//获取总列数
public int getTotalCells() {
return totalCells;
}
//获取错误信息
public String getErrorInfo() {
return errorMsg;
}
/**
* 验证EXCEL文件
*
* @param filePath
* @return
*/
public boolean validateExcel(String filePath) {
if (filePath == null || !(WDWUtil.isExcel2003(filePath) || WDWUtil.isExcel2007(filePath))) {
errorMsg = "文件名不是excel格式";
return false;
}
return true;
}
/**
* 读EXCEL文件
*
* @param
* @return
*/
public Map<String, Object> getExcelInfo(String fileName, String tmpFilePath) {
Map<String, Object> result = new HashMap<String, Object>();
File fa = new File(tmpFilePath);
InputStream is = null;
try {
is= new FileInputStream(fa);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
try {
//验证文件名是否合格
if (!validateExcel(fileName)) {
errorMsg = "文件不是excel格式";
return null;
}
//根据文件名判断文件是2003版本还是2007版本
boolean isExcel2003 = true;
if (WDWUtil.isExcel2007(fileName)) {
isExcel2003 = false;
}
// 获取excel内容
Workbook wb = getExcelInfo(is, isExcel2003);
List customerList = null;
List titleList = null;
Map columnstypes = null;
// 读取标题信息 其中也设置了有效列数量
titleList = readExcelTitle(wb);
//读取Excel信息
customerList = readExcelValue(wb);
if(isPriview){
columnstypes = getColumnType(wb);
customerList.add(0, columnstypes);
}
result.put("error", errorMsg);
result.put("tablename", fileName.substring(0, fileName.lastIndexOf('.')));
result.put("schema", titleList);
result.put("data", customerList);
result.put("columnstypes", columnstypes);
is.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
is = null;
e.printStackTrace();
}
}
}
return result;
}
/**
* 根据excel里面的内容
*
* @param is 输入流
* @param isExcel2003 excel是2003还是2007版本
* @return
*/
public Workbook getExcelInfo(InputStream is, boolean isExcel2003) {
/** 根据版本选择创建Workbook的方式 */
Workbook wb = null;
try {
//当excel是2003时
if (isExcel2003) {
wb = new HSSFWorkbook(is);
} else { //当excel是2007时
wb = new XSSFWorkbook(is);
}
return wb;
} catch (IOException e) {
e.printStackTrace();
}
return wb;
}
/**
* 读取Excel内容
*
* @param wb
* @return
*/
private List readExcelValue(Workbook wb) {
//得到第一个shell
Sheet sheet = wb.getSheetAt(0);
//得到Excel的行数
this.totalRows = sheet.getPhysicalNumberOfRows();
//得到Excel的列数(前提是有行数)
// 0816 已经在获取标题的时候设置了有效列 totalCells
if (isPriview && totalRows > 100) {
totalRows = 101;
}
// 记录空行 规则 如果空行大于1行 下面的视为垃圾数据 忽略 20180820 yunguang modified
int blankLine=0;
List valueList = new ArrayList();
//循环Excel行数,从第二行开始。标题不入库
for (int r = 1; r < totalRows; r++) {
Row row = sheet.getRow(r);
if (row == null) {
// 遇到空白行 获取的行数加1
this.totalRows++;
blankLine++;
if (blankLine > 1) {
// totalrows 重新定义总行数 20180820 yunguang modified
this.totalRows = r;
break;
}
continue;
} else { // 无空白行 重置计数器
blankLine = 0;
}
List temp = new ArrayList();
// 标记是否为插入的空白行 识别规则 插入的数据后第一个单元格为空
boolean addFlag = false;
//循环Excel的列
for (int c = 0; c < this.totalCells; c++) {
Cell cell = row.getCell(c);
if (null != cell) {
String cellValue = getCellValue(cell);
// 针对又见插入的行 poi默认它不算空行 判断该行如果有一个 不为空 该条记录视为有效 20180820 yunguang modified
if ("".equals(cellValue) && (!addFlag)) {
addFlag = false;
} else {
addFlag = true;
}
if("".equals(cellValue)){
temp.add("\\N");
}
else {
temp.add(cellValue);
}
} else {
temp.add("\\N");
}
}
if (addFlag) { // 判断是否为有效数据
valueList.add(temp);
}
}
return valueList;
}
/**
* 读取Excel表头
*
* @param wb
* @return
*/
private List readExcelTitle(Workbook wb) {
//得到第一个shell
Sheet sheet = wb.getSheetAt(0);
//得到Excel的行数
this.totalRows = sheet.getPhysicalNumberOfRows();
//得到Excel的列数(前提是有行数)
if (totalRows >= 1 && sheet.getRow(0) != null) {
this.totalCells = sheet.getRow(0).getPhysicalNumberOfCells();
}
List titleList = new ArrayList();
// 读取标题
Row row = sheet.getRow(0);
if (row == null) return null;
//循环Excel的列
for (int c = 0; c < this.totalCells; c++) {
Map temp = new HashMap();
Cell cell = row.getCell(c);
if (null != cell) {
temp.put("name", getCellValue(cell));
titleList.add(temp);
}
else {
// 0816 遇到一个空白标题 结束
this.totalCells=c;
break;
}
}
return titleList;
}
/**
* 读取Excel表头
*
* @param wb
* @return
*/
private Map getColumnType(Workbook wb) {
//得到第一个shell
Sheet sheet = wb.getSheetAt(0);
//得到Excel的行数
this.totalRows = sheet.getPhysicalNumberOfRows();
//得到Excel的列数(前提是有行数)
if (totalRows >= 1 && sheet.getRow(0) != null) {
this.totalCells = sheet.getRow(0).getPhysicalNumberOfCells();
}
if (this.totalRows > 101) {
totalRows = 101;
}
// 0,string
Map rowColumns = new HashMap();
// 记录空行 规则 如果空行大于1行 下面的视为垃圾数据 忽略 20180820 yunguang modified
int blankLine=0;
//循环Excel行数,从第二行开始。标题不入库
for (int r = 1; r < totalRows; r++) {
Row row = sheet.getRow(r);
if (row == null) {
this.totalRows++;
blankLine ++;
if (blankLine > 1) {
// totalrows 重新定义总行数 20180820 yunguang modified
this.totalRows = r;
break;
}
continue;
}
else { // 无空白行 重置计数器
blankLine = 0;
}
//循环Excel的列
for (int c = 0; c < this.totalCells; c++) {
Cell cell = row.getCell(c);
if (null != cell) {
String cellValue = getCellValue(cell);
Object value = rowColumns.get(c);
String val = (String) value;
String valType =FileOperateUtil.getType(cellValue);
if (!"string".equals(val)) {
if("string".equals(valType)){
rowColumns.put(c,valType);
}
else if(!"double".equals(val)){
rowColumns.put(c,valType);
}
}
}
else {
rowColumns.put(c,"string");
}
}
}
return rowColumns;
}
private String getCellValue(Cell cell) {
String value = "";
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
switch (cell.getCellTypeEnum()) {
case STRING:
value = cell.getRichStringCellValue().getString();
break;
case NUMERIC:
if ("General".equals(cell.getCellStyle().getDataFormatString())) {
// 数据格式
DecimalFormat df = new DecimalFormat("#.########");
value = df.format(cell.getNumericCellValue())+"";
} else if ("m/d/yy".equals(cell.getCellStyle().getDataFormatString())) {
value = sdf.format(cell.getDateCellValue())+"";
} else {
// 针对十位数以上的数字出现科学记数法的处理 20180820 yunguang modified
value = new DecimalFormat("#").format(cell.getNumericCellValue());
}
break;
case BOOLEAN:
value = cell.getBooleanCellValue() + "";
break;
case BLANK:
value = "";
break;
default:
value = cell.toString();
break;
}
return value;
}
}
CSVUtils
import com.csvreader.CsvReader;
import info.monitorenter.cpdetector.io.*;
import lombok.extern.slf4j.Slf4j;
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* CSV 文件的操作类
* Created by Administrator on 2018/7/10.
*/
@Slf4j
public class CSVUtils {
// 预览或者正式上传(true 为预览)
private boolean isPriview = true;
public CSVUtils() {
}
public CSVUtils(boolean isPriview) {
this.isPriview = isPriview;
}
/**
* 导出**
*
* @param file @param file csv文件(路径+文件名),csv文件不存在会自动创建
* @param dataList 数据
* @return
*/
public static boolean exportCsv(File file, List<String> dataList) {
boolean isSucess = false;
FileOutputStream out = null;
OutputStreamWriter osw = null;
BufferedWriter bw = null;
try {
out = new FileOutputStream(file);
osw = new OutputStreamWriter(out, "UTF-8");
bw = new BufferedWriter(osw);
if (dataList != null && !dataList.isEmpty()) {
for (String data : dataList) {
bw.append(data).append("\r");
}
}
isSucess = true;
} catch (Exception e) {
isSucess = false;
} finally {
if (bw != null) {
try {
bw.close();
bw = null;
} catch (IOException e) {
e.printStackTrace();
}
}
if (osw != null) {
try {
osw.close();
osw = null;
} catch (IOException e) {
e.printStackTrace();
}
}
if (out != null) {
try {
out.close();
out = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}
return isSucess;
}
/**
* 导入
*
* @param file csv文件(路径+文件)
* @return
*/
public static List<String> importCsv(File file) {
List<String> dataList = new ArrayList<String>();
BufferedReader br = null;
try {
InputStreamReader reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
br = new BufferedReader(reader);
String line = "";
while ((line = br.readLine()) != null) {
dataList.add(line);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (br != null) {
try {
br.close();
br = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}
return dataList;
}
/**
* 调用该方法的模块:
* 本地调用
* 功能描述:
* 获取该文件内容的编码格式
* @param:
* @return:
* @auther: solmyr
* @date: 2018/8/16 下午3:29
*/
private Charset getFileEncode(String filePath) {
try {
File file = new File(filePath);
CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
detector.add(new ParsingDetector(false));
detector.add(JChardetFacade.getInstance());
detector.add(ASCIIDetector.getInstance());
detector.add(UnicodeDetector.getInstance());
Charset charset = null;
charset = detector.detectCodepage(file.toURI().toURL());
if (charset != null) {
return charset;
}
} catch (Exception e) {
log.error("get file encode error, filePath: " + filePath, e);
}
return Charset.forName("UTF-8");
}
/**
* 获取 csv 文件信息
*
* @param fileName 文件名
* @param tmpFilePath 接收到的文件对象
* @return
*/
public Map<String, Object> getCSVInfo(String fileName, String tmpFilePath) {
Map<String, Object> result = new HashMap<String, Object>();
String filePath = tmpFilePath;
List titleList = new ArrayList();
List valueList = new ArrayList();
Map rowColumns = new HashMap();
try {
Charset fileEncode = getFileEncode(filePath);
File fa = new File(filePath);
FileInputStream fi = new FileInputStream(fa);
CsvReader cr = new CsvReader(fi, fileEncode);
int i = 0;
while (cr.readRecord()) {
if (i == 0) {
String[] rs = cr.getValues();
for (String s : rs) {
Map temp = new HashMap();
temp.put("name", s);
titleList.add(temp);
}
} else {
if (isPriview && i > 100) break;
List temp = new ArrayList();
String[] rs = cr.getValues();
int k = 0;
for (String s : rs) {
Object value = rowColumns.get(k);
String val = (String) value;
if (!"string".equals(val)) {
if(!"double".equals(val)){
rowColumns.put(k, FileOperateUtil.getType(s));
}
}
temp.add(s);
k++;
}
valueList.add(temp);
}
i++;
}
cr.close();
fi.close();
} catch (IOException e) {
e.printStackTrace();
}
if (isPriview){
valueList.add(0, rowColumns);
}
result.put("error", null);
result.put("tablename", fileName.substring(0, fileName.lastIndexOf('.')));
result.put("schema", titleList);
result.put("data", valueList);
result.put("columnstypes", rowColumns);
return result;
}
}
调用代码接口:
import org.springframework.web.multipart.MultipartFile;
import java.util.Map;
/**
* 对上传的文件进行格式解析
* Created by Administrator on 2018/7/4.
*/
public interface ExcelCsvFileParserService {
/**
* 获取上传文件的目录
* @param userId 用户ID
* @param file 用户上传的文件
* @return
*/
String getUploadPath(String userId,MultipartFile file);
/**
* 上传文件取消获取上传完成
* 删除临时文件
* @param userId 用户ID
* @param fileName 用户上传的文件名
* @return
*/
boolean handlePreviewCancel(String userId,String fileName);
/**
* 获取处理后的结果
* @param isPreview
* @param filename
* @param fullPath
* @return
*/
Map<String, Object> HandlerFile(boolean isPreview,String filename,String fullPath);
}
实现类如下:
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.IOException;
import java.util.Map;
/**
* Created by Administrator on 2018/7/4.
*/
@Service
@Slf4j
public class ExcelCsvFileParserServiceImpl implements ExcelCsvFileParserService {
@Override
public Map<String, Object> HandlerFile(boolean isPreview,String filename,String fullPath) {
// 判断文件类型 是excel 文件还是csc
if (".csv".equals(filename.toLowerCase().substring(filename.toLowerCase().lastIndexOf('.')))) {
return new CSVUtils(isPreview).getCSVInfo(filename, fullPath);
} else {
return new ExcelUtils(isPreview).getExcelInfo(filename, fullPath);
}
}
@Override
public boolean handlePreviewCancel(String userId,String fileName){
boolean isDelete = false;
// 获取上传目录
File upload = FileOperateUtil.getAbsoluteUploadPath();
// 临时存放文件目录
String tmpPath= upload + File.separator +userId+File.separator;
String fullPath = tmpPath + fileName;
File tempFilePath = new File(fullPath);
if (tempFilePath.exists()) {
isDelete = tempFilePath.delete();
}
return isDelete;
}
@Override
public String getUploadPath(String userId, MultipartFile file) {
String filename = file.getOriginalFilename();
// 获取上传目录
File upload = FileOperateUtil.getAbsoluteUploadPath();
// 临时存放文件目录
String tmpPath= upload + File.separator +userId+File.separator;
File tempFilePath = new File(tmpPath);
if (!tempFilePath.exists()) tempFilePath.mkdirs();
String fullPath = tmpPath + filename;
try {
// 保存临时文件
file.transferTo(new File(fullPath));
} catch (IOException e) {
e.printStackTrace();
}
return fullPath;
}
}
如有性能问题或者隐含bug,期待评论拍砖!!!
补充:
***********************************************************************************************************************************
poem文件增加如下(import com.csvreader.CsvReader的jar包)
<dependency>
<groupId>net.sourceforge.javacsv</groupId>
<artifactId>javacsv</artifactId>
<version>2.0</version>
</dependency>
补充 getType 方法
// FileOperateUtil
public static String getType(String str) {
// 优先判断日期类型
String patterndate1 = "\\d{4}(-)\\d{2}(-)\\d{2}\\s\\d{2}(:)\\d{2}(:)\\d{2}";
String patterndate2 = "\\d{4}(-)\\d{2}(-)\\d{2}";
if (str.matches(patterndate1) || str.matches(patterndate2)) {
return FileType.DATE;
} else {
// 先判断double类型
if (str.contains(".")) {
try{
Double.parseDouble(str);
return FileType.DOUBLE;
}
catch (Exception exd){
return FileType.STRING;
}
} else {
try {
Long.parseLong(str);
return FileType.BIGINT;
} catch (Exception e) {
try {
Double.parseDouble(str);
return FileType.DOUBLE;
} catch (Exception ex) {
return FileType.STRING;
}
}
}
}
}