//整体excel加载 问题出现在第一步文件加载,经测试5M文件的加载基本已经到我电脑4G内存的极限了,
//当然如果电脑配置更好的话可以优化jvm,应该会再大一些
一、小excel处理
import com.yeepay.prop.common.utils.excel.HxlsReader;
import com.yeepay.prop.common.utils.excel.IRowReader;
import com.yeepay.prop.common.utils.excel.XxlsReader;
import com.yeepay.prop.common.utils.exception.SysOperCustomException;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.*;
public class POIUtil {
/**
* 导入Excel文件
* 内容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件对象
* @param strKeys : Map的Key列表,Value为相应的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) {
String[] strKey = strKeys.split(",");
List<Map<String,String>> listMap = new ArrayList<Map<String,String>>();
String fileName = excelFile.getPath();
int i = 0;
try {
Workbook workbook = null;
String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase();
try {
if (fileType.equals("xls")) {
<span style="color:#FF0000;">workbook = new XSSFWorkbook(new FileInputStream(excelFile))</span>;//<span style="font-size:18px;color:#FF0000;">如果文件过大 一定会在里卡死,造成内存溢出</span>
} else if (fileType.equals("xlsx")) {
<span style="color:#FF0000;"> workbook = new HSSFWorkbook(new FileInputStream(excelFile));</span>
}
} catch (Exception e) {
throw new SysOperCustomException("导入文件格式错误: "+e.getMessage());
}
Sheet sheet = workbook.getSheetAt(0);
while (true) {
Row row = sheet.getRow(i);
if (row == null)
break;
Map<String,String> map = new HashMap<String,String>();
Boolean flag = false;
for(int keyIndex = 0; keyIndex < strKey.length; keyIndex++){
//System.out.println(keyIndex+" "+row.getCell(keyIndex));
Cell cell = row.getCell(keyIndex);
//null 不可以 设置格式为cell_type_string
if(null != row.getCell(keyIndex))
{
if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell))
{
// 数值、日期类型
double d = cell.getNumericCellValue();
// 日期类型
Date date = HSSFDateUtil.getJavaDate(d);
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
.format(date));
}
else
{
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
}
}
String cellValue = "";
if(null != cell){
cellValue = row.getCell(keyIndex).getStringCellValue();
}
//如果行数据存在不为空 或 null的数据,则视为有效数据
if(null != cellValue && !cellValue.isEmpty())
flag = true;
map.put(strKey[keyIndex], cellValue.trim());
}
//只添加有效数据
if(flag)
listMap.add(map);
i++;
}
} catch (Exception e) {
e.printStackTrace();
System.out.println("导入中断,错误位置:第"+ i +"行数据!");
}
return listMap;
}
}
调用方法如下
import java.io.File;
import java.util.Date;
import java.util.List;
import java.util.Map;
public class Test {
@org.junit.Test
public void testImport() {
File image = new File("D:\\test.xls");
String titelStr = "bankCustomerNo,cardNumber,tradeAmount,tradeTime,requestId";//实体属性
System.out.println(new Date());
List<Map<String,String>> list = POIUtil.importExcelToMap(image, titelStr);
//模板的判定
//在这里处理业务逻辑与数据处理 实体的创建于保存
for(Map<String, String> map : list) {
User user = new User();
user.setName(map.get("name"));
.
.
.
}
}
}
在我以前所有的excel数据导入都是通过poi的简单应用,直接加载整个excel进行数据返回统计,第一步直接造成了不可能对于大文件、大数据的的处理。因此通过参考http://blog.sina.com.cn/s/blog_628d4dd101017quk.html并在以上的基础上编写了如下工具类,之所以工具类不支持大数据量的处理是因为其中方法optRows编写代码问题,该方法为每一行数据的存储都会调用该方法,当数据量达到10万条数据的时候就会调用十万次,list中存在十万个map数据,map中又存在十万个key、10万个value,并且这些对象会长期持有不会释放掉,因此会造成gc回收出现文件,最后超出内存 异常爆出out of memory
之所以保留了此方法,是因为数据一起返回操作会更简便,业务书写会更方便(相对于大excel处理)
二、中等excel处理
import com.yeepay.prop.common.utils.excel.HxlsReader;
import com.yeepay.prop.common.utils.excel.IRowReader;
import com.yeepay.prop.common.utils.excel.XxlsReader;
import com.yeepay.prop.common.utils.exception.SysOperCustomException;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.*;
public class POIUtil {
/**
* 导入Excel文件
* 内容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件对象
* @param strKeys : Map的Key列表,Value为相应的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) {
String[] strKey = strKeys.split(",");
List<Map<String,String>> listMap = new ArrayList<Map<String,String>>();
String fileName = excelFile.getPath();
int i = 0;
try {
Workbook workbook = null;
String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase();
try {
if (fileType.equals("xls")) {
workbook = new XSSFWorkbook(new FileInputStream(excelFile));
} else if (fileType.equals("xlsx")) {
workbook = new HSSFWorkbook(new FileInputStream(excelFile));
}
} catch (Exception e) {
throw new SysOperCustomException("导入文件格式错误: "+e.getMessage());
}
Sheet sheet = workbook.getSheetAt(0);
while (true) {
Row row = sheet.getRow(i);
if (row == null)
break;
Map<String,String> map = new HashMap<String,String>();
Boolean flag = false;
for(int keyIndex = 0; keyIndex < strKey.length; keyIndex++){
//System.out.println(keyIndex+" "+row.getCell(keyIndex));
Cell cell = row.getCell(keyIndex);
//null 不可以 设置格式为cell_type_string
if(null != row.getCell(keyIndex))
{
if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell))
{
// 数值、日期类型
double d = cell.getNumericCellValue();
// 日期类型
Date date = HSSFDateUtil.getJavaDate(d);
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
.format(date));
}
else
{
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
}
}
String cellValue = "";
if(null != cell){
cellValue = row.getCell(keyIndex).getStringCellValue();
}
//如果行数据存在不为空 或 null的数据,则视为有效数据
if(null != cellValue && !cellValue.isEmpty())
flag = true;
map.put(strKey[keyIndex], cellValue.trim());
}
//只添加有效数据
if(flag)
listMap.add(map);
i++;
}
} catch (Exception e) {
e.printStackTrace();
System.out.println("导入中断,错误位置:第"+ i +"行数据!");
}
return listMap;
}
/**
* 导入Excel文件 暂时可以读取10万以内数据
* 内容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件对象
* @param strKeys : Map的Key列表,Value为相应的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importBigExcelToMap(File excelFile, String strKeys) {
String filename = excelFile.getPath();
String fileType = filename.substring(filename.lastIndexOf(".")+1).toLowerCase();
List<Map<String, String>> list = new ArrayList<Map<String, String>>();
try {
if (fileType.equals("xls")) {
list = <strong>HxlsUtil.importExcelToMap</strong>( excelFile, strKeys);
} else if (fileType.equals("xlsx")) {
list = <strong>XxlsUtil.importExcelToMap</strong>( excelFile,strKeys);
}
} catch (Exception e) {
System.out.println("导入文件格式错误: "+e.getMessage());
}
return list;
}
}
1.excel2003工具类
package com.yeepay.prop.common.utils;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* excel 2003 文件处理
*/
public class HxlsUtil implements HSSFListener {
private int minColumns;
private POIFSFileSystem fs;
private PrintStream output;
private int lastRowNumber;
private int lastColumnNumber;
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/** So we known which sheet we're on */
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
private ArrayList boundSheetRecords = new ArrayList();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
private int curRow;
private List<String> rowlist;
@SuppressWarnings( "unused")
private String sheetName;
private String title;
private List<Map<String, String>> mapList;
private HxlsUtil(POIFSFileSystem fs, String title)
throws SQLException {
this.fs = fs;
this.output = System.out;
this.minColumns = -1;
this.curRow = 0;
this.rowlist = new ArrayList<String>();
this.title = title;
this.mapList = new ArrayList<Map<String, String>>();
}
private HxlsUtil(String filename, String title) throws IOException, SQLException {
this(new POIFSFileSystem(new FileInputStream(filename)), title);
}
<strong>public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
<span style="color:#FF0000;">String[] titleStr = title.split(",");</span>
<span style="color:#FF0000;">int col</span> = rowlist.size();
<span style="color:#FF0000;">Map<String, String> map = new HashMap<String, String>();</span>
for(int i = 0; i < titleStr.length; i++) {
if(col >= titleStr.length)
map.put(titleStr[i], rowlist.get(i).trim());
else
map.put(titleStr[i], "");
}
return map.size() > 0 ? map : null;
}</strong>
/**
* 遍历 excel 文件
*/
public void process() throws IOException {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(
formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* HSSFListener 监听方法,处理 Record
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// 如果有需要,则建立子工作薄
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener
.getStubHSSFWorkbook();
}
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord
.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
rowlist.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: //单元格为布尔类型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue()+"";
rowlist.add(thisColumn, thisStr);
break;
case FormulaRecord.sid: //单元格为公式类型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
frec.getParsedExpression()) + '"';
}
rowlist.add(thisColumn,thisStr);
break;
case StringRecord.sid://单元格中公式的字符串
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("")?" ":value;
this.rowlist.add(thisColumn, value);
break;
case LabelSSTRecord.sid: //单元格为字符串类型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
rowlist.add(thisColumn, " ");
} else {
value = sstRecord
.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("")?" ":value;
rowlist.add(thisColumn,value);
}
break;
case NumberRecord.sid: //单元格为数字类型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
value = formatListener.formatNumberDateCell(numrec).trim();
value = value.equals("")?" ":value;
// 向容器加入列值
rowlist.add(thisColumn, value);
break;
default:
break;
}
// 遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// 空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
rowlist.add(thisColumn," ");
}
// 更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// 行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColumns > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
// 行结束时, 调用 optRows() 方法
lastColumnNumber = -1;
try {
mapList.add(optRows(sheetIndex,curRow, rowlist));
} catch (SQLException e) {
e.printStackTrace();
}
rowlist.clear();
}
}
/**
* 导入Excel文件
* 内容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件对象
* @param title : Map的Key列表,Value为相应的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String title) throws IOException, SQLException {
HxlsUtil hxls = new HxlsUtil(excelFile.getPath(), title);
hxls.process();
return hxls.mapList;
}
}
2.2007excel工具类
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.*;
/**
* Excel 2007采用SAX解析文件,注意JDK版本1.5以上
*/
public class XxlsUtil extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private int sheetIndex = -1;
private List<String> rowlist = new ArrayList<String>();
private int curRow = 0;
private int curCol = 0;
private String title;
private List<Map<String, String>> mapList;
private XxlsUtil(String fileName, String title) throws Exception {
this.title = title;
this.mapList = new ArrayList<Map<String, String>>();
processOneSheet(fileName, 1);
}
public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
String[] titleStr = title.split(",");
int col = rowlist.size();
Map<String, String> map = new HashMap<String, String>();
for(int i = 0; i < titleStr.length; i++) {
if(col >= titleStr.length)
map.put(titleStr[i], rowlist.get(i).trim());
else
map.put(titleStr[i], "");
// System.out.print(rowlist.get(i) + " ");
}
return map.size() > 0 ? map : null;
}
//只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
public void processOneSheet(String filename,int sheetId) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// rId2 found by processing the Workbook
// 根据 rId# 或 rSheet# 查找sheet
InputStream sheet2 = r.getSheet("rId"+sheetId);
sheetIndex++;
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
/**
* 遍历 excel 文件
*/
public void process(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
curRow = 0;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst)
throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader();
// .createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => 单元格
if (name.equals("c")) {
// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// 置空
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// 根据SST的索引值的到单元格的真正要存储的字符串
// 这时characters()方法可能会被调用多次
if (nextIsString) {
try {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
.toString();
} catch (Exception e) {
}
}
// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
if (name.equals("v")) {
String value = lastContents.trim();
value = value.equals("")?" ":value;
rowlist.add(curCol, value);
curCol++;
}else {
//如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
if (name.equals("row")) {
try {
mapList.add(optRows(sheetIndex, curRow, rowlist));
} catch (SQLException e) {
e.printStackTrace();
}
rowlist.clear();
curRow++;
curCol = 0;
}
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
//得到单元格内容的值
lastContents += new String(ch, start, length);
}
/**
* 导入Excel文件
* 内容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件对象
* @param title : Map的Key列表,Value为相应的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String title){
try {
XxlsUtil xxls = new XxlsUtil(excelFile.getPath(), title);
return xxls.mapList;
}catch (Exception e) {
e.printStackTrace();
}
return null;
}
}
调用方法如下
import com.yeepay.prop.common.utils.POIUtil;
import java.io.File;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
* Description
* PackageName: PACKAGE_NAME
*
* @author hua.jin
* @since 14-12-22 上午9:26
*/
public class Test {
@org.junit.Test
public void testImport() {
File image = new File("D:\\test.xls");
String titelStr = "name,age,gender";
System.out.println(new Date());
List<Map<String,String>> list = POIUtil.<strong>importBigExcelToMap</strong>(image, titelStr); <pre name="code" class="java"> //模板的判定
//在这里处理业务逻辑与数据处理 实体的创建于保存
for(Map<String, String> map : list) {
User user = new User();
user.setName(map.get("name"));
.
.
.
}
}}
三、大excel文件的处理
这个就没什么好说的了,该方法 主要参考了http://blog.csdn.net/goodkuang2012/article/details/7350985,将行数据进行了接口处理,因为一行行数据进行处理也就缺失了整体文件的把控,在异常处理这块 该好好考虑一下了,在此基础上的改动没有太大,也就不再叙述了,有得有失吧
经测试大文件处理80M没有问题