poi操作excel,有多套方法,应用场景和性能对比见 POI读写大数据量EXCEL - tootwo2 - 博客园
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
public class App {
public static void main(String[] args) throws IOException {
SXSSFWorkbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory, exceeding rows will be flushed to disk
SXSSFSheet sh1 = wb.createSheet();
SXSSFSheet sh2 = wb.createSheet();
SXSSFSheet sh3 = wb.createSheet();
long s1 = System.currentTimeMillis();
insertData(sh1);
insertData(sh2);
insertData(sh3);
long s2 = System.currentTimeMillis();
FileOutputStream out = new FileOutputStream(new File("E:/sxssf.xlsx"));
wb.write(out);
out.close();
// dispose of temporary files backing this workbook on disk
wb.dispose();
wb.close();
long s3 = System.currentTimeMillis();
System.out.println((s2-s1)/1000 + " " + (s3-s2)/1000);//16秒 15秒
}
private static void insertData(SXSSFSheet sh) {
for(int rownum = 0; rownum < 100*3000; rownum++){
SXSSFRow row = sh.createRow(rownum);
for(int cellnum = 0; cellnum < 10; cellnum++){
SXSSFCell cell = row.createCell(cellnum);
String address = "data_"+rownum+"_"+cellnum;
cell.setCellValue(address);
}
}
}
}
poi解析excle2007,内存占用可能会是文件大小的200倍。很容易内存溢出。
使用流式方法解析excel 2007,避免内存溢出。
package gaofeng.poi;
import java.io.InputStream;
import java.util.HashMap;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* @author zhengqiang.zq
* @date 2018/05/04 ,参考链接:https://poi.apache.org/spreadsheet/how-to.html#sxssf
*/
public class App2 {
public void processOneSheet(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
//从workbook.xml.res 中获取所有需要解析的xml文件,rid1 就是第一个sheet,其target就是该sheet所在的相对路径
//<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
//<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
// <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/>
// <Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
// <Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings" Target="sharedStrings.xml"/>
// <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
// <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet2.xml"/>
//</Relationships>
//
InputStream sheet2 = r.getSheet("rId1");
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*/
private class SheetHandler extends DefaultHandler {
/**
* excel 常量数据对象,对应的就是sharedStrings.xml文件中的内容,类似excel中的常量池
*/
private SharedStringsTable sst;
/**
* 当前处理的文本值
*/
private String lastContents;
/**
* 下一个文本是不是String类型
*/
private boolean nextIsString;
/**
* 当前单元格的索引值,从0开始,0:第一列
*/
private Short index;
HashMap<Short,Object> currentRow = new HashMap<>();
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
//第一行
if (name.equals("row")) {
currentRow.put((short) -1, new Long(attributes.getValue("r")));
}
//c => cell 一个单元格,
if (name.equals("c")) {
//r属性表示单元格位置,例如A2,C3
String coordinate = attributes.getValue("r");
CellReference cellReference = new CellReference(coordinate);
//根据r属性获取其列下标,从0开始
index = cellReference.getCol();
//t:属性代表单元格类型
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
//t="s"表示是改单元格是字符串,那么该单元格的实际值值需要去SharedStringsTable中取
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
//从SharedStringsTable中取当前单元格的实际值
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
lastContents = lastContents.replaceAll("\n", "\\\\n");
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
//不管是不是数字还是文本值
currentRow.put(index, lastContents);
}
if (name.equals("row")) {
System.out.println(currentRow);
currentRow = new HashMap<>();
}
}
@Override
public void endDocument() throws SAXException {
System.out.println("endDocument()");
}
/**
* 通知一个元素中的字符,是否处理由自己决定,比如 <v>1</v>,
*
* @param ch The characters. 整个sheet.xml的char[]数组表示
* @param start The start position in the character array. 本次处理的元素值的的开始位置
* @param length The number of characters to use from the ,元素长度
* character array.
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @see ContentHandler#characters
*/
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
//对于lastContents是String类型来说,lastContent存放的是其在SharedStringsTable中的索引,
// 对于是数字类型来说,lastContents存放就是该数字的字符串表示
lastContents = new String(ch, start, length);
System.out.println(lastContents);
}
}
public static void main(String[] args) throws Exception {
new App2().processOneSheet("G:\\明细.xlsx");
System.out.println("-----------------finish, " );
}
}
//---------------------------------------//
{-1=82, 0=2016-01-25, 1=汇缴, 2=201601, 3=1356, 4=42877.05}
150
18
151
1356
44233.05
{-1=83, 0=2016-02-23, 1=汇缴, 2=201602, 3=1356, 4=44233.05}
152
154
153
1356
45589.05
{-1=84, 0=2016-03-21, 1=汇缴\n吃饭, 2=201603, 3=1356, 4=45589.05}
{-1=87}
999
{-1=88, 0=999}
endDocument()
-----------------finish,