POI 读写大量数据到excel

poi操作excel,有多套方法,应用场景和性能对比见 POI读写大数据量EXCEL - tootwo2 - 博客园

		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>3.17</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>3.17</version>
		</dependency>


import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;

public class App {
    
    public static void main(String[] args) throws IOException {
        SXSSFWorkbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory, exceeding rows will be flushed to disk
        SXSSFSheet sh1 = wb.createSheet();
        SXSSFSheet sh2 = wb.createSheet();
        SXSSFSheet sh3 = wb.createSheet();
        
        long s1 = System.currentTimeMillis();
        insertData(sh1);
        insertData(sh2);
        insertData(sh3);
        long s2 = System.currentTimeMillis();
        FileOutputStream out = new FileOutputStream(new File("E:/sxssf.xlsx"));
        wb.write(out);
        out.close();

        // dispose of temporary files backing this workbook on disk
        wb.dispose();
        wb.close();
        long s3 = System.currentTimeMillis();
        
        System.out.println((s2-s1)/1000 + "   " + (s3-s2)/1000);//16秒   15秒
    }

    private static void insertData(SXSSFSheet sh) {
        for(int rownum = 0; rownum < 100*3000; rownum++){
            SXSSFRow row = sh.createRow(rownum);
            for(int cellnum = 0; cellnum < 10; cellnum++){
                SXSSFCell cell = row.createCell(cellnum);
                String address = "data_"+rownum+"_"+cellnum;
                cell.setCellValue(address);
            }
        }
    }
}

poi解析excle2007,内存占用可能会是文件大小的200倍。很容易内存溢出。

使用流式方法解析excel 2007,避免内存溢出。 

package gaofeng.poi;

import java.io.InputStream;
import java.util.HashMap;


import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * @author zhengqiang.zq
 * @date 2018/05/04 ,参考链接:https://poi.apache.org/spreadsheet/how-to.html#sxssf
 */
public class App2 {
    
    public void processOneSheet(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);
        //从workbook.xml.res 中获取所有需要解析的xml文件,rid1 就是第一个sheet,其target就是该sheet所在的相对路径
        //<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
        //<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
        // <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/>
        // <Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
        // <Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings" Target="sharedStrings.xml"/>
        // <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
        // <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet2.xml"/>
        //</Relationships>
        //
        InputStream sheet2 = r.getSheet("rId1");
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private class SheetHandler extends DefaultHandler {

        /**
         * excel 常量数据对象,对应的就是sharedStrings.xml文件中的内容,类似excel中的常量池
         */
        private SharedStringsTable sst;
        /**
         * 当前处理的文本值
         */

        private String lastContents;
        /**
         * 下一个文本是不是String类型
         */
        private boolean nextIsString;
        /**
         * 当前单元格的索引值,从0开始,0:第一列
         */
        private Short index;
       
        
        HashMap<Short,Object> currentRow = new HashMap<>();

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        @Override
        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
            //第一行
            if (name.equals("row")) {
                currentRow.put((short) -1, new Long(attributes.getValue("r")));
            }
            //c => cell 一个单元格,
            if (name.equals("c")) {
                //r属性表示单元格位置,例如A2,C3
                String coordinate = attributes.getValue("r");
                CellReference cellReference = new CellReference(coordinate);
                //根据r属性获取其列下标,从0开始
                index = cellReference.getCol();

                //t:属性代表单元格类型
                String cellType = attributes.getValue("t");
                if (cellType != null && cellType.equals("s")) {
                    //t="s"表示是改单元格是字符串,那么该单元格的实际值值需要去SharedStringsTable中取
                    nextIsString = true;
                } else {
                    nextIsString = false;
                }
            }
            // Clear contents cache
            lastContents = "";
        }

        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
            if (nextIsString) {
                int idx = Integer.parseInt(lastContents);
                //从SharedStringsTable中取当前单元格的实际值
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                lastContents = lastContents.replaceAll("\n", "\\\\n");
                nextIsString = false;
            }

            // v => contents of a cell
            // Output after we've seen the string contents
            if (name.equals("v")) {
                //不管是不是数字还是文本值
                currentRow.put(index, lastContents);
            }
            if (name.equals("row")) {
                
                System.out.println(currentRow);
                currentRow = new HashMap<>();
            }
        }

        @Override
        public void endDocument() throws SAXException {
            System.out.println("endDocument()");
        }

        /**
         * 通知一个元素中的字符,是否处理由自己决定,比如  <v>1</v>,
         *
         * @param ch     The characters. 整个sheet.xml的char[]数组表示
         * @param start  The start position in the character array. 本次处理的元素值的的开始位置
         * @param length The number of characters to use from the ,元素长度
         *               character array.
         * @throws SAXException Any SAX exception, possibly
         * wrapping another exception.
         * @see ContentHandler#characters
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            //对于lastContents是String类型来说,lastContent存放的是其在SharedStringsTable中的索引,
            // 对于是数字类型来说,lastContents存放就是该数字的字符串表示
            
            lastContents = new String(ch, start, length);
            System.out.println(lastContents);
        }
    }

    public static void main(String[] args) throws Exception {
        new App2().processOneSheet("G:\\明细.xlsx");
        System.out.println("-----------------finish, " );
    }
}
//---------------------------------------//
{-1=82, 0=2016-01-25, 1=汇缴, 2=201601, 3=1356, 4=42877.05}
150
18
151
1356
44233.05
{-1=83, 0=2016-02-23, 1=汇缴, 2=201602, 3=1356, 4=44233.05}
152
154
153
1356
45589.05
{-1=84, 0=2016-03-21, 1=汇缴\n吃饭, 2=201603, 3=1356, 4=45589.05}
{-1=87}
999
{-1=88, 0=999}
endDocument()
-----------------finish, 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值