poi 通过驱动模式（SAX）解析防止读取大数据excel文件导致的内存溢出

最新推荐文章于 2020-10-08 16:22:29 发布

weixin_30535043

最新推荐文章于 2020-10-08 16:22:29 发布

阅读量383

点赞数

文章标签：移动开发大数据 java

原文链接：http://www.cnblogs.com/lzjdm/p/10407846.html

版权

前几天项目中遇到了导入大数据的excel表格导致jvm内存溢出的情况，现记录下解决方案。

poi 解析文件分为用户模式和驱动模式解析，用户模式无非就是new HSSFWorkbook(is)，直接通过文件流读取，这种方式占用的内存比较大，大数据不建议采用此方式解析。

驱动模式：将文件解析成csv格式的数据，也是跟用户模式一样分03版和07版的excel进行解析，下面直接贴代码，后面的main方法为测试方法。

XLS2CSV.java

/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.ExtendedFormatRecord;
import org.apache.poi.hssf.record.FontRecord;
import org.apache.poi.hssf.record.FormatRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * 用sax解析xls 格式文档 转成csv格式
 * 
 *
 */
public class XLS2CSV implements HSSFListener {
    private int minColumns;
    private POIFSFileSystem fs;
    private PrintStream output;
    
    
    public List<ArrayList<String>> getData() {
        return data;
    }

    // 当前行
    private int curRow = 0;

    // 存储行记录的容器
    private List<String> rowlist = new ArrayList<String>();
    
    //样式记录容器
    private List<String> rowType = new ArrayList<String>();
    @SuppressWarnings("unchecked")
//    private ArrayList boundSheetRecords = new ArrayList();
    
    private String sheetName;
  //Excel数据
    private List<ArrayList<String>> data = new ArrayList<ArrayList<String>>();

    private int lastRowNumber;
    private int lastColumnNumber;

     private int currentSheetChildPage = 1;
    /** Should we output the formula, or the value it has? */
    private boolean outputFormulaValues = true;

    /** For parsing Formulas */
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;

    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;
    
    /** So we known which sheet we're on */
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;
    private List boundSheetRecords = new ArrayList<>();

    // For handling formulas with string results
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;

    /**
     * Creates a new XLS -> CSV converter
     * @param fs The POIFSFileSystem to process
     * @param output The PrintStream to output the CSV to
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
        this.fs = fs;
        this.output = output;
        this.minColumns = minColumns;
    }

    /**
     * Creates a new XLS -> CSV converter
     * @param filename The file to process
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public XLS2CSV(InputStream is, int minColumns) throws IOException, FileNotFoundException {
        this(
                new POIFSFileSystem(is),
                System.out, minColumns
        );
    }

    /**
     * Initiates the processing of the XLS file to CSV
     */
    public void process() throws IOException {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();

        if(outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }

        factory.processWorkbookEvents(request, fs);
    }

    /**
     * Main HSSFListener method, processes events, and outputs the
     *  CSV as the file is processed.
     */
    @Override
    public void processRecord(Record record) {
         int thisRow = -1;
            int thisColumn = -1;
            String thisStr = null;
            String value = null;
            
            
            switch (record.getSid()) {
            //---------add start---------
            case FontRecord.sid://字体记录
                /*FontRecord font = (FontRecord) record;
                
                short boldWeight = font.getBoldWeight();
                short fontHeight = font.getFontHeight();
                short colorPaletteIndex = font.getColorPaletteIndex();
                cellStyle = "style='";index++;
                cellStyle += "font-weight:" + boldWeight + ";"; // 
                cellStyle += "font-size: " + fontHeight / 2 + "%;"; // 
    */            break;
            case FormatRecord.sid://单元格样式记录
                /*FormatRecord format = (FormatRecord) record;*/
                break;
            case ExtendedFormatRecord.sid://扩展单元格样式记录
                /*ExtendedFormatRecord extendedFormat = (ExtendedFormatRecord) record;
                short borderTop = extendedFormat.getBorderTop();
                short borderRight = extendedFormat.getBorderRight();
                short borderBottom = extendedFormat.getBorderBottom();
                short leftBorderPaletteIdx = extendedFormat.getLeftBorderPaletteIdx();
                
                short alignment = extendedFormat.getAlignment();
                short verticalAlignment = extendedFormat.getVerticalAlignment();
                
                index++;
                alignStyle = "align='" + convertAlignToHtml(alignment) + "' ";
                alignStyle += "valign='" + convertVerticalAlignToHtml(verticalAlignment) + "' ";// 
                
                StringBuffer sb = new StringBuffer();
                sb.append(getBorderStyle(0, borderTop));
                sb.append(getBorderStyle(1, borderRight));
                sb.append(getBorderStyle(2, borderBottom));
                sb.append(getBorderStyle(3, leftBorderPaletteIdx));
                cellStyle += sb.toString();*/
                break;
            //---------add end---------
            case BoundSheetRecord.sid://遍历所有boundSheetRecord,每个sheet对应一个boundSheetRecord
                boundSheetRecords.add(record);
                break;
            case BOFRecord.sid://type=5为workbook的开始
                BOFRecord br = (BOFRecord) record;
                if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                    // 如果有需要，则建立子工作薄
                    if (workbookBuildingListener != null && stubWorkbook == null) {
                        stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                    }

                    sheetIndex++;
                    if (orderedBSRs == null) {
                        orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
                    }
                    sheetName = orderedBSRs[sheetIndex].getSheetname();
                    /*if(currentSheetIndex!=-1 && sheetIndex > currentSheetIndex){
                        if(data.size()>0){
                            String writeSheetName = orderedBSRs[sheetIndex-1].getSheetname();
                            String sheetDir = dirPath + "/" + writeSheetName;
                            String htmlPath = sheetDir + "/" + fileName.substring(0, fileName.lastIndexOf(".")) + "_"
                                    + writeSheetName + "_" + currentSheetChildPage + ".html";
                            writeHtml(writeSheetName, htmlPath);
                            data.clear();
                            currentSheetChildPage=1;
                        }
                    }
                    currentSheetIndex = sheetIndex;*/
                }
                break;
                
            case EOFRecord.sid:
                /*if(sheetIndex!=-1){
                    if(data.size()>0){
                        String sheetDir = dirPath + "/_a"+ (sheetIndex+1) + "-" + sheetName;
                        String htmlPath = sheetDir + "/" + fileName.substring(0, fileName.lastIndexOf(".")) + "_"
                                + sheetName + "_" + currentSheetChildPage + ".html";
                        boolean writeHtml = writeHtml(orderedBSRs[sheetIndex].getSheetname(), htmlPath);
                        data.clear();
                        if(writeHtml) currentSheetChildPage++;
                    }
                }*/
                currentSheetChildPage=1;
                break;
            case SSTRecord.sid://存储了xls所有文本单元格值，通过索引获取
                sstRecord = (SSTRecord) record;
                break;

            case BlankRecord.sid:
                BlankRecord brec = (BlankRecord) record;
                thisRow = brec.getRow();
                thisColumn = brec.getColumn();
                thisStr = "";
                rowlist.add(thisColumn, thisStr);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            case BoolErrRecord.sid: // 单元格为布尔类型
                BoolErrRecord berec = (BoolErrRecord) record;
                thisRow = berec.getRow();
                thisColumn = berec.getColumn();
                thisStr = berec.getBooleanValue() + "";
                rowlist.add(thisColumn, thisStr);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;

            case FormulaRecord.sid: // 单元格为公式类型
                FormulaRecord frec = (FormulaRecord) record;
                thisRow = frec.getRow();
                thisColumn = frec.getColumn();
                if (outputFormulaValues) {
                    if (Double.isNaN(frec.getValue())) {
                        // Formula result is a string
                        // This is stored in the next record
                        outputNextStringRecord = true;
                        nextRow = frec.getRow();
                        nextColumn = frec.getColumn();
                    } else {
                        thisStr = formatListener.formatNumberDateCell(frec);
                    }
                } else {
                    thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
                }
                rowlist.add(thisColumn, thisStr);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            case StringRecord.sid:// 单元格中公式的字符串
                if (outputNextStringRecord) {
                    // String for formula
                    StringRecord srec = (StringRecord) record;
                    thisStr = srec.getString();
                    thisRow = nextRow;
                    thisColumn = nextColumn;
                    outputNextStringRecord = false;
                }
                break;
            case LabelRecord.sid:
                LabelRecord lrec = (LabelRecord) record;
                curRow = thisRow = lrec.getRow();
                thisColumn = lrec.getColumn();
                value = lrec.getValue().trim();
                value = value.equals("") ? " " : value;
                this.rowlist.add(thisColumn, value);
               //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            case LabelSSTRecord.sid: // 单元格为字符串类型
                LabelSSTRecord lsrec = (LabelSSTRecord) record;
                curRow = thisRow = lsrec.getRow();
                thisColumn = lsrec.getColumn();
                if (sstRecord == null) {
                    rowlist.add(thisColumn, " ");
                    //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                } else {
                    value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
                    value = value.equals("") ? " " : value;
                    rowlist.add(thisColumn, value);
                    //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                }
                break;
            case NumberRecord.sid: // 单元格为数字类型
                NumberRecord numrec = (NumberRecord) record;
                curRow = thisRow = numrec.getRow();
                thisColumn = numrec.getColumn();
                value = formatListener.formatNumberDateCell(numrec).trim();
                value = value.equals("") ? " " : value;
                // 向容器加入列值
                rowlist.add(thisColumn, value);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            default:
                break;
            }

            // 遇到新行的操作
            if (thisRow != -1 && thisRow != lastRowNumber) {
                lastColumnNumber = -1;
            }

            // 空值的操作
            if (record instanceof MissingCellDummyRecord) {
                MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
                curRow = thisRow = mc.getRow();
                thisColumn = mc.getColumn();
                rowlist.add(thisColumn, " ");
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
            }

            // 更新行和列的值
            if (thisRow > -1)
                lastRowNumber = thisRow;
            if (thisColumn > -1)
                lastColumnNumber = thisColumn;

            // 行结束时的操作
            if (record instanceof LastCellOfRowDummyRecord) {
                if (minColumns > 0) {
                    // 列值重新置空
                    if (lastColumnNumber == -1) {
                        lastColumnNumber = 0;
                    }
                }
                lastColumnNumber = -1;

                // 每行结束时， 调用getRows() 方法(打印内容)
                //rowReader.getRows(sheetIndex, curRow, rowlist);
                
                ArrayList<String> list = new ArrayList<>();
                list.addAll(rowlist);
                data.add(list);
               /* if(data.size()==2000){
                    String sheetDir = dirPath + "/_a"+ (sheetIndex+1)+ "-" + sheetName;
                    String htmlPath = sheetDir + "/" + fileName.substring(0, fileName.lastIndexOf(".")) + "_"
                            + sheetName + "_" + currentSheetChildPage + ".html";
                    boolean writeHtml = writeHtml(orderedBSRs[sheetIndex].getSheetname(), htmlPath);
                    data.clear();
                    if(writeHtml) currentSheetChildPage++;
                }*/
                /*List<String> styleList = new ArrayList<>();
                styleList.addAll(rowType);
                styleData.add(styleList);
    */            
                // 清空容器
                rowlist.clear();
            }
    }

    public static void main(String[] args) throws Exception {


        XLS2CSV xls2csv = new XLS2CSV(new FileInputStream("数据xls.xls"), 20);
        xls2csv.process();
        List<ArrayList<String>> data2 = xls2csv.getData();
        for (ArrayList<String> arrayList : data2) {
            System.out.println(arrayList.toString());
        }
        
    }
}

XLSX2CSV.java



import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;  
  
/** 
 * A rudimentary XLSX -> CSV processor modeled on the 
 * POI sample program XLS2CSVmra from the package 
 * org.apache.poi.hssf.eventusermodel.examples. 
 * As with the HSSF version, this tries to spot missing 
 * rows and cells, and output empty entries for them. 
 * <p/> 
 * Data sheets are read using a SAX parser to keep the 
 * memory footprint relatively small, so this should be 
 * able to read enormous workbooks.  The styles table and 
 * the shared-string table must be kept in memory.  The 
 * standard POI styles table class is used, but a custom 
 * (read-only) class is used for the shared string table 
 * because the standard POI SharedStringsTable grows very 
 * quickly with the number of unique strings. 
 * <p/> 
 * For a more advanced implementation of SAX event parsing 
 * of XLSX files, see {@link XSSFEventBasedExcelExtractor} 
 * and {@link XSSFSheetXMLHandler}. Note that for many cases, 
 * it may be possible to simply use those with a custom 
 * {@link SheetContentsHandler} and no SAX code needed of 
 * your own! 
 */  
/**
 * 用sax解析xlsx 格式文档 转成csv格式
 * 
 *
 */
public class XLSX2CSV {  
    /** 
     * Uses the XSSF Event SAX helpers to do most of the work 
     * of parsing the Sheet XML, and outputs the contents 
     * as a (basic) CSV. 
     */  
    private class SheetToCSV implements SheetContentsHandler {  
        private boolean firstCellOfRow = false;  
        private int currentRow = -1;  
        private int currentCol = -1;  
  
        private void outputMissingRows(int number) {  
            for (int i = 0; i < number; i++) {  
            	curstr = new ArrayList<String>();
                for (int j = 0; j < minColumns; j++) {  
                	curstr.add("");  
                }  
                output.add(curstr);  
            }  
        }  
  
        @Override  
        public void startRow(int rowNum) {  
        	curstr = new ArrayList<String>();
            // If there were gaps, output the missing rows  
            outputMissingRows(rowNum - currentRow - 1);  
            // Prepare for this row  
            firstCellOfRow = true;  
            currentRow = rowNum;  
            currentCol = -1;  
        }  
  
        @Override  
        public void endRow(int rowNum) {  
            // Ensure the minimum number of columns  
            for (int i = currentCol; i < minColumns ; i++) {  
                curstr.add("");  
            }  
            output.add(curstr);  
        }  
  
        @Override  
        public void cell(String cellReference, String formattedValue,  
                         XSSFComment comment) {  
//            if (firstCellOfRow) {  
//                firstCellOfRow = false;  
//            } else {  
//                curstr.append(',');  
//            }  
  
            // gracefully handle missing CellRef here in a similar way as XSSFCell does  
            if (cellReference == null) {  
                cellReference = new CellAddress(currentRow, currentCol).formatAsString();  
            }  
  
            // Did we miss any cells?  
            int thisCol = (new CellReference(cellReference)).getCol();  
            int missedCols = thisCol - currentCol - 1;  
            for (int i = 0; i < missedCols; i++) {  
                curstr.add("");  
            }  
            currentCol = thisCol;  
  
            // Number or string?  
            try {  
                Double.parseDouble(formattedValue);  
                curstr.add(formattedValue);  
            } catch (NumberFormatException e) {  
               // output.append('"');  
            	curstr.add(formattedValue);  
             //   output.append('"');  
            }  
        }  
  
        @Override  
        public void headerFooter(String text, boolean isHeader, String tagName) {  
            // Skip, no headers or footers in CSV  
        }  
    }  
  
  
    ///  
  
    private final OPCPackage xlsxPackage;  
  
    /** 
     * Number of columns to read starting with leftmost 
     */  
    private final int minColumns;  
  
    /** 
     * Destination for data 
     */  
    
    private List<ArrayList<String>> output;
    private ArrayList<String> curstr;
    
    public  List<ArrayList<String>> get_output(){
    	return output;
    }
    
    /** 
     * Creates a new XLSX -> CSV converter 
     * 
     * @param pkg        The XLSX package to process 
     * @param output     The PrintStream to output the CSV to 
     * @param minColumns The minimum number of columns to output, or -1 for no minimum 
     */  
    public XLSX2CSV(OPCPackage pkg, int minColumns) {  
        this.xlsxPackage = pkg;  
        this.minColumns = minColumns;  
    }  
    
  
    /** 
     * Parses and shows the content of one sheet 
     * using the specified styles and shared-strings tables. 
     * 
     * @param styles 
     * @param strings 
     * @param sheetInputStream 
     */  
    public void processSheet(  
            StylesTable styles,  
            ReadOnlySharedStringsTable strings,  
            SheetContentsHandler sheetHandler,  
            InputStream sheetInputStream)  
            throws IOException, ParserConfigurationException, SAXException {  
        DataFormatter formatter = new DataFormatter();  
        InputSource sheetSource = new InputSource(sheetInputStream);  
        try {  
            XMLReader sheetParser = SAXHelper.newXMLReader();  
            ContentHandler handler = new XSSFSheetXMLHandler(  
                    styles, null, strings, sheetHandler, formatter, false);  
            sheetParser.setContentHandler(handler);  
            sheetParser.parse(sheetSource);  
        } catch (ParserConfigurationException e) {  
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());  
        }  
    }  
  
    /** 
     * Initiates the processing of the XLS workbook file to CSV. 
     * 
     * @throws IOException 
     * @throws OpenXML4JException 
     * @throws ParserConfigurationException 
     * @throws SAXException 
     */  
    public void process()  
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {  
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);  
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);  
        StylesTable styles = xssfReader.getStylesTable();  
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();  
//        int index = 0;  
        while (iter.hasNext()) {  
        	output = new ArrayList<ArrayList<String>> ();
            InputStream stream = iter.next();  
//            String sheetName = iter.getSheetName();  
//            System.out.println("正在读取sheet： "+sheetName + " [index=" + index + "]:");  
            processSheet(styles, strings, new SheetToCSV(), stream);  
//            System.out.println("sheet 读取完成!");
            stream.close();  
//            ++index;  
        }  
    }  
  
    
//    public static void main(String[] args) throws Exception {  
//      /*  if (args.length < 1) { 
//            System.err.println("Use:"); 
//            System.err.println("  XLSX2CSV <xlsx file> [min columns]"); 
//            return; 
//        }*/  
//  
//        File xlsxFile = new File("F:\\8月数据.xlsx");  
//        if (!xlsxFile.exists()) {  
//            System.err.println("Not found or not a file: " + xlsxFile.getPath());  
//            return;  
//        }  
//  
//        int minColumns = -1;  
//        if (args.length >= 2)  
//            minColumns = Integer.parseInt(args[1]);  
//  
//        // The package open is instantaneous, as it should be.  
//        OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);  
//        XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);  
//        xlsx2csv.process();  
//        p.close();  
//    }  
}

下面提供一个excel读取工具类进行读取，返回的数据类型为 List<ArrayList<String>>，

Excel_reader.java

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
 
public class Excel_reader {
 
    // *************xlsx文件读取函数************************
    // 在jdbc.properties上加上 excelUrl：xlsx文件的目录
    // excel_name为文件名，arg为需要查询的列号(输入数字则返回对应列 , 输入字符串则固定返回这个字符串)
    // 返回
    @SuppressWarnings({ "resource", "unused" })
    public static List<ArrayList<String>> xlsx_reader(InputStream xlsxFile, ArrayList<Object> args)
            throws IOException {
        // 读取excel文件夹url
        /*Properties properties = new Properties();
        InputStream inStream = JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties");
        properties.load(inStream);
        String excelUrl = properties.getProperty("excelUrl");*/
 
//        File xlsxFile = new File(excelUrl + excel_name);
        /*if (!xlsxFile.exists()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return null;
        }*/
        List<ArrayList<String>> excel_output = new ArrayList<ArrayList<String>>();
        try {
            OPCPackage p;
            p = OPCPackage.open(xlsxFile);
//            p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
            XLSX2CSV xlsx2csv = new XLSX2CSV(p, 20); // 20代表最大列数
            xlsx2csv.process();
            excel_output = xlsx2csv.get_output();
            p.close();   //释放
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
 
//        System.out.println(excel_name + " 读取完毕");
 
        // //读取xlsx文件
        // XSSFWorkbook xssfWorkbook = null;
        // //寻找目录读取文件
        // System.out.println("开始读取 "+excel_name);
        // File excelFile = new File(excelUrl+excel_name);
        // InputStream is = new FileInputStream(excelFile);
        // xssfWorkbook = new XSSFWorkbook(is);
        //
        // if(xssfWorkbook==null){
        // System.out.println("未读取到内容,请检查路径！");
        // return null;
        // }else{
        // System.out.println(excel_name+" 读取完毕");
        // }
 
        List<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();
        // 遍历xlsx中的sheet
 
        // 对于每个sheet，读取其中的每一行
        for (int rowNum = 0; rowNum < excel_output.size(); rowNum++) {
            ArrayList<String> cur_output = excel_output.get(rowNum);
            ArrayList<String> curarr = new ArrayList<String>();
            for (int columnNum = 0; columnNum < args.size(); columnNum++) {
                Object obj = args.get(columnNum);
                if (obj instanceof String) {
                    curarr.add(obj.toString());
                } else if (obj instanceof Integer) {
                    String cell = cur_output.get((int) obj);
                    curarr.add(cell);
                } else {
                    System.out.print("类型错误！");
                    return null;
                }
            }
            ans.add(curarr);
        }
 
        return ans;
    }
    
    
    public static List<ArrayList<String>> xls_reader(InputStream xlsxFile)
            throws IOException {
        // 读取excel文件夹url
        /*Properties properties = new Properties();
        InputStream inStream = JDBCTools.class.getClassLoader().getResourceAsStream("jdbc.properties");
        properties.load(inStream);
        String excelUrl = properties.getProperty("excelUrl");*/
 
//        File xlsxFile = new File(excelUrl + excel_name);
        /*if (!xlsxFile.exists()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return null;
        }*/
        List<ArrayList<String>> excel_output = new ArrayList<ArrayList<String>>();;
        try {
            XLS2CSV xls2csv = new XLS2CSV(xlsxFile, 20);
            xls2csv.process();
            excel_output = xls2csv.getData();
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
 
//        System.out.println(excel_name + " 读取完毕");
 
        // //读取xlsx文件
        // XSSFWorkbook xssfWorkbook = null;
        // //寻找目录读取文件
        // System.out.println("开始读取 "+excel_name);
        // File excelFile = new File(excelUrl+excel_name);
        // InputStream is = new FileInputStream(excelFile);
        // xssfWorkbook = new XSSFWorkbook(is);
        //
        // if(xssfWorkbook==null){
        // System.out.println("未读取到内容,请检查路径！");
        // return null;
        // }else{
        // System.out.println(excel_name+" 读取完毕");
        // }
 
/*        ArrayList<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();
        // 遍历xlsx中的sheet
 
        // 对于每个sheet，读取其中的每一行
        for (int rowNum = 0; rowNum < excel_output.size(); rowNum++) {
            ArrayList<String> cur_output = excel_output.get(rowNum);
            ArrayList<String> curarr = new ArrayList<String>();
            for (int columnNum = 0; columnNum < args.size(); columnNum++) {
                Object obj = args.get(columnNum);
                if (obj instanceof String) {
                    curarr.add(obj.toString());
                } else if (obj instanceof Integer) {
                    String cell = cur_output.get((int) obj);
                    curarr.add(cell);
                } else {
                    System.out.print("类型错误！");
                    return null;
                }
            }
            ans.add(curarr);
        }*/
 
        return excel_output;
    }
 
//    // 判断后缀为xlsx的excel文件的数据类
//    @SuppressWarnings("deprecation")
//    private static String getValue(XSSFCell xssfRow) {
//        if (xssfRow == null) {
//            return null;
//        }
//        if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BOOLEAN) {
//            return String.valueOf(xssfRow.getBooleanCellValue());
//        } else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_NUMERIC) {
//            double cur = xssfRow.getNumericCellValue();
//            long longVal = Math.round(cur);
//            Object inputValue = null;
//            if (Double.parseDouble(longVal + ".0") == cur)
//                inputValue = longVal;
//            else
//                inputValue = cur;
//            return String.valueOf(inputValue);
//        } else if (xssfRow.getCellType() == xssfRow.CELL_TYPE_BLANK
//                || xssfRow.getCellType() == xssfRow.CELL_TYPE_ERROR) {
//            return "";
//        } else {
//            return String.valueOf(xssfRow.getStringCellValue());
//        }
//    }
 
    public static void main(String[] args) throws FileNotFoundException {
        File xlsxFile = new File("数据1.xlsx");
        //读取xlsx文件测试
        /*InputStream is = new FileInputStream(xlsxFile);
        ArrayList<Object> a = new ArrayList<>();
        //需要读取的列
                a.add(0);
                a.add(1);
                a.add(2);
        try {
            List<ArrayList<String>> xlsx_reader = xlsx_reader(is,a);
            for (int i = 0; i < xlsx_reader.size(); i++) {
                System.out.println(xlsx_reader.get(i));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }*/
        
        //读取xls文件测试
        XLS2CSV xls2csv;
        try {
            xls2csv = new XLS2CSV(new FileInputStream("数据xls.xls"), 20);
            xls2csv.process();
            List<ArrayList<String>> data2 = xls2csv.getData();
            for (ArrayList<String> arrayList : data2) {
                System.out.println(arrayList.toString());
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}

解析成的格式为　　

[小红, 男, , , , , , , ]
[小明, 男, , , , , , , ]

没数据的显示为空，解析xlsx文件需要传入需要读取的列的集合。

转载于:https://www.cnblogs.com/lzjdm/p/10407846.html

weixin_30535043

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
poi 通过驱动模式（SAX）解析防止读取大数据excel文件导致的内存溢出

前几天项目中遇到了导入大数据的excel表格导致jvm内存溢出的情况，现记录下解决方案。poi 解析文件分为用户模式和驱动模式解析，用户模式无非就是new HSSFWorkbook(is)，直接通过文件流读取，这种方式占用的内存比较大，大数据不建议采用此方式解析。驱动模式：将文件解析成csv格式的数据，也是跟用户模式一样分03版和07版的excel进行解析，下面直接贴代码，后面的mai...
复制链接

扫一扫