方便自己的问题总结:poi处理大文件xlsx会内存溢出问题,换sax的xml处理方式来处理就解决了,也做了优化和代码抽象

首先org.xml.sax.helpers.DefaultHandler是用来处理xml文件的,这个清楚之后就可以开干了。

这里我把几个文件格式统一抽象

import java.io.File;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.xml.sax.helpers.DefaultHandler;

/**
 * @ClassName: FileAbstractReader
 * @Description: 文件读取器,支持四种文件格式
 * @author omh
 * @date 2021-1-28
 */
public abstract class FileReader extends DefaultHandler {
	
	protected int sheetIndex = -1;
	protected List<Object> rowlist = new ArrayList<Object>();
    protected int curRow = 0; // 当前行
	protected FileHandler fileHandler;

	/**
	 * @Description:创建一个新的实例 FileAbstractReader
	 * @param fileHandler
	 */
	public FileReader(FileHandler fileHandler) {
		this.fileHandler = fileHandler;
	}
	
	/**
	 * @Title: optRows
	 * @Description: 文件行处理调用方法
	 * @param @param sheetIndex
	 * @param @param curRow
	 * @param @param rowlist
	 * @param @throws SQLException 参数
	 * @return void 返回类型
	 * @throws
	 * @author omh
	 */
	public void optRows(int sheetIndex, int curRow, List<Object> rowlist) throws SQLException {
		// TODO Auto-generated method stub
		fileHandler.handle(sheetIndex,curRow,rowlist);
	}
	
	/**
	 * @Title: process
	 * @Description: 文件处理方法
	 * @param @param file
	 * @param @throws Exception 参数
	 * @return void 返回类型
	 * @throws
	 * @author omh
	 */
	public abstract void process(File file) throws Exception;
}

统一实现handler

import java.util.List;

/**  
* @Title: FileHandler 
* @Description: excel文件处理器
* @author minghua ou  
* @date 2020年2月26日  
*/
public interface FileHandler {

	/**  
	 * @Title: handle  
	 * @Description:  
	 * @author minghua ou  
	 * @date 2020年2月26日  
	 * @param @param sheetIndex
	 * @param @param curRow
	 * @param @param rowlist 
	 */  
	void handle(int sheetIndex, int curRow, List<Object> rowlist);
}
/**
 * @ClassName: AbstractHandler
 * @Description: TODO(这里用一句话描述这个类的作用)
 * @author omh
 * @date 2021-1-28
 */
public abstract class AbstractHandler implements FileHandler{
	
	protected int row = 0;

	public int getRow() {
		return row;
	}

	public void setRow(int row) {
		this.row = row;
	}
}

xlsx格式处理,这个使用poi是会内存溢出的,也是本文关键解决的问题

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.Iterator;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**  
* @Title: XLXSFileReader 
* @Description: 
* @author minghua ou  
* @date 2020年2月14日  
*/
public class XLSXFileReader extends FileReader {
    /**
	 * @Description:创建一个新的实例 XLXSFileReader
	 * @param fileHandler
	 */
	public XLSXFileReader(FileHandler fileHandler) {
		super(fileHandler);
	}

	private SharedStringsTable sst;
    private String lastContents;
    private boolean nextIsString;

    private int curCol = 0; // 当前列索引
    private int preCol = 0; // 上一列列索引
    private int titleRow = 0; // 标题行,一般情况下为0
    private int rowsize = 0; // 列数

    // 只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3

    /**
     * @param filename
     * @param sheetId sheetId为要遍历的sheet索引,从1开始,1-3
     * @throws Exception
     */
    public void processOneSheet(String filename, int sheetId) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        // rId2 found by processing the Workbook
        // 根据 rId# 或 rSheet# 查找sheet
        InputStream sheet2 = r.getSheet("rId" + sheetId);
        sheetIndex++;
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();
    }

    /**
     * 遍历 excel 文件
     */
    public void process(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        Iterator<InputStream> sheets = r.getSheetsData();
        while (sheets.hasNext()) {
            curRow = 0;
            sheetIndex++;
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
            sheet.close();
        }
    }

    /**
     * 遍历 excel 文件
     */
    @Override
    public void process(File file) throws Exception {
        OPCPackage pkg = OPCPackage.open(new FileInputStream(file));
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        Iterator<InputStream> sheets = r.getSheetsData();
		if (sheets.hasNext()) {
			curRow = 0;
			sheetIndex++;
			InputStream sheet = sheets.next();
			InputSource sheetSource = new InputSource(sheet);
			parser.parse(sheetSource);
			sheet.close();
		}
//        while (sheets.hasNext()) {
//            curRow = 0;
//            sheetIndex++;
//            InputStream sheet = sheets.next();
//            InputSource sheetSource = new InputSource(sheet);
//            parser.parse(sheetSource);
//            sheet.close();
//        }
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // .createXMLReader("org.apache.xerces.parsers.SAXParser");
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        // c => 单元格
        if ("c".equals(name)) {
            // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
            String cellType = attributes.getValue("t");
            String rowStr = attributes.getValue("r");
            curCol = this.getRowIndex(rowStr);
            if (cellType != null && "s".equals(cellType)) {
                nextIsString = true;
            } else {
                nextIsString = false;
            }
        }
        // 置空
        lastContents = "";
    }

    public void endElement(String uri, String localName, String name) throws SAXException {
        // 根据SST的索引值的到单元格的真正要存储的字符串
        // 这时characters()方法可能会被调用多次
        if (nextIsString) {
            try {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            } catch (Exception e) {

            }
        }
        // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
        // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
        if ("v".equals(name)) {
            String value = lastContents.trim();
            value = "".equals(value) ? " " : value;
            int cols = curCol - preCol;
            if (cols > 1) {
                for (int i = 0; i < cols - 1; i++) {
                    rowlist.add(preCol, "");
                }
            }
            preCol = curCol;
            rowlist.add(curCol - 1, value);
        } else {
            // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
            if ("row".equals(name)) {
                int tmpCols = rowlist.size();
                if (tmpCols > 0) {
                    if (curRow > this.titleRow && tmpCols < this.rowsize) {
                        for (int i = 0; i < this.rowsize - tmpCols; i++) {
                            rowlist.add(rowlist.size(), "");
                        }
                    }
                    try {
                        optRows(sheetIndex, curRow, rowlist);
                    } catch (SQLException e) {
//                        logger.error("endElement error", e);
                    }
                } else {// 跳过空白行
//                    logger.info("jump blank row ,curRow:" + (curRow + 1));
                }
                if (curRow == this.titleRow) {
                    this.rowsize = rowlist.size();
                    if (this.rowsize == 0) {
                        throw new RuntimeException("excel 未被编辑或者内容为空");
                    }
                }

                rowlist.clear();
                curRow++;
                curCol = 0;
                preCol = 0;
            }
        }
    }

    public void characters(char[] ch, int start, int length) throws SAXException {
        // 得到单元格内容的值
        lastContents += new String(ch, start, length);
    }

    // 得到列索引,每一列c元素的r属性构成为字母加数字的形式,字母组合为列索引,数字组合为行索引,
    // 如AB45,表示为第(A-A+1)*26+(B-A+1)*26列,45行
    public int getRowIndex(String rowStr) {
        rowStr = rowStr.replaceAll("[^A-Z]", "");
        byte[] rowAbc = rowStr.getBytes();
        int len = rowAbc.length;
        float num = 0;
        for (int i = 0; i < len; i++) {
            num += (rowAbc[i] - 'A' + 1) * Math.pow(26, len - i - 1);
        }
        return (int) num;
    }

    public int getTitleRow() {
        return titleRow;
    }

    public void setTitleRow(int titleRow) {
        this.titleRow = titleRow;
    }

    public int getSheetIndex() {
        return sheetIndex;
    }
}

xls格式处理,没有涉及内存溢出的问题

import java.io.File;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

/**
 * @ClassName: XLXFileReader
 * @Description: TODO(这里用一句话描述这个类的作用)
 * @author omh
 * @date 2021-1-28
 */
public class XLSFileReader extends FileReader {

	/**
	 * @Description:创建一个新的实例 XLXFileReader
	 * @param fileHandler
	 */
	public XLSFileReader(FileHandler fileHandler) {
		super(fileHandler);
		// TODO Auto-generated constructor stub
	}

	/* (非 Javadoc)
	 * @param file
	 * @throws Exception
	 * @see 
	 */
	@Override
	public void process(File file) throws Exception {
		// TODO Auto-generated method stub
		Workbook workbook = Workbook.getWorkbook(file);
		Sheet sheet = workbook.getSheet(0);
		int rows = sheet.getRows();
		for (int j = 0; j < rows; j++) {
			Cell[] cells = sheet.getRow(j);
			for (Cell cell : cells) {
				String cellStr = cell.getContents().trim();
				rowlist.add(cellStr);
			}
			optRows(sheetIndex, curRow, rowlist);
			curRow++;
			rowlist.clear();
		}
		workbook.close();
	}

}

txt格式处理

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;


/**
 * @ClassName: TXTFileReader
 * @Description: TODO(这里用一句话描述这个类的作用)
 * @author omh
 * @date 2021-1-28
 */
public class TXTFileReader extends FileReader {

	/**
	 * @Description:创建一个新的实例 TXTFileReader
	 * @param fileHandler
	 */
	public TXTFileReader(FileHandler fileHandler) {
		super(fileHandler);
		// TODO Auto-generated constructor stub
	}

	/* (非 Javadoc)
	 * @param file
	 * @throws Exception
	 * @see 
	 */
	@Override
	public void process(File file) throws Exception {
		// TODO Auto-generated method stub
        //获取文件编码
        FileEncodeReferee fer = new FileEncodeReferee(file);
        InputStreamReader isr = new InputStreamReader(new FileInputStream(file), fer.getCharset());
        BufferedReader in = new BufferedReader(isr);
        String line = null;
        while ((line = in.readLine()) != null) {
            line = line.trim();
            String[] cells = line.split(" ");
            for (String cell : cells) {
            	rowlist.add(cell);
			}
            optRows(sheetIndex, curRow, rowlist);
            curRow++;
			rowlist.clear();
        }
	}

}
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;

public class FileEncodeReferee {
	private File file;

	public FileEncodeReferee(File file) {
		this.file = file;
	}

	public FileEncodeReferee(String path) {
		file = new File(path);
	}

	public String getCharset() {
		File file = this.file;

		String charset = "GBK";
		byte[] first3Bytes = new byte[3];
		BufferedInputStream bis = null;
		try {
			// boolean checked = false;
			bis = new BufferedInputStream(new FileInputStream(file));
			bis.mark(0);
			int read = bis.read(first3Bytes, 0, 3);
			if (read == -1) {
				return charset;
			}
			if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
				charset = "UTF-16LE";
				// checked = true;
			} else if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF) {
				charset = "UTF-16BE";
				// checked = true;
			} else if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1] == (byte) 0xBB
					&& first3Bytes[2] == (byte) 0xBF) {
				charset = "UTF-8";
				// checked = true;
			}
			/** *//*******************************************************************
					 * bis.reset(); if (!checked) { int loc = 0; while ((read = bis.read()) != -1) {
					 * loc++; if (read >= 0xF0) { break; } if (0x80 <= read && read <= 0xBF) //
					 * 单独出现BF以下的,也算是GBK { break; } if (0xC0 <= read && read <= 0xDF) { read =
					 * bis.read(); if (0x80 <= read && read <= 0xBF)// 双字节 (0xC0 - 0xDF) { // (0x80
					 * - 0xBF),也可能在GB编码内 continue; } else { break; } } else if (0xE0 <= read && read
					 * <= 0xEF) { // 也有可能出错,但是几率较小 read = bis.read(); if (0x80 <= read && read <=
					 * 0xBF) { read = bis.read(); if (0x80 <= read && read <= 0xBF) { charset =
					 * "UTF-8"; break; } else { break; } } else { break; } } }
					 * System.out.println(loc + " " + Integer.toHexString(read)); }
					 ******************************************************************/
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (bis != null) {
				try {
					bis.close();
				} catch (Exception ex) {
					ex.printStackTrace();
				}
			}
		}
		return charset;
	}

	public static void main(String[] args) {
		FileEncodeReferee fer = new FileEncodeReferee("C:/测试.csv");
		System.out.println(fer.getCharset());
	}
}

csv格式处理

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import com.Ostermiller.util.ExcelCSVParser;
import com.Ostermiller.util.LabeledCSVParser;

/**
 * @ClassName: CSVFileReader
 * @Description: csv文件读取器
 * @author omh
 * @date 2021-1-28
 */
public class CSVFileReader extends FileReader {

	/**
	 * @Description:创建一个新的实例 CSVFileReader
	 * @param fileHandler
	 */
	public CSVFileReader(FileHandler fileHandler) {
		super(fileHandler);
	}

	/* (非 Javadoc)
	 * @param in
	 * @throws Exception
	 * @see 
	 */
	@Override
	public void process(File file) throws Exception {
		// TODO Auto-generated method stub
		FileInputStream in = new FileInputStream(file);
		FileEncodeReferee fer = new FileEncodeReferee(file);
		InputStreamReader inReader = new InputStreamReader(in, fer.getCharset());
		LabeledCSVParser csvParser = new LabeledCSVParser(new ExcelCSVParser(inReader));
		String[] lines = csvParser.getLabels();
		if(lines.length > 0) {
			for (String cell : lines) {
				rowlist.add(cell);
			}
			optRows(sheetIndex, curRow, rowlist);
			curRow++;
			rowlist.clear();
		}
		String[] line;
		while ((line = csvParser.getLine()) != null) {
			for (String cell : line) {
				rowlist.add(cell);
			}
			optRows(sheetIndex, curRow, rowlist);
			curRow++;
			rowlist.clear();
		}
		csvParser.close();
		inReader.close();
		in.close();
	}

}

读取器工厂

/**
 * @ClassName: FileReaderFactory
 * @Description: 文件读取器工厂
 * @author omh
 * @date 2021-1-28
 */
public class FileReaderFactory {

	public static FileReader createFileReader(FileHandler fileHandler, String fileType){
		FileReader fileReader = null;
		switch (fileType) {
		case "csv":
			fileReader = new CSVFileReader(fileHandler);
			break;
		case "txt":
			fileReader = new TXTFileReader(fileHandler);
			break;
		case "xls":
			fileReader = new XLSFileReader(fileHandler);
			break;
		case "xlsx":
			fileReader = new XLSXFileReader(fileHandler);
			break;
		default:
			break;
		}
		return fileReader;
	}
}

测试代码,只需要在AbstractHandler里实现业务就可以了,业务代码我就不贴了

String fileType = FileUtil.getExtention(file.getName());
		AbstractHandler abstractHandler = new AbstractHandler() {
            @Override
            public void handle(int sheetIndex, int curRow, List<Object> rowlist) {
                // TODO Auto-generated method stub
                row++;
                //syso
            }
        };
		try {
			FileReaderFactory.createFileReader(abstractHandler , fileType).process(file);
        } catch (Exception e) {
            // TODO: handle exception
        	e.printStackTrace();
        }

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值