本文共介绍两种方式,第一种是常规POI读取,第二种是大文件读取。
依赖包
<poi.version>4.1.2</poi.version>
<!-- excel工具 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi.version}</version>
</dependency>
<!-- 读取大量excel数据时使用 -->
<dependency>
<groupId>com.monitorjbl</groupId>
<artifactId>xlsx-streamer</artifactId>
<version>2.1.0</version>
</dependency>
第一种方式:
常规POI读取
package com.platform.modules.admin.controller;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Date;
public class ReadExcel {
public static void main(String[] args) throws Exception {
long t1 = new Date().getTime();
excel();
long t2 = new Date().getTime();
System.out.println((t2-t1)/1000 + "秒");
}
public static void excel() throws Exception {
File file = new File("C:\\Users\\Lenovo\\Desktop\\数据导入模板及填写格式 (2).xlsx");
if (!file.exists()){
throw new Exception("文件不存在!");
}
InputStream in = new FileInputStream(file);
// 读取整个Excel
XSSFWorkbook sheets = new XSSFWorkbook(in);
// 获取第一个表单Sheet
XSSFSheet sheetAt = sheets.getSheetAt(0);
//默认第一行为标题行,i = 0
XSSFRow titleRow = sheetAt.getRow(0);
// 循环获取每一行数据
for (int i = 1; i < sheetAt.getPhysicalNumberOfRows(); i++) {
XSSFRow row = sheetAt.getRow(i);
// 读取每一格内容
StringBuilder sb = new StringBuilder();
for (int index = 0; index < row.getPhysicalNumberOfCells(); index++) {
XSSFCell titleCell = titleRow.getCell(index);
XSSFCell cell = row.getCell(index);
cell.setCellType(CellType.STRING);
if (cell.getStringCellValue().equals("")) {
continue;
}
sb.append(cell);
}
System.out.println(i + "\t" + sb);
}
}
}
第二种方式:
大文件excel读取
package com.platform.modules.admin.controller;
import com.monitorjbl.xlsx.StreamingReader;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.File;
import java.io.FileInputStream;
import java.util.*;
public class ReadBigExcel {
public static Map<String, List<String>> test(File file) throws Exception{
FileInputStream in = new FileInputStream(file);
Map<String,List<String>> mapData = new HashMap<String, List<String>>();
Workbook wk = StreamingReader.builder()
.rowCacheSize(100) //缓存到内存中的行数,默认是10
.bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024
.open(in); //打开资源,必须,可以是InputStream或者是File,注意:只能打开XLSX格式的文件
int sheetNums = wk.getNumberOfSheets();
for(int i = 0 ; i < sheetNums;i ++){
List<String> sheetData = new ArrayList<String>();
Sheet sheet = wk.getSheetAt(i);
String sheetName = wk.getSheetName(i);
//遍历所有的行
int k = 0;
for (Row row : sheet) {
StringBuilder sb = new StringBuilder();
//遍历所有的列
for (Cell cell : row) {
sb.append(cell.getStringCellValue());
}
System.out.println(k++ + "\t" + sb.toString());
}
}
return mapData;
}
public static void main(String[] args) throws Exception {
File file = new File("C:\\Users\\Lenovo\\Desktop\\数据导入模板及填写格式 (2).xlsx");
long t1 = new Date().getTime();
test(file);
long t2 = new Date().getTime();
System.out.println((t2-t1)/1000 + "秒");
}
}
执行效率比对,横坐标为数据量,纵坐标为执行耗时秒
可以看出大文件读取POI的执行效率比常规POI好很多。
框起来的这一块是执行常规POI所耗资源,读取大文件POI对CPU和内存的占用也叫常规POI低很多,最重要的是不会引起内存溢出。