前言
在遇到大数据量excel,50MB大小或数百万级别的数据读取时,使用常用的POI容易导致读取时内存溢出或者cpu飙升。
本文讨论的是针对xlsx格式的excel文件上传,采用com.monitorjbl.xlsx.StreamingReader 。什么是StreamReader?
StreamReader 是 java.io 包中的一个类,用于读取字符流的高级类。它继承自 Reader 类,可以以字符为单位读取文件中的数据。
StreamReader的主要功能?
- 以字符为单位读取文件中的数据
- 提供了多种读取方法,如read()、readLine()等
- 可以指定字符编码,以适应不同类型的文件
StreamReader的优势?
- 简化了文件读取的过程,提供了高层次的读取方法可以处理不同类型的文件,如文本文件、CSV文件等
- 可以读取大型文件,节省内存空间
注:StreamReader只能用遍历形式读取数据
Sheet sheet = wk.getSheetAt(0); //遍历所有的行 for (Row row : sheet) { System.out.println("开始遍历第" + row.getRowNum() + "行数据:"); //遍历所有的列 for (Cell cell : row) { System.out.print(cell.getStringCellValue() + " "); } System.out.println(" "); }
案例步骤
1、导入文件前端接口
Controller.java
/**
* 导入文件前端接口
*/
@PostMapping("/importData")
@ResponseBody
public AjaxResult importData(MultipartFile file, boolean updateSupport) throws Exception {
// 开始时间
Long begin = new Date().getTime();
// excel转换为List集合(约30s~40s)
List<TpInstallationMaintenanceLabelDetailed> tpInstallationMaintenanceLabelDetailedList = largeFilesUtils.importExcelLargeFile(file, updateSupport);
// 结束时间
Long end = new Date().getTime();
// 数据导入(约30s)
String message = importInstallationMaintenanceLabelDetailed(tpInstallationMaintenanceLabelDetailedList, updateSupport);
// 总用时(约60s~70s)
message = message +"<br/>数据转换花费时间 : "+(end - begin) / 1000 + " s" ;
// 返回
return AjaxResult.success(message);
}
2、Excel数据转为List
largeFilesUtils.java
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import com.monitorjbl.xlsx.StreamingReader;
import com.ruoyi.huawei.domain.TpInstallationMaintenanceLabelDetailed;
/**
* 大文件Excel导入
*
* @author y
* @date 2024-03-29
*/
@Service
public class LargeFilesUtils {
/**
* 大文件Excel导入
*
* @param
* @return 工具
*/
public List<TpInstallationMaintenanceLabelDetailed> importExcelLargeFile(MultipartFile file,
boolean updateSupport) {
List<TpInstallationMaintenanceLabelDetailed> tpInstallationMaintenanceLabelDetailedList = new ArrayList<TpInstallationMaintenanceLabelDetailed>();
try {
// 大文件测试开始
InputStream inputStream = file.getInputStream();
// com.monitorjbl.xlsx.StreamingReader
Workbook workbook = StreamingReader.builder().rowCacheSize(1000) // 缓存到内存中的行数(默认是10)
.bufferSize(10240) // 读取资源时,缓存到内存的字节大小(默认是1024)
.open(inputStream);
// 获取第一个Shhet
Sheet sheet = workbook.getSheetAt(0);
//
boolean fastRowBoolean = true;
// monitorjbl只能支持遍历,不能通过指定下标获取
for (Row row : sheet) {
// 判断是否首行
if(fastRowBoolean) {
// 设置为非首行
fastRowBoolean = false;
// continue 语句用于跳过当前循环中剩余的代码,并开始下一次迭代。
continue;
}
// 创建实体
TpInstallationMaintenanceLabelDetailed rowData = new TpInstallationMaintenanceLabelDetailed();
// 列下标初始化
int n = 0;
// 遍历列
for (Cell cell : row) {
//
switch (n) {
// 第一列
case 0:
rowData.setPppoeAccount(cell.getStringCellValue());
break;
// 第二列
case 1:
rowData.setInstallationMaintenanceName(cell.getStringCellValue());
break;
case 2:
rowData.setCounty(cell.getStringCellValue());
break;
case 3:
rowData.setPoorQualityUser(cell.getStringCellValue());
break;
case 4:
rowData.setOldLightCat(cell.getStringCellValue());
break;
case 5:
rowData.setSetTopBoxWirelessConnection(cell.getStringCellValue());
break;
case 6:
rowData.setPleaseUseXgponOnu(cell.getStringCellValue());
break;
case 7:
rowData.setHighTemperatureLightCat(cell.getStringCellValue());
break;
case 8:
rowData.setAnOldSetTopBox(cell.getStringCellValue());
break;
case 9:
rowData.setTwoOldSetTopBoxes(cell.getStringCellValue());
break;
case 10:
rowData.setThreeOldSetTopBoxes(cell.getStringCellValue());
break;
case 11:
rowData.setAnPoorQualityRouter(cell.getStringCellValue());
break;
case 12:
rowData.setTwoPoorQualityRouters(cell.getStringCellValue());
break;
case 13:
rowData.setThreePoorQualityRouters(cell.getStringCellValue());
break;
case 14:
rowData.setThreeOrMoreLowQualityRouters(cell.getStringCellValue());
break;
case 15:
rowData.setThreeOrMoreOldSetTopBoxes(cell.getStringCellValue());
break;
case 16:
rowData.setSeverelyPoorQualityUsersAndOldOpticalCats(cell.getStringCellValue());
break;
// 处理其他属性
default:
break;
}
// 列下标+1
n = n+1;
}
tpInstallationMaintenanceLabelDetailedList.add(rowData);
}
workbook.close();
} catch (Exception e) {
// TODO: handle exception
System.out.println(e);
}
return tpInstallationMaintenanceLabelDetailedList;
}
}
3、List集合数据导入
importInstallationMaintenanceLabelDetailed
/**
* 导入文件分析
*/
public String importInstallationMaintenanceLabelDetailed(List<TpInstallationMaintenanceLabelDetailed> tpInstallationMaintenanceLabelDetailedList, Boolean isUpdateSupport)
{
if (StringUtils.isNull(tpInstallationMaintenanceLabelDetailedList) || tpInstallationMaintenanceLabelDetailedList.size() == 0)
{
throw new ServiceException("导入数据不能为空!");
}
// 执行开始时间
Long begin = new Date().getTime();
// 线程数
final int THREAD_COUNT = 10;
// 每个线程处理的数据量
final int BATCH_SIZE = tpInstallationMaintenanceLabelDetailedList.size() / THREAD_COUNT;
// ExecutorService是Java中对线程池定义的一个接口
ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
//
for (int i = 0; i < THREAD_COUNT; i++) {
// List数据开始下标
final int startIndex = i * BATCH_SIZE;
// List数据结束下标
final int endIndex = (i + 1) * BATCH_SIZE;
// 线程池执行
executor.submit(new Runnable() {
public void run() {
// 初始化数据库连接对象
Connection conn = null;
// 初始化预编译的 SQL 语句的对象
PreparedStatement ps = null;
try {
// 获取连接
conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/tool_platform_db?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&useSSL=false", "root", "123456");//获取连接
// 设置自动提交模式,默认true
conn.setAutoCommit(false);
// sql前缀
String prefix = "INSERT INTO tp_label_detailed ("
+ "account,"
+ "maintenance_name,"
+ "county,quality_user,"
+ "light_cat,wireless_connection,"
+ "xgpon_onu,"
+ "light_cat,"
+ "an_box,two_boxes,"
+ "three_boxes,"
+ "an_router,"
+ "two_routers,"
+ "three_routers,"
+ "three_or_more_routers,"
+ "three_or_more_boxes,"
+ "severely_and_cats"
+ ") VALUES ";
// 创建预编译对象
ps = conn.prepareStatement(prefix);
// 保存sql后缀
StringBuffer suffix = new StringBuffer();
// 执行条数
int number_of_cycles = 0;
//
for (int j = startIndex; j < endIndex; j++) {
// 拼接sql
suffix.append("("+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getPppoeAccount()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getInstallationMaintenanceName()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getCounty()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getPoorQualityUser()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getOldLightCat()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getSetTopBoxWirelessConnection()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getPleaseUseXgponOnu()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getHighTemperatureLightCat()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getAnOldSetTopBox()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getTwoOldSetTopBoxes()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getThreeOldSetTopBoxes()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getAnPoorQualityRouter()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getTwoPoorQualityRouters()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getThreePoorQualityRouters()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getThreeOrMoreLowQualityRouters()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getThreeOrMoreOldSetTopBoxes()+"',"+
"'"+tpInstallationMaintenanceLabelDetailedList.get(j).getSeverelyPoorQualityUsersAndOldOpticalCats()+"'"+
"),"); //拼接sql
number_of_cycles = number_of_cycles +1;
}
// sql拼接
String sql = prefix + suffix.substring(0, suffix.length() - 1);
// 添加预处理sql
ps.addBatch(sql);
// 执行语句
ps.executeBatch();
// 提交
conn.commit();
// 初始化拼接sql
suffix.setLength(0);
// 初始化条数
number_of_cycles = 1;
} catch (SQLException e) {
e.printStackTrace();
} finally {
if (ps != null) {
try {
// 关闭ps
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if (conn != null) {
try {
// 关闭数据库连接
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
});
}
//关闭线程池,不接受新任务,但会把已添加的任务执行完
executor.shutdown();
// 等待所有线程完成任务
while (!executor.isTerminated()) {}
System.out.println("完成");
// 结束时间
Long end = new Date().getTime();
// 耗时
logger.debug(tpInstallationMaintenanceLabelDetailedList.size()+"条数据插入花费时间 : " + (end - begin) / 1000 + " s");
//
return "数据导入成功!共 " + tpInstallationMaintenanceLabelDetailedList.size() + " 条!"+"<br/>数据导入花费时间 : "+(end - begin) / 1000 + " s" ;
}