本文主要介绍使用easyexcel进行数据插入的优化案例集过程!
源码获取:
1.建表语句
create table if not exists `guigu-auth`.user_information
(
id bigint auto_increment
primary key,
username varchar(50) null,
phone_number varchar(20) null,
id_card_number varchar(18) null,
description text null,
home_address varchar(100) null
);
2.数据预览
3.基础实现版本(耗时160s)
/**
* 基础版本
* @param file
* @return
*/
@PostMapping("/import1")
public String importExcel1(@RequestParam("file") MultipartFile file) {
Instant start = Instant.now();
try {
List<UserInformation> allData = new ArrayList<>();
Instant start2 = Instant.now();
EasyExcel.read(file.getInputStream(), UserInformation.class, new PageReadListener<UserInformation>(dataList -> {
allData.addAll(dataList);
})).sheet().doRead();
Instant end2 = Instant.now();
Duration timeElapsed1 = Duration.between(start2, end2);
log.info("加载内存耗时:"+timeElapsed1.toMillis());
// 对数组进行分割
List<List<UserInformation>> partitions = partitionList(allData, BATCH_SIZE);
for (List<UserInformation> partition : partitions) {
Instant start1 = Instant.now();
userInformationService.saveBatch(partition);
Instant end1 = Instant.now();
Duration timeElapsed2 = Duration.between(start1, end1);
log.info("插入耗时"+timeElapsed2.toMillis());
}
allData.clear();
} catch (IOException e) {
Instant end = Instant.now();
Duration timeElapsed = Duration.between(start, end);
return "导入失败: " + e.getMessage() + ". 时间消耗: " + timeElapsed.toMillis() + " 毫秒.";
}
Instant end = Instant.now();
Duration timeElapsed = Duration.between(start, end);
return "导入成功. 时间消耗: " + timeElapsed.toMillis() + " 毫秒.";
}
4.线程池版本(耗时57s)
/**
* 线程池版本
* @param file
* @return
*/
@PostMapping("/import2")
public String importExcel2(@RequestParam("file") MultipartFile file) {
Instant start = Instant.now();
ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
try {
Instant start2 = Instant.now();
List<UserInformation> allData = new ArrayList<>();
EasyExcel.read(file.getInputStream(), UserInformation.class, new PageReadListener<UserInformation>(dataList -> {
allData.addAll(dataList);
})).sheet().doRead();
Instant end2 = Instant.now();
Duration timeElapsed1 = Duration.between(start2, end2);
log.info("加载内存耗时:"+timeElapsed1.toMillis());
// 对数组进行分割
List<List<UserInformation>> partitions = partitionList(allData, BATCH_SIZE);
// 提交至线程池处理
for (List<UserInformation> partition : partitions) {
executorService.submit(() -> userInformationService.saveBatch(partition));
}
executorService.shutdown();
while (!executorService.isTerminated()) {
// 等待所有线程完成
}
} catch (IOException e) {
Instant end = Instant.now();
Duration timeElapsed = Duration.between(start, end);
return "导入失败: " + e.getMessage() + ". 时间消耗: " + timeElapsed.toMillis() + " 毫秒.";
}
Instant end = Instant.now();
Duration timeElapsed = Duration.between(start, end);
return "导入成功. 时间消耗: " + timeElapsed.toMillis() + " 毫秒.";
}
5.线程池+真批量插入版本(耗时27s)
/**
* 线程池版本+真批量插入
* @param file
* @return
*/
@PostMapping("/import3")
public String importExcel3(@RequestParam("file") MultipartFile file) {
Instant start = Instant.now();
List<UserInformation> allData = new ArrayList<>();
try {
EasyExcel.read(file.getInputStream(), UserInformation.class, new PageReadListener<UserInformation>(dataList ->{
allData.addAll(dataList);
})).sheet().doRead();
// 调用异步方法处理数据导入
asyncImportService.importData(allData);
allData.clear();
} catch (IOException e) {
Instant end = Instant.now();
Duration timeElapsed = Duration.between(start, end);
return "导入失败: " + e.getMessage() + ". 时间消耗: " + timeElapsed.toMillis() + " 毫秒.";
}
Instant end = Instant.now();
Duration timeElapsed = Duration.between(start, end);
return "导入请求已接受. 时间消耗: " + timeElapsed.toMillis() + " 毫秒. 数据导入将在后台进行.";
}
private <T> List<List<T>> partitionList(List<T> list, int size) {
List<List<T>> partitions = new ArrayList<>();
for (int i = 0; i < list.size(); i += size) {
partitions.add(new ArrayList<>(list.subList(i, Math.min(i + size, list.size()))));
}
return partitions;
}
service核心代码实现:
public void importData(List<UserInformation> dataList) {
ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
log.info("当前线程核数:"+Runtime.getRuntime().availableProcessors());
List<List<UserInformation>> partitions = partitionList(dataList, 10000); // 批量大小设置为1000
Instant start = Instant.now();
log.info("====================开始执行插入操作======================");
for (List<UserInformation> partition : partitions) {
executorService.submit(() -> userInformationService.getBaseMapper().batchInsert(partition));
}
executorService.shutdown();
while (!executorService.isTerminated()) {
}
Instant end = Instant.now();
// 计算时间差
Duration timeElapsed = Duration.between(start, end);
log.info("==============>操作耗时: " + timeElapsed.toMillis() + " 毫秒.");
}
private <T> List<List<T>> partitionList(List<T> list, int size) {
List<List<T>> partitions = new ArrayList<>();
for (int i = 0; i < list.size(); i += size) {
partitions.add(new ArrayList<>(list.subList(i, Math.min(i + size, list.size()))));
}
return partitions;
}