KEY WORDS:CompletableFuture函数、List分片
backbone:区间合并问题(日期格式)
最简单的处理方法是串行处理,也就是 LeetCode 上常见的答案版本(以下是模拟业务逻辑的代码,不是真实项目代码):
import cn.hutool.core.collection.ListUtil;
import com.google.common.collect.Lists;
import org.apache.commons.collections4.ListUtils;
import java.time.LocalDate;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.List;
public class completableFutureTest {
//随机生成实例数据
public static List<Map<String, LocalDate>> generateRandomInstances(int count) {
List<Map<String, LocalDate>> instanceList = new ArrayList<>();
Random random = new Random();
for (int i = 0; i < count; i++) {
LocalDate startDate, endDate;
do {
startDate = generateRandomDate(LocalDate.of(1949, 1, 1), LocalDate.of(2025, 12, 31), random);
endDate = generateRandomDate(startDate.plusDays(1), LocalDate.of(2050, 12, 31), random);
} while (endDate.isBefore(startDate)); // 如果结束日期在开始日期之前,则重新生成
Map<String, LocalDate> instance = new HashMap<>();
instance.put("startDate", startDate);
instance.put("endDate", endDate);
instanceList.add(instance);
}
return instanceList;
}
//随机生成时间
private static LocalDate generateRandomDate(LocalDate start, LocalDate end, Random random) {
long startEpochDay = start.toEpochDay();
long endEpochDay = end.toEpochDay();
if (endEpochDay <= startEpochDay) {
// 如果 endEpochDay 小于等于 startEpochDay,直接返回 start 的副本
return LocalDate.of(start.getYear(), start.getMonth(), start.getDayOfMonth());
}
// 修正后的生成随机数的方法,确保 bound 是正数
long randomEpochDay = startEpochDay + random.nextInt((int) (endEpochDay - startEpochDay));
return LocalDate.ofEpochDay(randomEpochDay);
}
//合并区间
public static List<Map<String, LocalDate>> mergeDays(List<Map<String, LocalDate>> instanceList){
//结果
List<Map<String, LocalDate>> mergedDateList = new ArrayList<>();
//获取第一个区间
Map<String, LocalDate> currentMap = instanceList.get(0);
//循环合并
for(Map<String, LocalDate> instanceDate : instanceList){
if (currentMap.get("endDate").plusDays(1).isBefore(instanceDate.get("startDate"))){ //无重叠
mergedDateList.add(currentMap);
currentMap = new HashMap<>(instanceDate);
} else {
if(instanceDate.get("endDate").isAfter(currentMap.get("endDate"))){ //非完全包含
currentMap.put("endDate", instanceDate.get("endDate"));
}
}
}
mergedDateList.add(currentMap); //循环结束,追加
return mergedDateList;
}
public static void main(String[] args) throws ExecutionException, InterruptedException{
int DateSum = 600000;
int subListLength = 200000; // 分片数量
List<Map<String, LocalDate>> instanceList = generateRandomInstances(DateSum); // 随机生成大量数据
instanceList.sort(Comparator.comparing(map -> map.get("startDate"))); // 排序
System.out.println("总数据量:" + DateSum + "/每片数据量:" + subListLength);
/**
* 普通(非多线程)合并方法
*/
long startTimeNormal = System.currentTimeMillis(); // 开始时间
List<Map<String, LocalDate>> resultListNormal = mergeDays(instanceList);
long endTimeNormal = System.currentTimeMillis(); // 结束时间
long durationNormal = endTimeNormal - startTimeNormal; // 计算运行时间
System.out.println("【不切片串行合并】:");
System.out.println("运行时间: " + durationNormal + " 毫秒");
System.out.println(resultListNormal);
long totalDaysNormal = resultListNormal.stream()
.mapToLong(map -> map.get("endDate").toEpochDay() - map.get("startDate").toEpochDay() + 1)
.sum();
System.out.println("总计天数:" + totalDaysNormal);
}
}
问题是:当需要处理的业务数据量很大,也就是需要处理的区间数量过多(上万条)时,单纯的串行处理会耗费很长时间,所以需要多线程并行处理,这里考虑使用 CompletableFuture。
下面是用普通切片方法做多线程处理的代码:
/*
 * Multi-threaded merge with hand-written slicing.
 * instanceList is cut into consecutive slices of partitionSize entries, every slice is merged
 * asynchronously with mergeDays, and the concatenated partial results are merged once more.
 */
long startTime = System.currentTimeMillis();
int partitionSize = subListLength; // entries per slice
int total = instanceList.size();
int numPartitions = total / partitionSize; // number of full-sized slices

// Step 1: cut the list into slice views, in order.
List<List<Map<String, LocalDate>>> slices = new ArrayList<>();
for (int i = 0; i < numPartitions; i++) {
    int fromIndex = i * partitionSize;
    slices.add(instanceList.subList(fromIndex, Math.min(fromIndex + partitionSize, total)));
}
if (total % partitionSize != 0) {
    // Trailing slice that is shorter than partitionSize.
    slices.add(instanceList.subList(numPartitions * partitionSize, total));
}

// Step 2: submit one async merge per slice. Nothing is awaited here; calling get() on each
// future right after submitting it would block before the next slice is even submitted.
List<CompletableFuture<List<Map<String, LocalDate>>>> futures = slices.stream()
        .map(slice -> CompletableFuture.supplyAsync(() -> mergeDays(slice)))
        .collect(Collectors.toList());

// Step 3: wait for all tasks together, then concatenate their results in slice order.
CompletableFuture<Void> allOf = CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]));
List<Map<String, LocalDate>> mergedDateList = allOf.thenApply(ignored -> {
    List<Map<String, LocalDate>> combined = new ArrayList<>();
    for (CompletableFuture<List<Map<String, LocalDate>>> finished : futures) {
        combined.addAll(finished.join());
    }
    return combined;
}).get();

// Step 4: merge the per-slice results once more.
List<Map<String, LocalDate>> resultList = mergeDays(mergedDateList);
long endTime = System.currentTimeMillis();
long duration = endTime - startTime;
System.out.println("【普通分片方法 + 多线程合并】:");
System.out.println("运行时间: " + duration + " 毫秒");
System.out.println(resultList);

// Total covered days; +1 because both the start day and the end day count.
long totalDays = 0L;
for (Map<String, LocalDate> range : resultList) {
    totalDays += range.get("endDate").toEpochDay() - range.get("startDate").toEpochDay() + 1;
}
System.out.println("总计天数:"+ totalDays);
需要注意:【等待所有 CompletableFuture 完成】和【收集所有分区结果】这两步是必须的,不能在提交每个 CompletableFuture 之后就直接对它调用 get(),不然就没有多线程的意义了。
如果每启动一个 CompletableFuture 就立即调用 future.get(),主线程会依次等待每个任务完成,而不是让所有任务同时执行,因为 future.get() 是阻塞调用。这会导致所有任务仍然是串行(一个接一个)执行,不能充分利用多线程的并行优势。也就是说,虽然用 CompletableFuture.supplyAsync 启动了异步任务,但由于 future.get() 是阻塞的,代码实际上会在每个任务完成之前停下来等待,从而抵消了并行处理的优势。
解决了多线程的问题之后再考虑如何合理给List分片以缩短时间的问题,因为这个业务无非就这两个步骤,分片然后合并。
除了普通的循环遍历分割之外,主要尝试三种方法:Google 的 Guava 框架、Apache 的 commons 框架、Hutool 框架。
- Google Guava
<!-- google guava 工具类 -->
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>31.0.1-jre</version>
</dependency>
/*
 * Multi-threaded merge, slicing done by Guava's Lists.partition.
 * Each slice is merged asynchronously with mergeDays; the concatenated partial results are
 * then merged once more.
 */
long startTime1 = System.currentTimeMillis();
List<List<Map<String, LocalDate>>> instanceListChunk1 = Lists.partition(instanceList, subListLength);

// Submit one async merge per slice; nothing is awaited yet.
List<CompletableFuture<List<Map<String, LocalDate>>>> futures1 = instanceListChunk1.stream()
        .map(chunk -> CompletableFuture.supplyAsync(() -> mergeDays(chunk)))
        .collect(Collectors.toList());

// Wait for all tasks together, then concatenate their results in slice order.
CompletableFuture<Void> allOf1 = CompletableFuture.allOf(futures1.toArray(new CompletableFuture[0]));
List<Map<String, LocalDate>> mergedDateList1 = allOf1.thenApply(ignored -> {
    List<Map<String, LocalDate>> combined = new ArrayList<>();
    for (CompletableFuture<List<Map<String, LocalDate>>> finished : futures1) {
        combined.addAll(finished.join());
    }
    return combined;
}).get();

// Final pass over the per-slice results.
List<Map<String, LocalDate>> resultList1 = mergeDays(mergedDateList1);
long endTime1 = System.currentTimeMillis();
long duration1 = endTime1 - startTime1;
System.out.println("【Guava框架分片 + 多线程合并】:");
System.out.println("运行时间: " + duration1 + " 毫秒");

// Total covered days; +1 because both the start day and the end day count.
long totalDays1 = 0L;
for (Map<String, LocalDate> range : resultList1) {
    totalDays1 += range.get("endDate").toEpochDay() - range.get("startDate").toEpochDay() + 1;
}
System.out.println(resultList1);
System.out.println("总计天数:" + totalDays1);
- Apache commons
<!-- apache 集合工具类 -->
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.4</version>
</dependency>
/*
 * Multi-threaded merge, slicing done by Apache Commons Collections' ListUtils.partition.
 * Each slice is merged asynchronously with mergeDays; the concatenated partial results are
 * then merged once more.
 */
long startTime2 = System.currentTimeMillis();
List<List<Map<String, LocalDate>>> instanceListChunk2 = ListUtils.partition(instanceList, subListLength);

// Submit one async merge per slice; nothing is awaited yet.
List<CompletableFuture<List<Map<String, LocalDate>>>> futures2 = instanceListChunk2.stream()
        .map(chunk -> CompletableFuture.supplyAsync(() -> mergeDays(chunk)))
        .collect(Collectors.toList());

// Wait for all tasks together, then concatenate their results in slice order.
CompletableFuture<Void> allOf2 = CompletableFuture.allOf(futures2.toArray(new CompletableFuture[0]));
List<Map<String, LocalDate>> mergedDateList2 = allOf2.thenApply(ignored -> {
    List<Map<String, LocalDate>> combined = new ArrayList<>();
    for (CompletableFuture<List<Map<String, LocalDate>>> finished : futures2) {
        combined.addAll(finished.join());
    }
    return combined;
}).get();

// Final pass over the per-slice results.
List<Map<String, LocalDate>> resultList2 = mergeDays(mergedDateList2);
long endTime2 = System.currentTimeMillis();
long duration2 = endTime2 - startTime2;
System.out.println("【commons框架分片 + 多线程合并】:");
System.out.println("运行时间: " + duration2 + " 毫秒");

// Total covered days; +1 because both the start day and the end day count.
long totalDays2 = 0L;
for (Map<String, LocalDate> range : resultList2) {
    totalDays2 += range.get("endDate").toEpochDay() - range.get("startDate").toEpochDay() + 1;
}
System.out.println(resultList2);
System.out.println("总计天数:"+ totalDays2);
- Hutool
<!-- 工具类 hutool -->
<!-- https://mvnrepository.com/artifact/cn.hutool/hutool-all -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.14</version>
</dependency>
/*
 * Multi-threaded merge, slicing done by Hutool's ListUtil.partition.
 * Each slice is merged asynchronously with mergeDays; the concatenated partial results are
 * then merged once more.
 */
long startTime3 = System.currentTimeMillis();
List<List<Map<String, LocalDate>>> instanceListChunk3 = ListUtil.partition(instanceList, subListLength);

// Submit one async merge per slice; nothing is awaited yet.
List<CompletableFuture<List<Map<String, LocalDate>>>> futures3 = instanceListChunk3.stream()
        .map(chunk -> CompletableFuture.supplyAsync(() -> mergeDays(chunk)))
        .collect(Collectors.toList());

// Wait for all tasks together, then concatenate their results in slice order.
CompletableFuture<Void> allOf3 = CompletableFuture.allOf(futures3.toArray(new CompletableFuture[0]));
List<Map<String, LocalDate>> mergedDateList3 = allOf3.thenApply(ignored -> {
    List<Map<String, LocalDate>> combined = new ArrayList<>();
    for (CompletableFuture<List<Map<String, LocalDate>>> finished : futures3) {
        combined.addAll(finished.join());
    }
    return combined;
}).get();

// Final pass over the per-slice results.
List<Map<String, LocalDate>> resultList3 = mergeDays(mergedDateList3);
long endTime3 = System.currentTimeMillis();
long duration3 = endTime3 - startTime3;
System.out.println("【Hutool框架分片 + 多线程合并】:");
System.out.println("运行时间: " + duration3 + " 毫秒");

// Total covered days; +1 because both the start day and the end day count.
long totalDays3 = 0L;
for (Map<String, LocalDate> range : resultList3) {
    totalDays3 += range.get("endDate").toEpochDay() - range.get("startDate").toEpochDay() + 1;
}
System.out.println(resultList3);
System.out.println("总计天数:"+ totalDays3);
另外,不推荐用 stream 的方式来分片(不是因为我没写出来),而是因为写出来之后发现时间开销很大。
上面这三种的时间开销都还可以,60w条数据的时候能缩短一半左右。