方法一: CompletableFuture使用
异步多线程处理任务
// Initialize a fixed-size thread pool for the archiving tasks.
int threadNum = 5;
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
executor.setCorePoolSize(threadNum);
executor.setMaxPoolSize(threadNum);
executor.setThreadNamePrefix("archive-data-");
executor.setWaitForTasksToCompleteOnShutdown(true);
executor.initialize();
// Compute the number of pages.
Long total = 40000000L;
int batchSize = 1000;
long totalPage = total % batchSize == 0 ? total / batchSize : total / batchSize + 1;
List<CompletableFuture<List<Archive>>> tasks = new ArrayList<>((int) totalPage);
// FIX: the original declared `Archive lastArchive = null` and then did
// `synchronized (lastArchive)` — synchronizing on null throws NPE, and a local
// that is later reassigned cannot be captured by a lambda (must be effectively
// final). A one-element array is an effectively-final, non-null holder that
// serves both as the shared cursor (lastArchive[0]) and as the lock object.
final Archive[] lastArchive = new Archive[1]; // business state needed for keyset paging
// Submit one asynchronous task per page.
LongStream.rangeClosed(1, totalPage).forEach(currentPage -> tasks.add(
        CompletableFuture
                .supplyAsync(() -> {
                    synchronized (lastArchive) { // serialize paging so the cursor stays consistent
                        List<Archive> archives = getData(); // fetch one page of business data
                        // set lastArchive[0] here for the next page's select
                        return archives;
                    }
                }, executor).whenComplete((archives, throwable) -> {
                    // business processing: insert, delete
                    // FIX: archives is null when the stage completed exceptionally;
                    // the original called archives.clear() unconditionally (NPE).
                    if (archives != null) {
                        archives.clear(); // drop references early to avoid OOM
                    }
                }).exceptionally(throwable -> {
                    // error handling, e.g. log output
                    return null;
                })
        )
);
// After every task completes: e.g. record the archiving run.
CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).whenComplete((aVoid, throwable) -> {
    System.out.println("==========================all over");
}).join();
executor.destroy(); // release the pool's threads
方法二: ForkJoinTask使用
采用递归和 fork join算法
- 任务类
//任务处理类,递归处理,即需要确定最小处理单元
/**
 * Fork/join task that archives a list of {@code Archive} records recursively.
 * A list no larger than {@code BATCH_SIZE} is the smallest unit of work and is
 * inserted directly; larger lists are split into at most {@code THREAD_NUM + 1}
 * sub-tasks (the last sub-task carries the remainder and splits itself again).
 */
public class NoticeArchiveTask extends RecursiveTask<Integer> {
    private static final Logger logger = LoggerFactory.getLogger(NoticeArchiveTask.class);
    // Target collection for archived notices (was a local literal in compute()).
    private static final String ARCHIVE_COLL_NAME = "notice-3011";
    // FIX: these were per-instance `private final int` fields; they are
    // compile-time constants and belong in static final form.
    private static final int BATCH_SIZE = 5;   // max records handled by one leaf task
    private static final int THREAD_NUM = 3;   // max sub-tasks forked per split

    private final List<Archive> datas;
    private final ArchiveDao archiveDao;       // FIX: made final — assigned once in the constructor

    public NoticeArchiveTask(List<Archive> datas, ArchiveDao archiveDao) {
        this.datas = datas;
        this.archiveDao = archiveDao;
    }

    /**
     * Archives {@code datas}: small batches are written directly, large lists
     * are split and the sub-task results joined.
     *
     * @return the number of records archived by this task including sub-tasks
     */
    @Override
    protected Integer compute() {
        int dataSize = datas.size();
        if (dataSize <= BATCH_SIZE) {
            // Leaf task: insert the whole (possibly empty) batch.
            if (!CollectionUtils.isEmpty(datas)) {
                archiveDao.insertMany(datas, ARCHIVE_COLL_NAME);
            }
            return dataSize;
        } else {
            int taskCount = dataSize % BATCH_SIZE == 0 ? dataSize / BATCH_SIZE : dataSize / BATCH_SIZE + 1;
            int lastIndex = 0;
            if (THREAD_NUM < taskCount) {
                // Cap the fan-out; everything past lastIndex goes into one extra
                // task below, which will split recursively on its own.
                taskCount = THREAD_NUM;
                lastIndex = THREAD_NUM * BATCH_SIZE;
            }
            List<NoticeArchiveTask> tasks = new ArrayList<>();
            for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
                int fromIndex = taskIndex * BATCH_SIZE;
                int toIndex = Math.min(fromIndex + BATCH_SIZE, dataSize);
                List<Archive> subList = datas.subList(fromIndex, toIndex);
                logger.info("taskIndex:{},subList.size:{}", taskIndex, subList.size());
                tasks.add(new NoticeArchiveTask(subList, archiveDao));
            }
            if (lastIndex > 0) {
                List<Archive> lastSubList = datas.subList(lastIndex, dataSize);
                tasks.add(new NoticeArchiveTask(lastSubList, archiveDao));
                logger.info("lastSubList.size:{}", lastSubList.size());
            }
            // invokeAll forks the sub-tasks and returns when each is done.
            invokeAll(tasks);
            // Sum the record counts reported by every sub-task.
            return tasks.stream().mapToInt(NoticeArchiveTask::join).sum();
        }
    }
}
- 调用类
/**
 * Archives notice data page by page: the first page is processed immediately,
 * each subsequent page waits (polling a cache flag) until the previous batch
 * has finished before it is fetched and submitted.
 */
public void archiveNoticeData() {
    Date dateTime = DateUtils.parseDate("2020-03-06");
    // Fetch the first page; getArchivePage also fills `page` with totals.
    Page<Archive> page = new Page<>();
    page.setPageSize(pageSize);
    List<Archive> archivePage = archiveDao.getArchivePage(dateTime, page);
    int totalPage = page.getTotalPage();
    int totalSize = page.getTotalSize();
    LOGGER.info("============archive==================pageSize:{},totalPage:{},totalSize:{}",pageSize,totalPage,totalSize);
    // Nothing to archive.
    if (totalSize == 0) {
        LOGGER.info("============archive==================notice表记录数为0,无须归档");
        // todo persist an archiving log entry
        return;
    }
    LOGGER.info("============archive==================第1批数据开始执行");
    ForkJoinPool forkJoinPool = new ForkJoinPool();
    try {
        // Process the first batch.
        doForkJoinTask(archivePage, forkJoinPool);
        if (totalPage > 1) {
            for (int pageIndex = 1; pageIndex < totalPage; pageIndex++) {
                String archiveFlag = CacheUtils.get("notice:archive:flag");
                // Wait while the previous batch is still running ("wait" flag set).
                while ("wait".equals(archiveFlag)) {
                    try {
                        Thread.sleep(500);
                        archiveFlag = CacheUtils.get("notice:archive:flag");
                        LOGGER.error("============archive==================等待中哦");
                    } catch (InterruptedException e) {
                        // FIX: the original swallowed the interrupt and kept
                        // looping, making the wait unbreakable. Restore the
                        // interrupt status and stop waiting.
                        Thread.currentThread().interrupt();
                        LOGGER.error(e.getMessage(), e);
                        break;
                    }
                }
                // Fetch and submit the next batch.
                int batchIndex = pageIndex + 1;
                LOGGER.info("============archive==================第{}批数据开始执行", batchIndex);
                page.setCurPage(batchIndex);
                archivePage = archiveDao.getArchivePage(dateTime, page);
                doForkJoinTask(archivePage, forkJoinPool);
            }
        }
        // Can be used to check whether the pool still has active work.
        int activeThreadCount = forkJoinPool.getActiveThreadCount();
        // Post-archiving business processing goes here.
    } finally {
        // FIX: the original never shut the pool down, leaking its worker threads.
        forkJoinPool.shutdown();
    }
}
/**
 * Submits one batch to the fork/join pool, blocks until the task tree
 * completes, and publishes a cache flag so the caller can pace the batches.
 */
private void doForkJoinTask(List<Archive> archivePage, ForkJoinPool forkJoinPool) {
    // FIX: the original set the "wait" flag inside a `while (dataSum == null)`
    // loop that never re-read dataSum — an infinite loop if ever entered, and
    // dead code otherwise because ForkJoinTask.get() already blocks until the
    // task completes. Net effect: "wait" was never published and the caller's
    // inter-batch pause never engaged. Publish it up front instead.
    CacheUtils.set("notice:archive:flag", 1800000, "wait");
    try {
        NoticeArchiveTask noticeArchiveTask = new NoticeArchiveTask(archivePage, archiveDao);
        ForkJoinTask<Integer> taskSubmitResult = forkJoinPool.submit(noticeArchiveTask);
        Integer dataSum = taskSubmitResult.get(); // blocks until the whole task tree finishes
        LOGGER.info("============archive==================该批处理数据总数:{}", dataSum);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // preserve the interrupt status
        LOGGER.error("============archive==================,该批处理数据异常,错误信息:", e);
    } catch (Exception e) {
        LOGGER.error("============archive==================,该批处理数据异常,错误信息:", e);
    } finally {
        // Always release the waiting caller, even when the batch failed;
        // otherwise it would spin until the 1800000 ms flag TTL expired.
        CacheUtils.set("notice:archive:flag", 1800000, "goon");
    }
}