JDK:1.8+
在处理一个需求时,需要从另一个库迁移数据至我们的库,大概有400多W的数据,所以就写了一个多线程的导入,将查询出的多条数据进行拆分,然后插入至数据库。
具体代码一,此处使用的是
ExecutorService、Semaphore、CountDownLatch
/**
 * Imports {@code list} by splitting it into 500-row batches and running each batch as a
 * {@link MultiTask} on a bounded thread pool, then blocking until every batch finishes.
 *
 * @param list rows to import; nothing happens when null or empty
 */
private void multithreadedListProcess(List list) {
// 开始时间
long start = System.currentTimeMillis();
if (CollectionUtils.isEmpty(list)) {
return;
}
int listSize = list.size();
//跑批分页大小 (batch size per task; matches the 500-row batch-insert limit)
int limitNum = 500;
//任务数 = ceil(listSize / limitNum)
int taskNum = (listSize + limitNum - 1) / limitNum;
//最大线程数控制
int maxThreadNum = 5;
// Bounded queue sized to the task count; CallerRunsPolicy makes the submitting
// thread execute overflow tasks instead of rejecting them.
ExecutorService executor = new ThreadPoolExecutor(maxThreadNum, maxThreadNum,
1, TimeUnit.MINUTES,
new ArrayBlockingQueue<>(taskNum), Executors.defaultThreadFactory(),
new ThreadPoolExecutor.CallerRunsPolicy());
CountDownLatch countDownLatch = new CountDownLatch(taskNum);
//最大并发线程数控制
final Semaphore semaphore = new Semaphore(maxThreadNum);
for (int i = 0; i < taskNum; i++) {
int startIndex = i * limitNum;
// The last slice may be shorter than limitNum; Math.min covers both cases,
// replacing the duplicated if/else branches.
int endIndex = Math.min(startIndex + limitNum, listSize);
List handleList = list.subList(startIndex, endIndex);
executor.execute(new MultiTask(handleList, countDownLatch, semaphore));
}
try {
countDownLatch.await();
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing the interruption.
Thread.currentThread().interrupt();
} finally {
executor.shutdown();
System.err.println(Thread.currentThread().getName()+" 执行任务消耗了 :" + (System.currentTimeMillis() - start) + "毫秒");
}
}
/**
 * One import batch. Acquires a semaphore permit to cap concurrent imports, runs the
 * business logic, and counts down the caller's latch when done (always, even on failure).
 */
public class MultiTask implements Runnable {
private List list;//需要导入的集合 (rows for this batch)
private CountDownLatch countDownLatch;//signals the submitting thread when this task ends
//使用信号量来管理释放 (caps the number of batches importing at once)
private Semaphore semaphore;
public MultiTask(List list, CountDownLatch countDownLatch, Semaphore semaphore) {
this.list = list;
this.countDownLatch = countDownLatch;
this.semaphore = semaphore;
}
@Override
public void run() {
try {
// Equivalent to CollectionUtils.isEmpty; avoids the external dependency.
if (list != null && !list.isEmpty()) {
semaphore.acquire();
// BUG FIX: release only after a successful acquire. The original released in the
// outer finally, so an empty list (or an interrupted acquire) minted an EXTRA
// permit, silently raising the concurrency cap on every such task.
try {
//具体的业务-------------------------------------- start
//具体的业务-------------------------------------- end
} finally {
semaphore.release();
}
}
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing it.
Thread.currentThread().interrupt();
} catch (Exception e) {
e.printStackTrace();
} finally {
//线程任务完成 — the latch must fire no matter what, or the caller hangs forever.
countDownLatch.countDown();
}
}
}
该种方式已经可以满足我的导入需求。由于我的业务操作比较繁琐,期间需要查库,所以每一万条数据大概需要 30 秒左右的时间。因此在实践中决定改用 ForkJoinPool,看看效率能否进一步提升。
具体代码二,此处使用的是
ForkJoinPool
/**
 * Imports {@code list} using a dedicated Fork/Join pool (parallelism 10). Blocks until
 * the whole recursive task tree completes, then shuts the pool down.
 *
 * @param list rows to import; nothing happens when null or empty
 */
private void forkJoinProcess(List list) {
logger.info("fork/join导入开始:");
if (CollectionUtils.isEmpty(list)) {
return;
}
long startTime=System.currentTimeMillis();
// Constructed outside the try: the original declared pool = null and built it inside
// the try, so a constructor failure caused an NPE in the finally block.
ForkJoinPool pool = new ForkJoinPool(10);
try {
ForkJoinTask task = new ForkJoinTask(list);
// submit() hands back the same task; get() blocks until the task tree finishes —
// 这样能监听到结束 否则主线程直接结束了. The redundant cast is gone.
pool.submit(task).get();
} catch (InterruptedException e) {
// Restore the interrupt flag instead of swallowing it.
Thread.currentThread().interrupt();
} catch (Exception e) {
e.printStackTrace();
} finally {
pool.shutdown();
}
logger.info(" forkJoinProcess 执行任务消耗了 :" + (System.currentTimeMillis() - startTime) + "毫秒");
}
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.RecursiveAction;
/**
* 关于继承
* RecursiveAction 是不带返回值的
* RecursiveTask 是带返回值的(泛型指定返回类型)
*/
/**
 * Fork/Join import task: recursively splits the input list in half until each piece is at
 * most {@link #THRESHOLD} rows, then imports that batch.
 *
 * About the base classes:
 * RecursiveAction carries no return value; RecursiveTask carries one.
 *
 * NOTE(review): this class shadows java.util.concurrent.ForkJoinTask — consider renaming
 * (kept here so existing callers keep compiling).
 */
public class ForkJoinTask extends RecursiveAction {
// 临界值 mybatis批量插入有1000的限制,我们保持500条一批插入一次数据库
private static final int THRESHOLD = 500;
private List list;
public ForkJoinTask(List list){
this.list = list;
}
@Override
protected void compute() {
// Small enough: import directly; otherwise split in two and recurse.
if (list.size() <= THRESHOLD) {
executeTask(list);
} else {
System.err.println("===导入数量过多:"+list.size()+"进行分组拆分:");
List<List<Object>> lists = ForkJoinTask.averageAssign(list, 2);
// 递归: fork both halves and wait for both to complete.
ForkJoinTask task1 = new ForkJoinTask(lists.get(0));
ForkJoinTask task2 = new ForkJoinTask(lists.get(1));
invokeAll(task1, task2);
}
}
/**
 * 具体的导入业务 — imports one batch. Exceptions are logged, not propagated,
 * so one failed batch does not abort the rest of the task tree.
 *
 * @param list the batch to import (at most THRESHOLD rows)
 */
private void executeTask(List list){
// BUG FIX: the original had a stray '}' between the try block and its catch,
// which made the class fail to compile.
try {
// Equivalent to CollectionUtils.isEmpty; avoids the external dependency.
if (list != null && !list.isEmpty()) {
//具体的业务-------------------------------------- start
//具体的业务-------------------------------------- end
}
} catch (Exception e) {
e.printStackTrace();
}
}
/**
 * 将一组数据平均分成n组 — splits source evenly into n groups; the first
 * (size % n) groups each receive one extra element. Returned lists are
 * subList views backed by source.
 *
 * @param source 要分组的数据源
 * @param n 平均分成n组
 * @param <T> element type
 * @return n consecutive slices covering all of source
 */
public static <T> List<List<T>> averageAssign(List<T> source, int n) {
List<List<T>> result = new ArrayList<>();
int remainder = source.size() % n; //(先计算出余数)
int number = source.size() / n; //然后是商
int offset = 0;//偏移量 — how many extra elements have been handed out so far
for (int i = 0; i < n; i++) {
List<T> value;
if (remainder > 0) {
// This group absorbs one of the leftover elements.
value = source.subList(i * number + offset, (i + 1) * number + offset + 1);
remainder--;
offset++;
} else {
value = source.subList(i * number + offset, (i + 1) * number + offset);
}
result.add(value);
}
return result;
}
}
这里的效率大概提升了7秒左右,每一万数据。