前言
工作中有时会遇到集合中的数据量太大、需要分批次处理的情况,比如名单下发、MyBatis 批量插入或删除(单批数据量通常需要控制在 1000 条以内)。
1.List集合分批单线程处理
MyBatis 批量插入或删除时需要分批处理(例如 Oracle 的 IN 列表最多只支持 1000 个表达式)。下面的写法只适用于单线程情况,因为 ArrayList 是非线程安全的容器。
1.1单线程分批方式一
/**
 * Processes {@code closeList} in batches on the calling thread.
 *
 * <p>The list is walked in consecutive windows of at most 1000 elements and each
 * non-empty window is passed to {@code batchCloseOld}. The input list is only read:
 * it is no longer cleared while iterating, so unmodifiable lists are accepted and no
 * trailing empty batch is sent when the size is an exact multiple of the batch size.
 *
 * @param closeList the elements to process; nothing happens when
 *                  {@code CollectionUtils.isEmpty} reports it as empty
 */
private void SingleThreadedListHandler1(List closeList) {
    // The guard must test the parameter itself (it previously referenced an undefined name).
    if (CollectionUtils.isEmpty(closeList)) {
        return;
    }
    // Maximum number of elements handed to batchCloseOld per call.
    final int limitNum = 1000;
    final int size = closeList.size();
    for (int from = 0; from < size; from += limitNum) {
        // The last window may be shorter than limitNum, so cap it at the list size.
        int to = Math.min(from + limitNum, size);
        batchCloseOld(closeList.subList(from, to));
    }
}
/**
 * Optimized variant: drains {@code list} from the front in batches of up to three
 * elements, printing each batch before removing it from the list.
 *
 * <p>The list is empty once the method returns.
 *
 * @param list the list to drain; it is modified in place
 */
private void SingleThreadBatchHandlerList2(List list) {
    if (CollectionUtils.isEmpty(list)) {
        return;
    }
    // Maximum number of elements per batch.
    final int batchSize = 3;
    int batchIndex = 0;
    while (!list.isEmpty()) {
        // The final batch holds whatever is left when the size is not a multiple of batchSize.
        int take = Math.min(batchSize, list.size());
        List head = list.subList(0, take);
        System.out.println("第" + batchIndex + head);
        // Clearing the view removes the printed elements from the backing list.
        head.clear();
        batchIndex++;
    }
}
1.2单线程分批方式二
/**
 * Walks {@code userInfoList} in consecutive windows of at most ten elements and passes
 * each window to {@code dbDao.batchExecutor} together with
 * {@code NameSpaceEnum.USER_MAPPER} and the {@code "batchInsert"} argument.
 *
 * @param userInfoList the elements to process; it is read but not modified here
 */
public void SingleThreadedListHandler1(final List userInfoList) {
    final int total = userInfoList.size();
    // Maximum number of elements per batch.
    final int limitNum = 10;
    for (int from = 0; from < total; from += limitNum) {
        // The last window ends at the list size when the size is not a multiple of limitNum.
        int to = Math.min(from + limitNum, total);
        List window = userInfoList.subList(from, to);
        dbDao.batchExecutor(NameSpaceEnum.USER_MAPPER, "batchInsert", window);
    }
}
2.多线程处理List数据
2.1多线程顺序遍历List集合
实例1:如何让n个线程顺序遍历含有n个元素的List集合
/**
 * Example 1: let several threads traverse, in order, consecutive parts of one list.
 *
 * <p>Builds fourteen sample items ({@code item1} .. {@code item14}) and hands them to
 * {@code handleList} with a thread count of four.
 */
@Test
public void multiThreadedSequentialTraversalList() {
    multithreadedTest test = new multithreadedTest();
    // Prepare the sample data.
    List<String> data = new ArrayList<String>();
    int n = 1;
    while (n < 15) {
        data.add("item" + n);
        n++;
    }
    test.handleList(data, 4);
}
/**
 * Splits {@code data} into consecutive ranges and starts one {@code HandleThread} per
 * non-empty range.
 *
 * <p>The range length is {@code ceil(size / threadNum)}. Because of the rounding up, the
 * list can be used up before {@code threadNum} ranges have been produced (for example
 * 5 elements and 4 threads give ranges of length 2, so only 3 ranges exist). Ranges are
 * therefore clamped to the list size and no thread is started for an empty range;
 * previously such ranges made {@code subList} throw inside the worker threads.
 *
 * @param data      the list to traverse; workers read it through {@code subList} views
 * @param threadNum the maximum number of worker threads; a value of 0 still fails with
 *                  an {@link ArithmeticException} from the division below
 */
public synchronized void handleList(List<String> data, int threadNum) {
    int size = data.size();
    // Elements per thread, rounded up so that every element is covered.
    int limitNum = size % threadNum == 0 ? size / threadNum : (size / threadNum + 1);
    for (int i = 0; i < threadNum; i++) {
        int startIndex = i * limitNum;
        if (startIndex >= size) {
            // Everything has been handed out already; the remaining threads would be idle.
            break;
        }
        // Never let the last (possibly shorter) range run past the end of the list.
        int endIndex = Math.min(startIndex + limitNum, size);
        HandleThread thread = new HandleThread("线程[" + (i + 1) + "] ", data, startIndex, endIndex);
        thread.start();
    }
}
/**
 * Worker thread that prints the {@code [start, end)} slice of a list, prefixed with the
 * label it was given at construction time.
 */
class HandleThread extends Thread {
    private final String threadName;
    private final List<String> data;
    private final int start;
    private final int end;

    /**
     * @param threadName label printed in front of the output line
     * @param data       the list the slice is taken from
     * @param start      index of the first element of the slice (inclusive)
     * @param end        index just past the last element of the slice (exclusive)
     */
    public HandleThread(String threadName, List<String> data, int start, int end) {
        this.threadName = threadName;
        this.data = data;
        this.start = start;
        this.end = end;
    }

    /** Prints the label, the slice size and the slice contents on one line. */
    @Override
    public void run() {
        List<String> slice = data.subList(start, end);
        StringBuilder line = new StringBuilder();
        line.append(threadName)
            .append("处理了")
            .append(slice.size())
            .append("条!")
            .append(slice);
        System.out.println(line);
    }
}
2.2多线程并发读取list对象
List多线程并发读取现有的list对象
/**
 * Example 2: several threads concurrently read one existing list.
 *
 * <p>One {@code MyThread1} is created per available processor and registered in the
 * shared map under its thread id with its index as the starting offset. All workers are
 * registered first, then started together, and only afterwards joined, so that they
 * actually run at the same time. Joining each thread right after starting it, as the
 * previous version did, ran the workers one after another.
 */
@Test
void multiThreadedReadListObject() {
    List<String> list = new ArrayList<String>(10);
    Map<Long, Integer> map = new HashMap<>(16);
    for (int i = 0; i < 100; i++) {
        list.add("" + i);
    }
    int pcount = Runtime.getRuntime().availableProcessors();
    long start = System.currentTimeMillis();

    // Finish every write to the HashMap before any worker starts: HashMap is not
    // thread-safe, and Thread.start() publishes the completed map to the new thread.
    List<Thread> workers = new ArrayList<Thread>(pcount);
    for (int i = 0; i < pcount; i++) {
        Thread t = new MyThread1(list, map);
        map.put(t.getId(), Integer.valueOf(i));
        workers.add(t);
    }
    for (Thread t : workers) {
        t.start();
    }
    for (int i = 0; i < workers.size(); i++) {
        try {
            workers.get(i).join();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller and stop waiting for the rest.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            break;
        }
        // Guard against machines with more processors than list elements.
        if (i < list.size()) {
            System.out.println(list.get(i));
        }
    }
    System.out.println("----" + (System.currentTimeMillis() - start));
}
}
/**
 * Worker thread that prints every {@code pcount}-th element of a shared list, where
 * {@code pcount} is the number of available processors and the starting index is looked
 * up in the shared map under the id of the currently running thread.
 */
class MyThread1 extends Thread {
    private final List<String> list;
    private final Map<Long, Integer> map;

    /**
     * @param list the list to read from
     * @param map  thread id to starting index; must contain an entry for the thread that
     *             executes {@link #run()}
     */
    public MyThread1(List<String> list, Map<Long, Integer> map) {
        this.list = list;
        this.map = map;
    }

    /** Prints the elements at offset, offset + pcount, offset + 2 * pcount, ... */
    @Override
    public void run() {
        int stride = Runtime.getRuntime().availableProcessors();
        int index = map.get(Thread.currentThread().getId());
        while (index < list.size()) {
            System.out.println(list.get(index));
            index += stride;
        }
    }
}
2.3多线程分段处理List集合
2.31实例一
场景:大数据List集合,新增,更新,删除数据等
解决方案:
1.List集合分段,
2.动态创建线程池newFixedThreadPool
3.操作在多线程中实现
注意事项:使用多线程时候根据业务场景来合理设置线程池中的各个参数
核心线程数,最大线程数,队列数量,线程池的拒绝策略
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;
@SpringBootTest
class ExcelApplicationTests {

    /**
     * Example 1: process a list in segments on multiple threads.
     *
     * <p>Scenario: a large list whose elements have to be processed, for example
     * compared against reference data to derive additions, updates and cancellations.
     *
     * <p>Approach:
     * <ol>
     *   <li>cut the list into consecutive segments of at most {@code limitNum} elements;</li>
     *   <li>create a thread pool sized to the number of segments;</li>
     *   <li>run one task per segment through {@code invokeAll} and print each result.</li>
     * </ol>
     */
    @Test
    public void multithreadedListHandler1() {
        // Start time, used for the elapsed-time report at the end.
        long start = System.currentTimeMillis();
        List<String> list = new ArrayList<>(10);
        for (long i = 1; i <= 4; i++) {
            list.add(i + "");
        }
        int listSize = list.size();
        // Number of elements handled by each task.
        int limitNum = 2;
        // Number of segments (and threads), rounded up to cover a trailing partial segment.
        int threadNum = listSize % limitNum == 0 ? listSize / limitNum : listSize / limitNum + 1;
        ExecutorService executor = new ThreadPoolExecutor(threadNum, threadNum, 1, TimeUnit.MINUTES,
                new ArrayBlockingQueue<>(threadNum), Executors.defaultThreadFactory(),
                new ThreadPoolExecutor.CallerRunsPolicy());
        List<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>(threadNum);
        // Assign one segment to each task.
        for (int i = 0; i < threadNum; i++) {
            int from = i * limitNum;
            // The last segment ends at the list size and may be shorter than limitNum.
            int to = Math.min(from + limitNum, listSize);
            final List<String> handleList = list.subList(from, to);
            System.out.println("第" + (i + 1) + "组:" + handleList.toString());
            // invokeAll returns its futures in the same order as this task list.
            tasks.add(() -> {
                System.out.println(Thread.currentThread().getName() + "线程:" + handleList);
                return 1;
            });
        }
        try {
            List<Future<Integer>> results = executor.invokeAll(tasks);
            for (Future<Integer> future : results) {
                System.out.println(future.get());
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (ExecutionException e) {
            e.printStackTrace();
        } finally {
            // Stop accepting new tasks; already submitted tasks still complete.
            executor.shutdown();
            System.out.println("线程任务执行结束");
            System.err.println("执行任务消耗了 :" + (System.currentTimeMillis() - start) + "毫秒");
        }
    }
}
2.32实例二
/**
 * Example 2: process a list in segments on multiple threads and merge the results.
 *
 * <p>One {@code Task} per segment of at most {@code limitNum} elements is submitted to a
 * thread pool; the returned futures are then collected in submission order and their
 * lists are appended to a single result list.
 */
@Test
public void multithreadedListHandler2() {
    List<Object> list = new ArrayList<Object>(10);
    for (int i = 1; i <= 4; i++) {
        list.add(i + "");
    }
    int limitNum = 2;
    // Total number of elements.
    int listSize = list.size();
    // Number of segments (and threads), rounded up to cover a trailing partial segment.
    int threadNum = listSize % limitNum == 0 ? listSize / limitNum : listSize / limitNum + 1;
    ExecutorService executor = new ThreadPoolExecutor(threadNum, threadNum, 1, TimeUnit.MINUTES,
            new ArrayBlockingQueue<>(threadNum), Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    List<Future<List<Object>>> futures = new ArrayList<>(threadNum);
    // Hand one index range to each task.
    for (int i = 0; i < threadNum; i++) {
        int from = i * limitNum;
        // The last range ends at the list size and may be shorter than limitNum.
        int to = Math.min(from + limitNum, listSize);
        futures.add(executor.submit(new Task(list, from, to)));
    }
    try {
        List<Object> result = new ArrayList<>();
        for (Future<List<Object>> future : futures) {
            // get() blocks until the task has finished, so no busy-wait on isDone() is
            // needed; spinning would only burn a CPU core while waiting.
            result.addAll(future.get());
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the caller can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Shut the pool down; it no longer accepts new tasks.
        executor.shutdown();
    }
}
/**
 * Callable that copies the elements with indices in {@code [start, end)} of a list into
 * a new list, prints that new list together with the executing thread's name, and
 * returns it.
 */
class Task implements Callable<List<Object>> {
    private final List<Object> list;
    private final int start;
    private final int end;

    /**
     * @param list  the source list
     * @param start index of the first element to copy (inclusive)
     * @param end   index just past the last element to copy (exclusive)
     */
    public Task(List<Object> list, int start, int end) {
        this.list = list;
        this.start = start;
        this.end = end;
    }

    /**
     * @return a new list holding the elements of the assigned range, in order
     */
    @Override
    public List<Object> call() throws Exception {
        List<Object> collected = new ArrayList<Object>();
        int index = start;
        while (index < end) {
            // Per-element processing would go here before the element is collected.
            collected.add(list.get(index));
            index++;
        }
        System.out.println(Thread.currentThread().getName() + "线程:" + collected);
        return collected;
    }
}
2.33 List集合分批多线程处理,同时控制最大并发
实际应用中,分批后线程数量过大,会导致线程阻塞,线程切换上下文,效率不高,本次是根据数据量动态设置线程数,同时控制最大并发数量(业务中有IO操作,避免过大并发导致堵塞),实现效率提高
思路:
1根据数据量动态设置线程数
2.使用Semaphore 控制允许并发访问线程的个数
3.CountDownLatch计数器闭锁
如何控制某个方法允许并发访问线程的个数?
Semaphore类有两个重要方法
1、semaphore.acquire();
请求一个信号量(许可),此时可用信号量个数减1;当可用个数减少到0时,后续的acquire调用会阻塞等待,直到有线程执行release()使可用个数大于0,才能继续获取信号量并往下执行
2、semaphore.release();
释放一个信号量,这时候信号量个数+1,
也就是说在线程里执行某个方法的时候,在方法里用该类对象进行控制,就能保证所有的线程中最多只有指定信号量个数个该方法在执行。
举例:我开启了100个线程去执行某个方法,但只希望所有线程中最多有五个线程同时执行该方法,其他线程必须排队等待。
则可以使用Semaphore对象进行控制,该对象new初始化的时候有个int参数,即指定最多信号量个数。
/**
 * Processes a ten-million-element list in pages of 1000 elements on a five-thread pool.
 *
 * <p>One {@code SyncTask} per page is submitted together with a shared
 * {@link CountDownLatch} (one count per page) and a shared {@link Semaphore} holding
 * five permits; the method then waits on the latch and reports the elapsed time.
 */
@Test
void multithreadedListSegmentation3() {
    // Start time, used for the elapsed-time report at the end.
    final long start = System.currentTimeMillis();
    List<String> list = new ArrayList<>(20000);
    for (long i = 1; i <= 10000000; i++) {
        list.add(i + "");
    }
    final int listSize = list.size();
    // Page size of each batch.
    final int limitNum = 1000;
    // Number of pages, rounded up to cover a trailing partial page.
    int threadNum = listSize / limitNum;
    if (listSize % limitNum != 0) {
        threadNum++;
    }
    // Upper bound for the number of pool threads.
    final int maxthreadNum = 5;
    ExecutorService executor = new ThreadPoolExecutor(maxthreadNum, maxthreadNum, 1, TimeUnit.MINUTES,
            new ArrayBlockingQueue<>(threadNum), Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    CountDownLatch countDownLatch = new CountDownLatch(threadNum);
    // Upper bound for the number of tasks working at the same time.
    final Semaphore semaphore = new Semaphore(maxthreadNum);
    for (int i = 0; i < threadNum; i++) {
        int startIndex = i * limitNum;
        // The last page ends at the list size; every other page is exactly limitNum long.
        int endIndex = (i + 1 == threadNum) ? listSize : startIndex + limitNum;
        List<String> handleList = list.subList(startIndex, endIndex);
        executor.execute(new SyncTask(handleList, countDownLatch, semaphore));
    }
    try {
        countDownLatch.await();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } finally {
        executor.shutdown();
        System.out.println("线程任务执行结束");
        System.err.println("执行任务消耗了 :" + (System.currentTimeMillis() - start) + "毫秒");
    }
}
}
/**
 * Runnable that processes one page of a list while holding a permit of a shared
 * semaphore, and counts a shared latch down exactly once when it is finished.
 *
 * <p>The permit is released only if it was actually acquired, and the latch is counted
 * down in a {@code finally} block so the thread waiting on the latch cannot hang when
 * the processing throws.
 */
@Slf4j
class SyncTask implements Runnable {
    private List<String> list;
    private CountDownLatch countDownLatch;
    private Semaphore semaphore;

    /**
     * @param list           the page of elements to process
     * @param countDownLatch latch counted down once per task
     * @param semaphore      limits how many tasks process their page at the same time
     */
    public SyncTask(List<String> list, CountDownLatch countDownLatch, Semaphore semaphore) {
        this.list = list;
        this.countDownLatch = countDownLatch;
        this.semaphore = semaphore;
    }

    @Override
    public void run() {
        try {
            if (CollectionUtils.isEmpty(list)) {
                return;
            }
            // Tracks whether acquire() succeeded: releasing a permit that was never
            // acquired would silently raise the concurrency limit.
            boolean permitHeld = false;
            try {
                semaphore.acquire();
                permitHeld = true;
                list.stream().forEach(t -> {
                    // Business processing for one element goes here.
                });
                System.out.println(Thread.currentThread().getName() + "线程:" + list);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so the pool can observe the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } finally {
                if (permitHeld) {
                    semaphore.release();
                }
            }
        } finally {
            // Always signal completion, even when the processing above threw.
            countDownLatch.countDown();
        }
    }
}
效率测试
执行任务消耗了 :14217毫秒
执行任务消耗了 :202588毫秒
由此可见,使用信号量控制并发访问线程数,比不控制并发访问线程数的效率要高出不少。
2.34 将集合按指定数量分组,名单下发
将集合按指定数量分组,list中的元素被平均分配到n个集合中(名单下发)
/**
 * Splits a list into groups of (almost) equal size, one group per customer-service
 * seat, and processes every group as a {@code Task1} on a thread pool.
 *
 * <p>The group size is {@code ceil(listSize / seatCount)}; the number of groups is
 * derived from that size, so it never exceeds the number of seats. The method waits on
 * a latch for all groups and then reports the elapsed time.
 */
@Test
public void collectionElementsAreGroupedByAverage() {
    // Start time, used for the elapsed-time report at the end.
    long start = System.currentTimeMillis();
    // Sample data.
    List<String> list = new ArrayList<>(10);
    for (long i = 1; i <= 123; i++) {
        list.add(i + "");
    }
    // Customer-service seats.
    List<String> currentSeatsList = new ArrayList<>(10);
    for (long i = 1; i <= 12; i++) {
        currentSeatsList.add("customer" + i);
    }
    int listSize = list.size();
    int seatCount = currentSeatsList.size();
    // Elements per seat, rounded up so that every element is assigned.
    int limitNum = listSize % seatCount == 0 ? listSize / seatCount : listSize / seatCount + 1;
    // Number of groups actually needed for that group size.
    int batchNum = listSize % limitNum == 0 ? listSize / limitNum : listSize / limitNum + 1;
    // Number of available processors.
    int pcount = Runtime.getRuntime().availableProcessors();
    // Upper bound for the number of pool threads.
    int maxthreadNum = 5;
    // ThreadPoolExecutor rejects corePoolSize > maximumPoolSize with an
    // IllegalArgumentException, which happened on machines with more than five
    // processors; cap the core size at the maximum.
    int corePoolSize = Math.min(pcount, maxthreadNum);
    ExecutorService executor = new ThreadPoolExecutor(corePoolSize, maxthreadNum, 1, TimeUnit.MINUTES,
            new ArrayBlockingQueue<>(batchNum), Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    // Counted down once per group so the test can wait for all of them.
    CountDownLatch countDownLatch = new CountDownLatch(batchNum);
    // Upper bound for the number of groups processed at the same time.
    final Semaphore semaphore = new Semaphore(maxthreadNum);
    for (int i = 0; i < batchNum; i++) {
        int startIndex = i * limitNum;
        // The last group ends at the list size and may be shorter than limitNum.
        int endIndex = Math.min(startIndex + limitNum, listSize);
        List<String> handleList = list.subList(startIndex, endIndex);
        Task1 task = new Task1(handleList, countDownLatch, semaphore, currentSeatsList.get(i));
        executor.execute(task);
    }
    try {
        countDownLatch.await();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the caller can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } finally {
        executor.shutdown();
        System.out.println("线程任务执行结束");
        System.err.println("执行任务消耗了 :" + (System.currentTimeMillis() - start) + "毫秒");
    }
}
/**
 * Runnable that processes the group of elements assigned to one customer-service seat
 * while holding a permit of a shared semaphore, and counts a shared latch down exactly
 * once when it is finished.
 *
 * <p>The permit is released only if it was actually acquired, and the latch is counted
 * down in a {@code finally} block so the thread waiting on the latch cannot hang when
 * the processing throws.
 */
@Slf4j
class Task1 implements Runnable {
    private List<String> list;
    private CountDownLatch countDownLatch;
    private Semaphore semaphore;
    private String currentSeats;

    /**
     * @param list           the group of elements to process
     * @param countDownLatch latch counted down once per task
     * @param semaphore      limits how many tasks process their group at the same time
     * @param currentSeats   the seat this group is assigned to; only used in the output
     */
    public Task1(List<String> list, CountDownLatch countDownLatch, Semaphore semaphore, String currentSeats) {
        this.list = list;
        this.countDownLatch = countDownLatch;
        this.semaphore = semaphore;
        this.currentSeats = currentSeats;
    }

    @Override
    public void run() {
        try {
            if (CollectionUtils.isEmpty(list)) {
                return;
            }
            // Tracks whether acquire() succeeded: releasing a permit that was never
            // acquired would silently raise the concurrency limit.
            boolean permitHeld = false;
            try {
                semaphore.acquire();
                permitHeld = true;
                list.stream().forEach(t -> {
                    // Business processing for one element goes here.
                });
                String strs = String.format("客服:%s 线程:%s", currentSeats, Thread.currentThread().getName() + list);
                System.out.println(strs);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so the pool can observe the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            } finally {
                if (permitHeld) {
                    semaphore.release();
                }
            }
        } finally {
            // Always signal completion, even when the processing above threw.
            countDownLatch.countDown();
        }
    }
}
3.单线程处理map数据
3.1单线程实例
/**
 * Walks the entries of {@code mapTen} once and prints them in batches of at most
 * {@code limitNum} entries, each batch collected into a fresh {@link HashMap}.
 *
 * @param mapTen   the map whose entries are batched; it is only read
 * @param limitNum maximum number of entries per batch
 */
public static void batchProcess(Map<String, Object> mapTen, Integer limitNum) {
    int total = mapTen.size();
    // Number of batches, rounded up to cover a trailing partial batch.
    int batches;
    if (total % limitNum == 0) {
        batches = total / limitNum;
    } else {
        batches = total / limitNum + 1;
    }
    // A single iterator is shared by all batches so each one continues where the
    // previous batch stopped.
    Iterator<Map.Entry<String, Object>> cursor = mapTen.entrySet().iterator();
    for (int batch = 0; batch < batches; batch++) {
        Map<String, Object> chunk = new HashMap<String, Object>(16);
        for (int taken = 0; cursor.hasNext() && taken < limitNum; taken++) {
            Map.Entry<String, Object> entry = cursor.next();
            chunk.put(entry.getKey(), entry.getValue());
        }
        // Per-batch operation.
        System.out.println(chunk);
    }
}