Java优雅地进行批量数据插入或添加处理

ABin-阿斌

已于 2024-05-10 23:07:37 修改

阅读量1.1k

点赞数

分类专栏：各大实战问题分析；文章标签：java

于 2022-10-19 15:30:08 首次发布

ABin-阿斌

本文链接：https://blog.csdn.net/Mango_Bin/article/details/125155179

版权

各大实战问题分析专栏收录该内容

64 篇文章 8 订阅

订阅专栏

本文探讨了在处理大数据查询时如何进行分页，以减轻数据库压力。介绍了在业务层面用Java代码实现分页、使用工具类ListUtil进行切分、封装分页工具类（针对MybatisPlus）以及不同场景下的分页解决方案，如Mybatis的SQL写法、手动计算分页、SpringDataJPA的分批处理和多线程异步处理等方法。

摘要由CSDN通过智能技术生成

在这里插入图片描述

一、问题点

有时候在查询数据时可能需要根据【A表】的某个 ID 去获取【B表】的具体信息，但是又因为数据量的庞大，我们需要对【A表】查出来的数据结果进行分页处理之后再循环调用【B表】

二、解决方案

1、在业务层面用Java代码进行分页处理

比如：通过策略ID去获取当前策略下的订单信息，那么为了防止订单信息过多我们可以这么做

在这里插入图片描述

首先将查询获取订单信息在Service层写一个分页接口，然后进行调用
具体的分页参数信息可以根据业务来定，这里进行分批处理，一页：100条
然后进行循环翻页处理

在这里插入图片描述

2、使用工具类 List 总数切分

注意：ListUtil.partition() 这个方法从某种意义上说并没有达到真正分页的效果，因为调用它时，完整的 List 结果早已从数据库查询出来并加载到内存中了；而我们真正要做的，是在查询结果返回之前就减轻 DB 的压力。
当然，具体业务具体分析，少量的数据可以使用这个方法
直接使用 Hutool 工具类中的 ListUtil.partition(list, 100) 方法，参数一：待切分的 list；参数二：每组的元素个数

3、自己封装一个分页工具类

下面这个工具类适用于 MyBatisPlus

工具类代码

/**
 * Helpers that attach very large {@code IN} / {@code NOT IN} conditions to MyBatis-Plus
 * lambda wrappers by splitting the value list into groups of at most
 * {@link #MAX_IN_SIZE} elements.
 *
 * <p>For {@code IN} the groups are combined with {@code OR}:
 * {@code (col IN (g1) OR col IN (g2) ...)}. For {@code NOT IN} the groups are combined
 * with {@code AND}: {@code (col NOT IN (g1) AND col NOT IN (g2) ...)}; joining them with
 * {@code OR} would match almost every row and defeat the filter.
 *
 * <p>Every {@code cut*} method throws {@code Exception("参数错误")} when the value list is
 * {@code null} or empty.
 */
public class MybatisParameterUtils {

    /**
     * Maximum number of values placed in one IN / NOT IN list.
     * NOTE(review): presumably chosen to stay below a database limit of 1000 list
     * elements - confirm against the target database.
     */
    private static final int MAX_IN_SIZE = 900;

    /**
     * Adds {@code column IN (coll)} to a {@code LambdaQueryWrapper}, split into OR-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutInParameter(LambdaQueryWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.in(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            nested.in(column, groups.get(0));
            // Iterate over a view instead of removing the head, so the list is not mutated.
            for (List<F> group : groups.subList(1, groups.size())) {
                nested.or().in(column, group);
            }
        });
    }

    /**
     * Adds {@code column NOT IN (coll)} to a {@code LambdaQueryWrapper}, split into AND-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the NOT IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutNotInParameter(LambdaQueryWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.notIn(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            // No or() between the calls: consecutive conditions are joined with AND.
            for (List<F> group : groups) {
                nested.notIn(column, group);
            }
        });
    }

    /**
     * Adds {@code column IN (coll)} to a {@code LambdaQueryChainWrapper}, split into OR-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutInParameter(LambdaQueryChainWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.in(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            nested.in(column, groups.get(0));
            for (List<F> group : groups.subList(1, groups.size())) {
                nested.or().in(column, group);
            }
        });
    }

    /**
     * Adds {@code column NOT IN (coll)} to a {@code LambdaQueryChainWrapper}, split into AND-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the NOT IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutNotInParameter(LambdaQueryChainWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.notIn(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            for (List<F> group : groups) {
                nested.notIn(column, group);
            }
        });
    }

    /**
     * Adds {@code column IN (coll)} to a {@code LambdaUpdateWrapper}, split into OR-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutInParameter(LambdaUpdateWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.in(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            nested.in(column, groups.get(0));
            for (List<F> group : groups.subList(1, groups.size())) {
                nested.or().in(column, group);
            }
        });
    }

    /**
     * Adds {@code column NOT IN (coll)} to a {@code LambdaUpdateWrapper}, split into AND-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the NOT IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutNotInParameter(LambdaUpdateWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.notIn(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            for (List<F> group : groups) {
                nested.notIn(column, group);
            }
        });
    }

    /**
     * Adds {@code column IN (coll)} to a {@code LambdaUpdateChainWrapper}, split into OR-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutInParameter(LambdaUpdateChainWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.in(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            nested.in(column, groups.get(0));
            for (List<F> group : groups.subList(1, groups.size())) {
                nested.or().in(column, group);
            }
        });
    }

    /**
     * Adds {@code column NOT IN (coll)} to a {@code LambdaUpdateChainWrapper}, split into AND-ed groups.
     *
     * @param wrapper wrapper that receives the condition
     * @param column  getter reference identifying the column
     * @param coll    values for the NOT IN list
     * @throws Exception if {@code coll} is null or empty
     */
    public static <T, F> void cutNotInParameter(LambdaUpdateChainWrapper<T> wrapper, SFunction<T, ?> column, List<F> coll) throws Exception {
        List<List<F>> groups = splitList(coll, MAX_IN_SIZE);
        if (groups.isEmpty()) {
            throw new Exception("参数错误");
        }
        if (groups.size() == 1) {
            wrapper.notIn(column, groups.get(0));
            return;
        }

        wrapper.and(nested -> {
            for (List<F> group : groups) {
                nested.notIn(column, group);
            }
        });
    }

    /**
     * Splits {@code list} into consecutive groups of at most {@code groupSize} elements.
     *
     * <p>The groups are {@code subList} views of {@code list}, not copies, so they reflect
     * the backing list and become invalid if it is structurally modified.
     *
     * @param list      list to split; {@code null} is treated as empty
     * @param groupSize maximum number of elements per group, must be positive
     * @return a new mutable list of groups in original order; empty when {@code list} is null or empty
     * @throws IllegalArgumentException if {@code groupSize} is not positive
     */
    public static <F> List<List<F>> splitList(List<F> list, int groupSize) {
        if (groupSize <= 0) {
            throw new IllegalArgumentException("groupSize must be positive: " + groupSize);
        }
        if (list == null || list.isEmpty()) {
            return new ArrayList<>();
        }

        int length = list.size();
        // Ceiling division written so it cannot overflow for a very large groupSize.
        int groupCount = (length - 1) / groupSize + 1;
        List<List<F>> groups = new ArrayList<>(groupCount);

        int fromIndex = 0;
        while (fromIndex < length) {
            // Clamp the step to the remaining elements; this also avoids int overflow.
            int toIndex = fromIndex + Math.min(groupSize, length - fromIndex);
            groups.add(list.subList(fromIndex, toIndex));
            fromIndex = toIndex;
        }
        return groups;
    }
}

案例展示

思路就是把 IN 条件拆成多组、每组参数个数小于 1000 的组合条件；在 XML 中手写 SQL 时同理。

  // Arguments:                          condition wrapper,    getter method ref,  list of IN values
    MybatisParameterUtils.cutInParameter(deleteInfoWrapper,   Vo::getId,          list);

list 是要带入 IN 条件的参数，这个参数的数量可能会很大
参数一：条件 wrapper；参数二：要做 IN 判断的字段（getter 方法引用）；参数三：具体带入 IN 的参数值（list）

4、适用于Mybatis SQL写法

<!--
  Adds "t.id IN (...)" for dto.idList. Whenever index % 1000 == 999 the current list is
  closed and a new "OR t.id IN (" group is opened, so the first group holds 999 values
  and each later group holds at most 1000. Within a group the values are separated by
  the "," emitted in the otherwise branch. The whole condition is skipped when
  dto.idList is null or empty.
-->
<if test="dto.idList != null and dto.idList.size() > 0">
                and (t.id IN
                <foreach collection="dto.idList" index="index" open="(" close=")" item="item" >
                    <if test="index !=0">
                        <choose>
                            <when test="index % 1000 == 999">) OR t.id IN (</when>
                            <otherwise>,</otherwise>
                        </choose>
                    </if>
                    #{item}
                </foreach>
                )
</if>

5、手动计算分页处理方式举例

import java.util.List;

/**
 * Walks a large data set page by page and pushes every page to the service in
 * smaller update batches, so that neither the read nor the write handles the
 * whole data set at once.
 */
public class MassiveDataProcessor {

    /** Number of rows requested from the service per page. */
    private static final int PAGE_SIZE = 1000;

    /** Maximum number of rows passed to one {@code batchUpdate} call. */
    private static final int BATCH_SIZE = 100;

    /** Service used to count, read and update the data. */
    private final DataService dataService;

    /**
     * Creates a processor backed by the given service.
     *
     * @param dataService service used for all data access; must not be null
     * @throws NullPointerException if {@code dataService} is null
     */
    public MassiveDataProcessor(DataService dataService) {
        // The field previously had no initializer, so every call failed with an NPE.
        if (dataService == null) {
            throw new NullPointerException("dataService");
        }
        this.dataService = dataService;
    }

    /**
     * Reads all data in pages of {@code PAGE_SIZE} rows and updates each page in batches.
     * Pages are requested with 1-based page numbers.
     */
    public void processMassiveData() {
        long totalCount = dataService.getTotalCount();
        // Ceiling division: a partially filled last page still counts as a page.
        long totalPages = (totalCount + PAGE_SIZE - 1) / PAGE_SIZE;

        for (int currentPage = 1; currentPage <= totalPages; currentPage++) {
            List<Data> dataList = dataService.getDataByPage(currentPage, PAGE_SIZE);
            batchUpdateData(dataList);
        }
    }

    /**
     * Sends {@code dataList} to the service in slices of at most {@code BATCH_SIZE} rows.
     *
     * @param dataList rows of one page; a null or empty list is ignored
     */
    private void batchUpdateData(List<Data> dataList) {
        if (dataList == null || dataList.isEmpty()) {
            return;
        }

        int total = dataList.size();
        for (int start = 0; start < total; start += BATCH_SIZE) {
            int end = Math.min(start + BATCH_SIZE, total);
            // subList is a view of dataList; no copy is made for each batch.
            dataService.batchUpdate(dataList.subList(start, end));
        }
    }
}

6、使用Spring Data JPA分批插入和修改数据（举例）

import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
 
/**
 * Spring Data JPA repository for {@code YourEntity}, declared with {@code Long} as the
 * ID type argument of {@code JpaRepository}.
 */
public interface YourEntityRepository extends JpaRepository<YourEntity, Long> {
    /**
     * Returns the page of entities described by {@code pageable}.
     * NOTE(review): JpaRepository presumably already inherits a method with this
     * signature; if so this redeclaration is redundant - confirm.
     *
     * @param pageable page number and page size to fetch
     * @return the requested page of entities
     */
    Page<YourEntity> findAll(Pageable pageable);
}

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Service;
 
import java.util.List;
 
/**
 * Example service that walks all {@code YourEntity} rows in fixed-size pages and
 * processes each page in turn.
 */
@Service
public class YourService {

    /** Number of entities fetched and processed per page. */
    private static final int BATCH_SIZE = 1000;

    @Autowired
    private YourEntityRepository yourEntityRepository;

    /**
     * Iterates over every page returned by the repository and applies the per-entity
     * logic to each row, stopping after the last page.
     *
     * @param entities not read by this method; kept so existing callers still compile
     */
    public void batchUpdate(List<YourEntity> entities) {
        int currentPage = 0;
        // Declared outside the loop body: the do-while condition is evaluated outside
        // the body's scope, so a variable declared inside it would not compile there.
        Page<YourEntity> page;

        do {
            page = yourEntityRepository.findAll(PageRequest.of(currentPage, BATCH_SIZE));
            List<YourEntity> pageEntities = page.getContent();

            // Process the current page, e.g. modify or insert.
            for (YourEntity entity : pageEntities) {
                // Update logic, for example:
                // entity.setSomeField(newValue);
                // yourEntityRepository.save(entity);

                // Or insert logic, for example:
                // yourEntityRepository.save(newEntity);
            }

            // Move on to the next page.
            currentPage++;
        } while (page.hasNext()); // keep looping while more pages remain
    }
}