关于批量数据的导入修改

雪上一枝蒿

已于 2023-11-08 21:14:01 修改

阅读量45

点赞数

文章标签： windows

于 2023-11-08 21:10:08 首次发布

本文链接：https://blog.csdn.net/zx121221856/article/details/134298163

版权

/**
 * One-off maintenance job, run as a test:
 * 1. reads "shopId,phone,name" rows from a GBK-encoded text file,
 * 2. keeps only the rows whose shop id is reported by {@code bizShopMapper.selectNull},
 * 3. updates those shops in batches via {@code executeSql},
 * 4. appends the ids returned by {@code bizShopMapper.exportNull} to an export file.
 */
@Test
    public void importTxt() {
        List<BizShop> bizShopList = new ArrayList<>();
        // try-with-resources closes the whole reader chain (and the file handle underneath it)
        // even when reading fails part-way. The bytes are decoded as GBK.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream("C:\\Users\\logging\\Desktop\\20231106.txt"), "GBK"))) {
            String line;
            int lineNo = 0;
            while ((line = br.readLine()) != null) {
                lineNo++;
                String[] split = line.split(",");
                // A blank or short line used to throw and silently abort the rest of the file;
                // skip just that line instead and keep going.
                if (split.length < 3 || split[1].isEmpty()) {
                    log.warn("skipping malformed line {}: {}", lineNo, line);
                    continue;
                }
                BizShop bizShop = new BizShop();
                bizShop.setShopId(split[0]);
                // The first character of the phone column is dropped.
                // NOTE(review): presumably a prefix added by the export tool - confirm against the input file.
                bizShop.setAdminPhone(split[1].substring(1));
                bizShop.setAdminName(split[2]);
                bizShopList.add(bizShop);
            }
        } catch (java.io.IOException e) {
            // Best effort, as before: continue with whatever was read, but keep the stack trace.
            log.error("failed to read import file", e);
        }

        List<String> shopIds = bizShopList.stream().map(BizShop::getShopId).collect(Collectors.toList());

        // An empty id list would make selectNull render "shop_id in ()", which is not valid SQL,
        // so the query is skipped when nothing was read.
        List<String> nullIds;
        if (shopIds.isEmpty()) {
            nullIds = new ArrayList<>();
        } else {
            nullIds = bizShopMapper.selectNull(shopIds);
        }

        // Rows that actually need to be written: hash lookup instead of a list scan per row.
        // (Fully qualified because this file's import list is not visible from here.)
        java.util.Set<String> nullIdSet = new java.util.HashSet<>(nullIds);
        List<BizShop> bizShops = bizShopList.stream()
                .filter(bizShop -> nullIdSet.contains(bizShop.getShopId()))
                .collect(Collectors.toList());
        log.info("文本读出来的空id数量---------" + shopIds.size() + "数据库里符合条件的空Ids---------" + nullIds.size() + "需要执行的空Ids---------" + bizShops.size());
        executeSql(bizShops);
        List<String> exportIds = bizShopMapper.exportNull();
//        Set<String> set = new HashSet<>();
//        set.addAll(exportIds);
//        set.addAll(shopIds);
//        log.info(exportIds.size()+"+"+shopIds.size()+"------------------------"+set.size());

        // Find the elements present in both lists
//        List<String> intersection = new ArrayList<>(exportIds);
//        intersection.retainAll(shopIds);

        // Print the duplicated elements
//        log.info("-----------重复的Id："+intersection+intersection.size());

        // Export the ids that are still empty. The file is opened in append mode, UTF-8 encoded.
        File outFile = new File("C:\\Users\\logging\\Desktop\\20231108.txt");
        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outFile, true), "utf-8"), 10240)) {
            for (String exportId : exportIds) {
                out.write(exportId + "\r\n");
            }
            out.flush();
        } catch (java.io.IOException e1) {
            log.error("failed to write export file", e1);
        }


    /* The thread pool below is NOT used; it is only kept as a sketch of the approach.
       NOTE(review): as written the sketch is unsafe - splitList is a plain ArrayList that is
       mutated from several threads, the guard checks bizShops instead of splitList, and only
       three tasks are submitted, each taking a single sub-list, so with more than three
       sub-lists the remainder would never be processed.

        // Number of rows handled per thread
        int batchSize = 4000;
        // Number of full sub-lists
        int batch = bizShops.size() / batchSize;
        // Size of the last, partial sub-list
        int lastSize = bizShops.size() % batchSize;
        List<List<BizShop>> splitList = new ArrayList<>();
        for (int i = batchSize; i <= batch * batchSize; i = i + batchSize) {
            // Slice out the rows for this sub-list
            List<BizShop> threatList = bizShops.subList(i - batchSize, i);
            // Add this sub-list to the collection
            splitList.add(threatList);
            log.info("{}", threatList.size());
        }
        if (lastSize != 0) {
            // Leftover rows go into one final sub-list
            List<BizShop> lastList = bizShops.subList(batchSize * batch, bizShops.size());
            log.info("最后一次数据大小{}", +lastList.size());
            splitList.add(lastList);
        }
        log.info("数据集合大小{}", +splitList.size());
        ExecutorService executorService = Executors.newFixedThreadPool(3);

        for (int i = 0; i < 3; i++) {
            executorService.execute(new Runnable() {
                @Override
                public void run() {
                    if (bizShops.size() > 0) {
                        List<BizShop> shops = splitList.remove(splitList.size() - 1);
                        executeSql(shops);
                    }
                }
            });
        }
        executorService.shutdown();
    */
    }

    /**
     * Sends the given shops to {@code bizShopMapper.updatePhone} in slices of 300 rows.
     * All full slices go first; a shorter trailing slice, if there is one, goes last.
     *
     * @param bizShops shops to update; slices are taken with {@code subList}, so nothing is copied
     */
    private void executeSql(List<BizShop> bizShops) {
        final int chunk = 300;
        final int total = bizShops.size();
        // Number of full slices, and how many rows are left over after them
        final int fullChunks = total / chunk;
        final int remainder = total % chunk;

        log.info("插入数量{},分多少批{},最后一批的大小{}", chunk, fullChunks, remainder);

        // Full slices: round r covers [(r - 1) * chunk, r * chunk)
        for (int round = 1; round <= fullChunks; round++) {
            log.info("开始");
            int end = round * chunk;
            List<BizShop> slice = bizShops.subList(end - chunk, end);
            log.info("批量导入数据第{}次", round);
            bizShopMapper.updatePhone(slice);
        }

        // Nothing left over: done
        if (remainder == 0) {
            return;
        }

        // Trailing partial slice
        log.info("最后一次导入数据");
        bizShopMapper.updatePhone(bizShops.subList(fullChunks * chunk, total));
    }
}

<!-- Among the given shop ids, return those whose admin_name or admin_phone is the empty string.
     Note: the comparison is against '' only; rows holding SQL NULL are not matched. -->
<select id="selectNull" resultType="java.lang.String">
    select shop_id from biz_shop where
    <choose>
        <when test="list != null and list.size() > 0">
            shop_id in
            <foreach collection="list" open="(" close=")" item="id" separator=",">#{id}</foreach>
        </when>
        <!-- An empty list would render "shop_id in ()", which is invalid SQL; match no rows instead. -->
        <otherwise>1 = 0</otherwise>
    </choose>
    and (admin_name = '' or admin_phone = '')
</select>
<!-- Return the shop_id of every biz_shop row whose admin_name or admin_phone is the empty string,
     ordered by id. Note: the comparison is against '' only; rows holding SQL NULL are not matched. -->
<select id="exportNull" resultType="java.lang.String">
    select shop_id from biz_shop where admin_name = '' or admin_phone = ''
    order by id
</select>
<!-- Batch update: loops over the list parameter and assembles one update statement per element -->
<update id="updatePhone">
    <!-- Takes the list parameter and builds the SQL in a loop; note how the foreach is written:
         separator=";" puts a semicolon between the statements generated for consecutive elements
         item="shop" is the current element of the list
         collection="list" is the key under which the list parameter is exposed
         Because several statements are sent in one call, the JDBC connection must allow
         multiple statements (see the allowMultiQueries=true note in the surrounding article). -->
    <foreach collection="list" separator=";" item="shop">
        update biz_shop set
        admin_name = #{shop.adminName},
        admin_phone = #{shop.adminPhone}
        where shop_id = #{shop.shopId}
    </foreach>
</update>

注意：JUnit 单元测试里不适合跑多线程（测试方法一返回，JVM 就可能退出，子线程还没执行完就被终止）。需要多线程时，可以写一个 Controller，然后用 POST 发 HTTP 请求来触发执行。

另外，数据库连接 URL 中需要加上以下参数：

&rewriteBatchedStatements=true：允许批量插入（批量 update 也行），提高速率
&allowMultiQueries=true：允许一次执行多条用分号分隔的 SQL（上面的 updatePhone 需要）

雪上一枝蒿

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
关于批量数据的导入修改

log.info("文本读出来的空id数量---------" + shopIds.size() + "数据库里符合条件的空Ids---------" + nullIds.size() + "需要执行的空Ids---------" + bizShops.size());// log.info("-----------重复的Id："+intersection+intersection.size());log.info("数据集合大小{}", +splitList.size());
复制链接

扫一扫