Springboot+SpringBatch处理2000W数据

业务需求是将一张2000多万的数据表进行分表插入。

引入依赖

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-batch</artifactId>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.connector.java.version}</version>
        </dependency>
        
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>${druid.version}</version>
        </dependency>

添加批处理配置

#项目启动会执行springbatch自带的sql脚本
spring.sql.init.schema-locations=classpath:org/springframework/batch/core/schema-mysql.sql
#项目启动后是否执行sql脚本 always 和 never,第一次配置always 之后换成never
spring.sql.init.mode=never
#项目启动时执行job
spring.batch.job.enabled=false

添加启动类

// Enable Spring Batch infrastructure (JobRepository, JobLauncher, ...)
@EnableBatchProcessing
@SpringBootApplication
public class GatherPlusApplication {

    /** Boot entry point. */
    public static void main(String[] args) {
        SpringApplication.run(GatherPlusApplication.class, args);
    }
}

添加job配置

/**
 * Spring Batch wiring for migrating rows from bit_monitor_gather_1_history
 * into bit_monitor_gather_history: a partitioned master step fans out to a
 * multi-threaded worker step (paging reader -> processor -> batch writer).
 */
@Configuration
public class MonitorGatherHistoryJobConfig {

    // Factory for building Job instances (Spring Batch 4.x style).
    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    // Factory for building Step instances.
    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    // MyBatis session factory shared by the paging reader and the batch writer.
    @Autowired
    private SqlSessionFactory sqlSessionFactory;

    /**
     * Step-scoped paging reader; one instance per partition. The partition's
     * [startIndex, endIndex) bounds are injected from the step ExecutionContext
     * that the partitioner populated.
     */
    @Bean
    @StepScope  // required so the partitioner-supplied context values can be injected
    public MyBatisPagingItemReader<BitMonitorGather1History> itemReader(
            @Value("#{stepExecutionContext[startIndex]}") Long startIndex,
            @Value("#{stepExecutionContext[endIndex]}") Long endIndex) {

        System.out.println("----------MyBatisPagingItemReader开始-----startIndex: " + startIndex + "  -----endIndex:" + endIndex);

        Map<String, Object> queryParams = new HashMap<>(2);
        queryParams.put("startIndex", startIndex);
        queryParams.put("endIndex", endIndex);

        MyBatisPagingItemReader<BitMonitorGather1History> pagingReader = new MyBatisPagingItemReader<>();
        pagingReader.setSqlSessionFactory(sqlSessionFactory);
        // Each thread reads 1000 rows per page.
        pagingReader.setPageSize(1000);
        pagingReader.setQueryId("water.gather.plus.gather.mapper.BitMonitorGather1HistoryMapper.selectTempForList");
        pagingReader.setParameterValues(queryParams);
        return pagingReader;
    }


    /**
     * Converts a source row into a target row. Only transformation performed:
     * the MIN field (yyyyMMddHHmm) is parsed into the INSERT_TIME timestamp;
     * all other properties are copied as-is.
     */
    @Bean
    public ItemProcessor<BitMonitorGather1History, BitMonitorGatherHistory> itemProcessor() {
        return source -> {
            BitMonitorGatherHistory target = new BitMonitorGatherHistory();
            // Copy everything except insertTime, which is derived below.
            BeanUtil.copyProperties(source, target, "insertTime");
            // MIN is a yyyyMMddHHmm key; append "00" seconds before parsing.
            DateTime parsed = DateUtil.parse(target.getMin() + "00");
            target.setInsertTime(parsed);
            return target;
        };
    }

	//执行插入操作 这里遇到一个坑,开始使用的是mybatis的insert方法插入,执行一半就报错了,跟进源码发现有时候执行的sql语句有差别,原因是,我读出来的数据有几个字段有时候有值有时候没值,插入的sql语句发现字段的值为空就会去掉这个字段,所以一批任务会有不同的sql插入语句,springbatch一批处理的语句必须相同。因此我手写了插入语句。
    /**
     * Batch writer that runs the hand-written insert statement for every item
     * in a chunk. A fixed-column insert is used so each statement in a batch
     * is textually identical (batch execution requires the same SQL per batch).
     */
    @Bean
    public MyBatisBatchItemWriter<BitMonitorGatherHistory> itemWriter() {
        MyBatisBatchItemWriter<BitMonitorGatherHistory> writer = new MyBatisBatchItemWriter<>();
        writer.setSqlSessionFactory(sqlSessionFactory);
        writer.setStatementId("water.gather.plus.gather.mapper.BitMonitorGatherHistoryMapper.save");
        return writer;
    }

    /**
     * Worker step executed once per partition: reads 1000 rows per chunk,
     * converts them, and batch-inserts them into the target table.
     */
    @Bean
    public Step workStep() {
        return stepBuilderFactory
                .get("workStep")
                .<BitMonitorGather1History, BitMonitorGatherHistory>chunk(1000)  // commit interval: 1000 items
                .reader(itemReader(null, null))  // real bounds are injected at runtime via @StepScope
                .processor(itemProcessor())
                .writer(itemWriter())
                .build();
    }

    /** Master step: partitions the key range and delegates each slice to workStep. */
    @Bean
    public Step masterStep() {
        Step worker = workStep();
        return stepBuilderFactory.get("masterStep")
                .partitioner(worker.getName(), partitioner())
                .partitionHandler(partitionHandler())
                .build();
    }

    /** Listener bean that logs the job's start/end time and total duration. */
    @Bean
    public MonitorGatherJobListener monitorGatherJobListener() {
        return new MonitorGatherJobListener();
    }

    /**
     * Partition handler that fans the worker step out across up to 50
     * concurrent threads, one per partition produced by {@link #partitioner()}.
     */
    @Bean
    public PartitionHandler partitionHandler() {
        TaskExecutorPartitionHandler handler = new TaskExecutorPartitionHandler();
        handler.setGridSize(50);  // run up to 50 partitions concurrently
        handler.setTaskExecutor(new SimpleAsyncTaskExecutor());
        handler.setStep(workStep());
        try {
            handler.afterPropertiesSet();
        } catch (Exception e) {
            // Fail fast: a mis-configured handler must abort context startup.
            // The original printStackTrace() swallowed the failure and returned
            // a half-initialized handler into the context.
            throw new IllegalStateException("Failed to initialise TaskExecutorPartitionHandler", e);
        }
        return handler;
    }

    /** One-hour time-window partitioner feeding the step-scoped reader. */
    @Bean
    public MonitorGatherTimePartitioner partitioner() {
        return new MonitorGatherTimePartitioner();
    }

    /** The migration job: a single partitioned master step, relaunchable via the incrementer. */
    @Bean
    public Job myJob() {
        return jobBuilderFactory
                .get("myJob")
                .start(masterStep())
                .incrementer(myParamIncrementer())  // allows the job to be launched repeatedly
                .listener(monitorGatherJobListener())
                .build();
    }

    /** Custom incrementer pinning run.id so a failed run can be resumed. */
    @Bean
    public MyParamIncrementer myParamIncrementer() {
        return new MyParamIncrementer();
    }

// Query executed by the step-scoped MyBatisPagingItemReader.
public interface BitMonitorGather1HistoryMapper extends BaseMapper<BitMonitorGather1History> {

    /**
     * Pages through one partition's rows: MIN in [startIndex, endIndex).
     * startIndex/endIndex plus MyBatis' _pagesize/_skiprows placeholders are
     * supplied through the reader's parameter map, not as method arguments.
     *
     * Fix: the original SQL read "where and MIN >= ...", which is invalid
     * SQL (stray AND directly after WHERE) and fails at execution time.
     */
    @Select("select * from bit_monitor_gather_1_history where MIN >= #{startIndex} and MIN < #{endIndex} limit #{_pagesize} OFFSET #{_skiprows}")
    List<BitMonitorGather1History> selectTempForList();
}
    
 //基于上面插入问题重写了方法,保证每次执行sql都是一样的。
public interface BitMonitorGatherHistoryMapper extends BaseMapper<BitMonitorGatherHistory> {

    @Insert("INSERT INTO bit_monitor_gather_history (\n" +
            "\tID,\n" +
            "\tMEASURE_ID,\n" +
            "\tFACTORY_SN,\n" +
            "\tMONITOR_CHILD_ID,\n" +
            "\tPLC_NAME,\n" +
            "\tFACTORY_NAME,\n" +
            "\tDEVICE_STATUS,\n" +
            "\t`MIN`,\n" +
            "\tMEASURE_POINT_NAME,\n" +
            "\tMEASURE_POINT_NAME_SHOW,\n" +
            "\tMEASURE_POINT_RAW_DATA,\n" +
            "\tMEASURE_POINT_UNIT,\n" +
            "\tMEASURE_POINT_DATA_TYPE,\n" +
            "\tMEASURE_POINT_TIME_STAMP,\n" +
            "\tINSERT_TIME,\n" +
            "\tPARENT_SYSTEM_NAME\n" +
            ")\n" +
            "VALUES\n" +
            "\t(\n" +
            "\t\t#{id}, #{measureId}, #{factorySn}, #{monitorChildId}, #{plcName}, #{factoryName}, #{deviceStatus}, #{min}, #{measurePointName},#{measurePointNameShow}, #{measurePointRawData}, #{measurePointUnit}, #{measurePointDataType}, #{measurePointTimeStamp}, #{insertTime}, #{parentSystemName}\n" +
            "\t);")
    int save(BitMonitorGatherHistory bitMonitorGatherHistory);

// Partitioner: the table holds roughly 40k rows per hour, so the key range is
// split into one-hour windows.
public class MonitorGatherTimePartitioner implements Partitioner {

    /**
     * Splits the fixed time range [2023-10-24 00:00, 2023-11-16 00:00) into
     * one-hour windows. Each partition's ExecutionContext carries a
     * [startIndex, endIndex) pair (yyyyMMddHHmm encoded as a long) that the
     * step-scoped reader consumes.
     */
    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        DateTime windowStart = DateUtil.parse("20231024000000");
        DateTime rangeEnd = DateUtil.parse("20231116000000");
        Map<String, ExecutionContext> partitions = new HashMap<>();
        int index = 0;
        while (windowStart.compareTo(rangeEnd) < 0) {
            DateTime windowEnd = DateUtil.offsetHour(windowStart, 1);
            // Bounds for this one-hour slice, encoded as yyyyMMddHHmm longs.
            Map<String, Object> windowParams = new HashMap<>();
            windowParams.put("startIndex", Long.valueOf(windowStart.toString("yyyyMMddHHmm")));
            windowParams.put("endIndex", Long.valueOf(windowEnd.toString("yyyyMMddHHmm")));
            partitions.put("partition" + index, new ExecutionContext(windowParams));
            windowStart = windowEnd;
            index++;
        }
        return partitions;
    }
}

//添加job监听器
public class MonitorGatherJobListener implements JobExecutionListener {
    @Override
    public void beforeJob(JobExecution jobExecution) {
        long begin = System.currentTimeMillis();
        jobExecution.getExecutionContext().putLong("begin", begin);
        System.err.println("-------------------------【开始时间:】---->"+begin+"<-----------------------------");
    }

    @Override
    public void afterJob(JobExecution jobExecution) {
        long begin = jobExecution.getExecutionContext().getLong("begin");
        long end = System.currentTimeMillis();
        System.err.println("-------------------------【结束时间:】---->"+end+"<-----------------------------");
        System.err.println("-------------------------【总耗时:】---->"+(end - begin)+"<-----------------------------");
    }


/**
 * Incrementer pinning run.id to a fixed value (44) so that relaunching the job
 * resumes the previously failed JobInstance from where it stopped instead of
 * starting a new one. If every launch should start fresh, use Spring Batch's
 * built-in RunIdIncrementer instead.
 */
public class MyParamIncrementer implements JobParametersIncrementer {

    @Override
    public JobParameters getNext(JobParameters jobParameters) {
        JobParametersBuilder builder = new JobParametersBuilder(jobParameters);
        builder.addLong("run.id", 44L);
        return builder.toJobParameters();
    }
}

2000W条数据执行起来得一会时间,耐心等待吧。。。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值