spring-boot-batch
开发环境
开发工具:Intellij IDEA 2018.2.6
springboot: 2.0.6.RELEASE
jdk:1.8.0_192
maven: 3.6.0
spring-boot-batch
Spring Batch 是一个轻量级、全面的批处理框架,旨在帮助开发对企业系统日常运营至关重要的健壮批处理应用程序。Spring Batch 提供了可重复使用的功能,这些功能对于处理大量记录至关重要,包括日志记录/跟踪、事务管理、作业处理统计、作业重启、跳过和资源管理。它还提供更高级的技术服务和功能,通过优化和分区技术实现极高容量和高性能的批处理作业。无论是简单还是复杂的大批量批处理作业,都可以以高度可扩展的方式利用该框架来处理大量信息。Spring Boot 为此提供了 spring-boot-starter-batch 依赖项。
- pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. -->
    <groupId>com.andy</groupId>
    <artifactId>spring-boot-batch</artifactId>
    <version>1.0.7.RELEASE</version>

    <!-- Import the Spring IO Platform BOM so that the dependencies below can omit their versions. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>io.spring.platform</groupId>
                <artifactId>platform-bom</artifactId>
                <version>Cairo-SR6</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- JDBC driver matching driver-class-name com.mysql.jdbc.Driver in application.yml. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
        </dependency>
        <!-- JSR-303 implementation used by CsvBeanValidator. -->
        <dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-validator</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-batch</artifactId>
        </dependency>
        <!--
            CsvJobListener is annotated with lombok's @Slf4j, so Lombok must be on the compile
            classpath. The version is expected to be managed by the imported BOM; add an explicit
            <version> if the build reports it as missing.
        -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for Java 8 with UTF-8 sources (the sources contain Chinese literals). -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.7.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Repackage the build output as an executable Spring Boot archive. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>2.0.3.RELEASE</version>
                <configuration>
                    <!--<mainClass>${start-class}</mainClass>-->
                    <layout>ZIP</layout>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
- 启动类
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Entry point of the batch-processing demo application.
 *
 * @author Leone
 * @since 2018-10-08
 */
@SpringBootApplication
public class BatchApplication {

    /**
     * Starts Spring Boot with this class as the primary source.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        Class<?> primarySource = BatchApplication.class;
        SpringApplication.run(primarySource, args);
    }
}
- application.yml
# Spring Boot configuration for the batch demo.
spring:
  # JDBC connection used by the batch job repository and by the t_person writer.
  datasource:
    driver-class-name: com.mysql.jdbc.Driver
    url: jdbc:mysql://localhost:3306/boot?useSSL=false
    # NOTE(review): sample credentials for a local database; do not reuse outside local development.
    username: root
    password: root
  batch:
    job:
      # Run the jobs found in the application context when the application starts.
      enabled: true
    # Create the Spring Batch metadata tables on every startup.
    initialize-schema: always
- person.csv
汪含,31,0,湖北 武汉,2011-12-20 15:20:28
张零永,16,1,广东 广州,2015-08-27 17:40:22
李林羽,23,1,四川 成都,2010-02-23 19:20:26
刘三风,19,1,浙江 杭州,2009-03-20 11:10:24
李妮莉,18,0,海南 三亚,2008-08-28 12:30:18
王老五,23,1,江苏 南京,2007-02-15 19:20:26
黄晓明,19,1,福州 福建,2006-01-12 11:10:24
赵航,18,0,海南 海口,2016-02-23 12:30:18
- person.sql
-- Recreates the target table that the batch job inserts person.csv rows into.
drop table if exists t_person;
create table if not exists t_person (
    id       int primary key auto_increment comment '主键',
    name     varchar(48) comment '姓名',
    age      int comment '年龄',
    sex      varchar(8) comment '性别',
    address  varchar(48) comment '地址',
    -- datetime rather than timestamp: MySQL TIMESTAMP only covers 1970..2038 and, depending on
    -- server settings, the first TIMESTAMP column is silently refreshed on every row update,
    -- neither of which is acceptable for a date of birth.
    birthday datetime comment '生日'
-- utf8mb4 so the Chinese name/sex/address values are stored regardless of the schema default.
) default charset = utf8mb4;
- CsvBatchConfig.java
import com.andy.batch.entity.Person;
import com.andy.batch.listener.CsvJobListener;
import com.andy.batch.process.CsvBeanValidator;
import com.andy.batch.process.CsvValidatingItemProcessor;
import com.andy.batch.process.CsvLineMapper;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.core.launch.support.SimpleJobLauncher;
import org.springframework.batch.core.repository.JobRepository;
import org.springframework.batch.core.repository.support.JobRepositoryFactoryBean;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider;
import org.springframework.batch.item.database.JdbcBatchItemWriter;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.validator.Validator;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.ClassPathResource;
import org.springframework.transaction.PlatformTransactionManager;
import javax.sql.DataSource;
import java.io.IOException;
/**
 * Spring Batch configuration for the CSV import job.
 *
 * <p>The main Spring Batch building blocks only have to be registered as Spring beans;
 * batch support itself is switched on with {@code @EnableBatchProcessing} on this class.
 *
 * @author Leone
 * @since 2018-10-08
 */
@Configuration
@EnableBatchProcessing
public class CsvBatchConfig {

    /**
     * JobRepository: the container in which jobs are registered.
     *
     * <p>Both arguments are beans that Spring Boot has already auto-configured; Spring
     * injects them through the method parameters.
     *
     * @param dataSource         data source handed to the repository factory
     * @param transactionManager transaction manager handed to the repository factory
     * @return the repository produced by the factory bean
     * @throws Exception if the factory bean cannot create the repository
     */
    @Bean
    public JobRepository jobRepository(DataSource dataSource, PlatformTransactionManager transactionManager)
            throws Exception {
        JobRepositoryFactoryBean factory = new JobRepositoryFactoryBean();
        factory.setDatabaseType("mysql");
        factory.setTransactionManager(transactionManager);
        factory.setDataSource(dataSource);
        return factory.getObject();
    }

    /**
     * JobLauncher: the interface used to start a job.
     *
     * @param dataSource         forwarded to {@link #jobRepository}
     * @param transactionManager forwarded to {@link #jobRepository}
     * @return a launcher wired to the job repository
     * @throws Exception if the job repository cannot be obtained
     */
    @Bean
    public SimpleJobLauncher jobLauncher(DataSource dataSource, PlatformTransactionManager transactionManager) throws Exception {
        JobRepository repository = jobRepository(dataSource, transactionManager);
        SimpleJobLauncher launcher = new SimpleJobLauncher();
        launcher.setJobRepository(repository);
        return launcher;
    }

    /**
     * Job: the task that is actually executed; it consists of one or more steps.
     *
     * @param jobBuilderFactory factory used to build the job
     * @param step              the step the job runs
     * @return the job named {@code importJob}
     */
    @Bean
    public Job importJob(JobBuilderFactory jobBuilderFactory, Step step) {
        CsvJobListener listener = csvJobListener();
        return jobBuilderFactory.get("importJob")
                .incrementer(new RunIdIncrementer())
                .flow(step)
                .end()
                .listener(listener)
                .build();
    }

    /**
     * Step: a step is made up of an ItemReader, an ItemProcessor and an ItemWriter.
     *
     * @param stepBuilderFactory factory used to build the step
     * @param reader             reader bound to the step
     * @param writer             writer bound to the step
     * @param processor          processor bound to the step
     * @return the step named {@code step1}
     */
    @Bean
    public Step step1(StepBuilderFactory stepBuilderFactory,
                      ItemReader<Person> reader,
                      ItemWriter<Person> writer,
                      ItemProcessor<Person, Person> processor) {
        // Each chunk (and therefore each commit) covers up to 65000 items.
        final int chunkSize = 65000;
        return stepBuilderFactory.get("step1")
                .<Person, Person>chunk(chunkSize)
                .reader(reader)
                .processor(processor)
                .writer(writer)
                .build();
    }

    /**
     * ItemReader: the interface used to read data.
     *
     * <p>Reads {@code person.csv} from the classpath with a FlatFileItemReader and maps every
     * line to a {@link Person} through {@link #csvLineMapper()}. (A DefaultLineMapper built from
     * a DelimitedLineTokenizer and a BeanWrapperFieldSetMapper was the previously tried
     * alternative.)
     *
     * @return the CSV reader
     * @throws IOException declared by the signature
     */
    @Bean
    public ItemReader<Person> reader() throws IOException {
        FlatFileItemReader<Person> csvReader = new FlatFileItemReader<>();
        // Maps the CSV columns onto the domain model class.
        csvReader.setLineMapper(csvLineMapper());
        // Location of the CSV file.
        csvReader.setResource(new ClassPathResource("person.csv"));
        return csvReader;
    }

    /**
     * ItemProcessor: the interface used to process data.
     *
     * @return the custom CsvValidatingItemProcessor with CsvBeanValidator set as its validator
     */
    @Bean
    public ItemProcessor<Person, Person> processor() {
        CsvValidatingItemProcessor validatingProcessor = new CsvValidatingItemProcessor();
        validatingProcessor.setValidator(csvBeanValidator());
        return validatingProcessor;
    }

    /**
     * ItemWriter: the interface used to output data.
     *
     * <p>Uses the JDBC batch writer JdbcBatchItemWriter to write the items to the database.
     *
     * @param dataSource data source the writer inserts into
     * @return the writer for table {@code t_person}
     */
    @Bean
    public ItemWriter<Person> writer(DataSource dataSource) {
        // The named parameters in the statement are resolved from Person bean properties.
        String insertSql = "insert into t_person (name,age,sex,address,birthday) values (:name, :age, :sex, :address, :birthday)";
        JdbcBatchItemWriter<Person> jdbcWriter = new JdbcBatchItemWriter<>();
        jdbcWriter.setDataSource(dataSource);
        jdbcWriter.setSql(insertSql);
        jdbcWriter.setItemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<>());
        return jdbcWriter;
    }

    /**
     * Custom job listener.
     *
     * @return a new CsvJobListener
     */
    @Bean
    public CsvJobListener csvJobListener() {
        return new CsvJobListener();
    }

    /**
     * Custom validator.
     *
     * @return a new CsvBeanValidator
     */
    @Bean
    public Validator<Person> csvBeanValidator() {
        return new CsvBeanValidator<>();
    }

    /**
     * Custom line mapper.
     *
     * @return a new CsvLineMapper
     */
    @Bean
    public CsvLineMapper csvLineMapper() {
        return new CsvLineMapper();
    }
}
- Person.java
package com.andy.batch.entity;
import javax.validation.constraints.Size;
import java.util.Date;
/**
 * Domain object holding one person record: name, age, sex, address and birthday.
 *
 * @author Leone
 * @since 2018-10-08
 */
public class Person {

    /** Name; bean validation requires a length between 2 and 10. */
    @Size(max = 10, min = 2)
    private String name;

    private Integer age;

    private String sex;

    private String address;

    private Date birthday;

    /** Creates an empty person; all properties start as {@code null}. */
    public Person() {
    }

    /**
     * Creates a fully populated person.
     *
     * @param name     the name
     * @param age      the age
     * @param sex      the sex
     * @param address  the address
     * @param birthday the birthday
     */
    public Person(String name, Integer age, String sex, String address, Date birthday) {
        this.name = name;
        this.age = age;
        this.sex = sex;
        this.address = address;
        this.birthday = birthday;
    }

    // Accessors, grouped in field declaration order.

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Integer getAge() {
        return this.age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public String getSex() {
        return this.sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public Date getBirthday() {
        return this.birthday;
    }

    public void setBirthday(Date birthday) {
        this.birthday = birthday;
    }
}
- CsvJobListener.java
import lombok.extern.slf4j.Slf4j;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobExecutionListener;
/**
 * Job listener that logs when a job starts and how long it took.
 *
 * <p>A listener only has to implement JobExecutionListener and override
 * {@code beforeJob} and {@code afterJob}.
 *
 * @author Leone
 * @since 2018-10-08
 */
@Slf4j
public class CsvJobListener implements JobExecutionListener {

    /** {@link System#currentTimeMillis()} value captured by the latest {@code beforeJob} call. */
    private long startTime;

    /**
     * Records the start time and logs the start message.
     *
     * @param jobExecution the execution about to start (not read here)
     */
    @Override
    public void beforeJob(JobExecution jobExecution) {
        this.startTime = System.currentTimeMillis();
        log.info("任务处理开始...");
    }

    /**
     * Logs the time elapsed since the matching {@code beforeJob} call.
     *
     * @param jobExecution the finished execution (not read here)
     */
    @Override
    public void afterJob(JobExecution jobExecution) {
        final long elapsedMillis = System.currentTimeMillis() - this.startTime;
        log.info("任务处理结束,耗时:{} ms", elapsedMillis);
    }
}
- CsvBeanValidator.java
import org.springframework.batch.item.validator.Validator;
import org.springframework.beans.factory.InitializingBean;
import javax.validation.ConstraintViolation;
import javax.validation.Validation;
import javax.validation.ValidationException;
import javax.validation.ValidatorFactory;
import java.util.Set;
/**
* <p> 我们可以JSR-303(主要实现由hibernate-validator)的注解,来校验ItemReader读取到的数据是否满足要求。我们可以让我们的ItemProcessor实现ValidatingItemProcessor接口
*
* @author Leone
* @since 2018-10-09
**/
public class CsvBeanValidator<T> implements Validator<T>, InitializingBean {
private javax.validation.Validator validator;
// 使用JSR-303的Validator来校验我们的数据,在此处进行JSR-303的Validator的初始化。
@Override
public void afterPropertiesSet() {
ValidatorFactory validatorFactory = Validation.buildDefaultValidatorFactory();
validator = validatorFactory.usingContext().getValidator();
}
// 使用Validator的validate方法校验数据。
@Override
public void validate(T value) throws ValidationException {
Set<ConstraintViolation<T>> constraintViolations = validator.validate(value);
if (constraintViolations.size() > 0) {
StringBuilder message = new StringBuilder();
for (ConstraintViolation<T> constraintViolation : constraintViolations) {
message.append(constraintViolation.getMessage()).append("\n");
}
throw new ValidationException(message.toString());
}
}
}
- CsvLineMapper.java
import com.andy.batch.entity.Person;
import org.springframework.batch.item.file.LineMapper;
import org.springframework.batch.item.file.mapping.JsonLineMapper;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Maps one comma-separated line of person.csv
 * ({@code name,age,sex,address,birthday}) to a {@link Person}.
 *
 * @author leone
 * @since 2019-01-21
 */
public class CsvLineMapper implements LineMapper<Person> {

    /**
     * Birthday format. {@code HH} is the 24-hour clock; with the 12-hour {@code hh} a value such
     * as {@code 12:30:18} would be read as {@code 00:30:18}.
     */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Number of columns every line must provide. */
    private static final int FIELD_COUNT = 5;

    /**
     * Splits the line on commas and builds the person from the five columns.
     *
     * @param line       raw line read from the file
     * @param lineNumber number of the line, used in the error message
     * @return the mapped person
     * @throws IllegalArgumentException if the line has fewer than five columns
     * @throws Exception if the age or the birthday cannot be parsed
     */
    @Override
    public Person mapLine(String line, int lineNumber) throws Exception {
        String[] fields = line.split(",");
        if (fields.length < FIELD_COUNT) {
            throw new IllegalArgumentException("Line " + lineNumber + " has " + fields.length
                    + " field(s) but " + FIELD_COUNT + " are required: " + line);
        }
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call instead of a
        // shared static one.
        Date birthday = new SimpleDateFormat(DATE_PATTERN).parse(fields[4].trim());
        Integer age = Integer.valueOf(fields[1].trim());
        return new Person(fields[0], age, fields[2], fields[3], birthday);
    }
}
- CsvValidatingItemProcessor.java
package com.andy.batch.process;
import com.andy.batch.entity.Person;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.validator.ValidatingItemProcessor;
import javax.validation.ValidationException;
import java.text.SimpleDateFormat;
/**
 * Item processor that validates a {@link Person} and then translates its sex code to Chinese.
 *
 * <p>Processing data only requires implementing ItemProcessor and overriding {@code process}:
 * the argument is the item read by the ItemReader and the returned item goes to the ItemWriter.
 *
 * @author Leone
 * @since 2018-10-08
 */
public class CsvValidatingItemProcessor extends ValidatingItemProcessor<Person> {

    /**
     * Validates the item, then replaces the sex code {@code "1"} with "男" and any other
     * non-null value with "女".
     *
     * @param item the person read from the CSV file
     * @return the transformed person, or {@code null} when the parent processor filtered it out
     * @throws ValidationException declared by the original signature
     */
    @Override
    public Person process(Person item) throws ValidationException {
        // super.process(item) must be called, otherwise the configured validator never runs.
        Person validated = super.process(item);
        if (validated == null) {
            // The parent signals a filtered (invalid) item with null; pass that on instead of
            // transforming and writing the item anyway.
            return null;
        }
        String sex = validated.getSex();
        if (sex != null) {
            // Constant-first equals plus the null guard avoid a NullPointerException when the
            // sex is missing; a missing value is left as it is.
            validated.setSex("1".equals(sex) ? "男" : "女");
        }
        return validated;
    }
}
- ScheduledTaskService.java
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
/**
 * Scheduled task that launches the import job at a fixed rate.
 *
 * <p>NOTE(review): {@code @Scheduled} methods only fire when scheduling support is enabled
 * (for example {@code @EnableScheduling} on a configuration class). No such annotation is
 * visible in the classes shown alongside this one; confirm it exists elsewhere.
 *
 * @author Leone
 * @since 2018-10-09
 */
@Service
public class ScheduledTaskService {

    @Resource
    private JobLauncher jobLauncher;

    @Resource
    private Job importJob;

    /**
     * Runs {@code importJob} with the current time as its only parameter.
     *
     * <p>The parameters are built in a local variable rather than kept in an instance field, so
     * the service holds no mutable state between invocations.
     *
     * @throws Exception if the launcher fails to run the job
     */
    @Scheduled(fixedRate = 5000)
    public void execute() throws Exception {
        JobParameters jobParameters = new JobParametersBuilder()
                .addLong("time", System.currentTimeMillis())
                .toJobParameters();
        jobLauncher.run(importJob, jobParameters);
    }
}