Spring Batch
用来处理大量数据操作的一个框架,
主要用来读取大量数据,然后进行一定处理后输出成指定的形式。
名称 | 用途 |
---|---|
Job Repository | 用来注册Job容器 |
Job Launcher | 用来启动Job接口 |
Job | 实际要执行的任务,包含一个或多个Step |
Step | 包含Item Reader Processor Writer |
Item Reader | 读取数据的接口 |
Item Processor | 处理数据的接口 |
Item Writer | 输出数据的接口 |
主要组成部分注册成 Spring Bean
//@Configuration
@EnableBatchProcessing
数据读取:Spring Batch 为我们提供了大量的 ItemReader的实现
数据处理和校验都是通过 ItemProcessor接口完成
实现ItemProcessor 重写 process 方法, 从 ItemReader,读取到数据,返回给 Item Writer
1. pom
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-batch</artifactId>
<exclusions>
<exclusion>
<groupId>org.hsqldb</groupId>
<artifactId>hsqldb</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<dependency>
<groupId>com.oracle</groupId>
<artifactId>ojdbc6</artifactId>
<version>11.2.0.2.0</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-validator</artifactId>
</dependency> <!-- hibernate-validator:作为 JSR-303 校验使用(XML 中不能使用 // 注释) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
2. 测试 people.csv
汪某某,11,汉族,合肥
张某某,12,汉族,上海
李某某,13,非汉族,武汉
刘某,14,非汉族,南京
欧阳某某,115,汉族,北京
3. 数据表定义
src/main/resources/schema.sql
create table PERSON
(
id NUMBER not null primary key,
name VARCHAR2(20),
age NUMBER,
nation VARCHAR2(20),
address VARCHAR2(20)
);
4. 领域模型类
/**
 * Domain model for one CSV row: name, age, nation, address.
 */
public class Person {

    @Size(max = 4, min = 2) // 1: name length validated via JSR-303 (2-4 characters)
    private String name;
    private int age;
    private String nation;
    private String address;

    // Accessors are required by the rest of the tutorial code:
    // BeanWrapperFieldSetMapper populates the bean through its setters, and
    // CsvItemProcessor reads/rewrites the nation via getNation()/setNation().
    public String getName() { return name; }

    public void setName(String name) { this.name = name; }

    public int getAge() { return age; }

    public void setAge(int age) { this.age = age; }

    public String getNation() { return nation; }

    public void setNation(String nation) { this.nation = nation; }

    public String getAddress() { return address; }

    public void setAddress(String address) { this.address = address; }
}
5. 数据处理及校验
/**
 * Validates each Person through the configured validator, then normalizes
 * the nation field to a code: "汉族" becomes "01", anything else "02".
 */
public class CsvItemProcessor extends ValidatingItemProcessor<Person> {

    @Override
    public Person process(Person item) throws ValidationException {
        super.process(item); // 1: must be called so the custom validator runs; throws on invalid items
        // 2: constant-first equals — a null nation maps to "02" instead of throwing NPE
        if ("汉族".equals(item.getNation())) {
            item.setNation("01");
        } else {
            item.setNation("02");
        }
        return item;
    }
}
/**
 * Bridges JSR-303 bean validation into Spring Batch's Validator interface.
 * The underlying javax.validation.Validator is built once at bean startup.
 */
public class CsvBeanValidator<T> implements Validator<T>, InitializingBean {

    private javax.validation.Validator validator;

    @Override
    public void afterPropertiesSet() throws Exception { // 1: initialize the JSR-303 Validator once
        ValidatorFactory validatorFactory = Validation.buildDefaultValidatorFactory();
        validator = validatorFactory.usingContext().getValidator();
    }

    /**
     * Validates the value and throws ValidationException carrying every
     * violation message (one per line) when any constraint fails.
     */
    @Override
    public void validate(T value) throws ValidationException {
        Set<ConstraintViolation<T>> constraintViolations = validator.validate(value); // 2: run all constraints
        if (!constraintViolations.isEmpty()) {
            StringBuilder message = new StringBuilder();
            for (ConstraintViolation<T> constraintViolation : constraintViolations) {
                message.append(constraintViolation.getMessage()).append('\n');
            }
            throw new ValidationException(message.toString());
        }
    }
}
6. job监听
/**
 * Job lifecycle listener that logs job start/end and reports the elapsed
 * wall-clock time in milliseconds.
 */
public class CsvJobListener implements JobExecutionListener {

    long startTime;
    long endTime;

    /** Capture the start timestamp right before the job begins. */
    @Override
    public void beforeJob(JobExecution jobExecution) {
        startTime = System.currentTimeMillis();
        System.out.println("任务处理开始");
    }

    /** Capture the end timestamp and print the total duration. */
    @Override
    public void afterJob(JobExecution jobExecution) {
        endTime = System.currentTimeMillis();
        long elapsed = endTime - startTime;
        System.out.println("任务处理结束");
        System.out.println("耗时:" + elapsed + "ms");
    }
}
7. 配置
//@Configuration
@EnableBatchProcessing //开启对批处理的支持
/**
 * Spring Batch wiring: reader -> processor -> writer assembled into step1,
 * which is run by importJob. Active only while the @Configuration annotation
 * above is uncommented.
 */
public class CsvBatchConfig {

    /** Reads Person records from people.csv on the classpath. */
    @Bean
    public ItemReader<Person> reader() throws Exception {
        FlatFileItemReader<Person> reader = new FlatFileItemReader<Person>(); // 1: flat-file (CSV) reader
        reader.setResource(new ClassPathResource("people.csv")); // 2: CSV location on the classpath
        reader.setLineMapper(new DefaultLineMapper<Person>() {{ // 3: map each CSV line onto the domain model
            setLineTokenizer(new DelimitedLineTokenizer() {{
                // Column order must match the CSV rows: name,age,nation,address
                setNames(new String[] { "name","age", "nation" ,"address"});
            }});
            setFieldSetMapper(new BeanWrapperFieldSetMapper<Person>() {{
                setTargetType(Person.class);
            }});
        }});
        return reader;
    }

    /** Wraps the custom processor with the JSR-303 validator bean. */
    @Bean
    public ItemProcessor<Person, Person> processor() {
        CsvItemProcessor processor = new CsvItemProcessor(); // 1: custom CSV processor
        processor.setValidator(csvBeanValidator()); // 2: attach the validator so super.process() can run it
        return processor;
    }

    /** Writes processed Person items into the PERSON table via batched JDBC. */
    @Bean
    public ItemWriter<Person> writer(DataSource dataSource) { // 1: DataSource auto-configured by Spring Boot
        JdbcBatchItemWriter<Person> writer = new JdbcBatchItemWriter<Person>(); // 2: JDBC batch writer
        writer.setItemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<Person>());
        // :name/:age/... named parameters are bound from Person bean properties;
        // the id comes from the Oracle sequence hibernate_sequence.
        String sql = "insert into person " + "(id,name,age,nation,address) "
                + "values(hibernate_sequence.nextval, :name, :age, :nation,:address)";
        writer.setSql(sql); // 3: SQL executed for each batch of items
        writer.setDataSource(dataSource);
        return writer;
    }

    @Bean // job repository: stores batch execution metadata
    public JobRepository jobRepository(DataSource dataSource, PlatformTransactionManager transactionManager)
            throws Exception {
        JobRepositoryFactoryBean jobRepositoryFactoryBean = new JobRepositoryFactoryBean();
        jobRepositoryFactoryBean.setDataSource(dataSource);
        jobRepositoryFactoryBean.setTransactionManager(transactionManager);
        jobRepositoryFactoryBean.setDatabaseType("oracle"); // metadata tables use the Oracle dialect
        return jobRepositoryFactoryBean.getObject();
    }

    @Bean // launcher: the interface used to start jobs
    public SimpleJobLauncher jobLauncher(DataSource dataSource, PlatformTransactionManager transactionManager) // needs both to build the repository
            throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        jobLauncher.setJobRepository(jobRepository(dataSource, transactionManager));
        return jobLauncher;
    }

    @Bean // the import job itself
    public Job importJob(JobBuilderFactory jobs, Step s1) {
        return jobs.get("importJob")
                .incrementer(new RunIdIncrementer())
                .flow(s1) // 1: the job's single step
                .end()
                .listener(csvJobListener()) // 2: register and bind the listener to the job
                .build();
    }

    @Bean // step: chunk-oriented read/process/write
    public Step step1(StepBuilderFactory stepBuilderFactory, ItemReader<Person> reader, ItemWriter<Person> writer,
            ItemProcessor<Person,Person> processor) {
        return stepBuilderFactory
                .get("step1")
                .<Person, Person>chunk(65000) // 1: commit interval — up to 65000 items per transaction
                .reader(reader) // 2: bind the reader to the step
                .processor(processor) // 3: bind the processor
                .writer(writer) // 4: bind the writer
                .build();
    }

    @Bean
    public CsvJobListener csvJobListener() {
        return new CsvJobListener();
    }

    @Bean
    public Validator<Person> csvBeanValidator() {
        return new CsvBeanValidator<Person>();
    }
}
8.手动触发任务
//@Configuration 注释这个配置
@EnableBatchProcessing
// Disabled for manual triggering: its @Configuration annotation is commented
// out, so none of its beans are registered; TriggerBatchConfig replaces it.
public class CsvBatchConfig {
}
新建 TriggerBatchConfig,内容与 CsvBatchConfig 保持一致
除了 ItemReader 这个bean
/**
 * Step-scoped reader: the CSV path is injected from the job parameter
 * "input.file.name", so each job launch can read a different file.
 * Returns the concrete FlatFileItemReader type (not ItemReader) so the
 * step-scope proxy is created with the right class.
 */
@Bean
@StepScope
public FlatFileItemReader<Person> reader(@Value("#{jobParameters['input.file.name']}") String pathToFile) throws Exception {
    FlatFileItemReader<Person> reader = new FlatFileItemReader<Person>();
    reader.setResource(new ClassPathResource(pathToFile)); // file name decided at launch time
    // Configure the line mapper explicitly instead of via double-brace
    // initialization: the anonymous subclasses it creates retain a hidden
    // reference to the enclosing configuration instance.
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
    // Column order must match the CSV rows: name,age,nation,address
    tokenizer.setNames(new String[] { "name", "age", "nation", "address" });
    BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<Person>();
    fieldSetMapper.setTargetType(Person.class);
    DefaultLineMapper<Person> lineMapper = new DefaultLineMapper<Person>();
    lineMapper.setLineTokenizer(tokenizer);
    lineMapper.setFieldSetMapper(fieldSetMapper);
    reader.setLineMapper(lineMapper);
    return reader;
}
9. 定义控制器
/**
 * Manually triggers the import job: GET /read?fileName=people launches
 * importJob against "<fileName>.csv" on the classpath.
 */
@RestController
public class DemoController {

    @Autowired
    JobLauncher jobLauncher;

    @Autowired
    Job importJob;

    // NOTE(review): a @RestController is a singleton, so this shared mutable
    // field is not thread-safe under concurrent requests. It is kept only for
    // backward compatibility; the handler works from a local variable.
    public JobParameters jobParameters;

    @RequestMapping("/read")
    public String imp(String fileName) throws Exception {
        String path = fileName + ".csv";
        // The "time" parameter makes each launch unique so Spring Batch does
        // not reject a rerun as an already-completed job instance.
        JobParameters params = new JobParametersBuilder()
                .addLong("time", System.currentTimeMillis())
                .addString("input.file.name", path)
                .toJobParameters();
        this.jobParameters = params; // keep the legacy field in sync
        jobLauncher.run(importJob, params);
        return "ok";
    }
}
10. 配置文件
spring.datasource.driverClassName=oracle.jdbc.OracleDriver
spring.datasource.url=jdbc\:oracle\:thin\:@192.168.31.183\:49161\:xe
spring.datasource.username=system
spring.datasource.password=oracle
#关闭自动执行 job 的配置(properties 文件不支持行内 # 注释,注释必须单独成行)
spring.batch.job.enabled=false
logging.level.org.springframework.web = DEBUG
http://localhost:8080/read?fileName=people
表里就会生成处理好的,对应的数据,就像 hadoop一样。