Spring Batch 是批处理框架,不要把它理解为 Hadoop 那样的【分布式计算】框架;它只做简单的批处理,适合把日志批量导入数据库、HDFS、HBase 等,或者逐行做匹配、修改(例如转换大小写)之类的处理,并不适合做分布式 wordcount
【基本理解】
需要配置一个 job,一个 job 可以分为几个 step;一个 step 按 read、process、write 的顺序执行,其中 process 可以省略,write 必须有。write 可以是写文件的 FlatFileItemWriter,也可以写入数据库、JMS 等等
job里的step流程示例
可以用 .start(step1()).on("FAILED").end()
之类基于 ExitStatus 的条件跳转,
动态修改 step 流程(做法是在 stepBuilderFactory 的 read 等等后面加 listener,由 listener 返回 ExitStatus),这些都是比较简单的流程
batchStatusVsExitStatus
下面是一个简单的例子:把 txt 文本转换成每行只有一个 token 的 txt 文件,借此了解 Spring Batch 的基本使用方法
<!-- Parent POM: spring-boot-starter-parent 2.1.9.RELEASE. Dependencies below that declare no
     version are expected to get it from this parent's dependency management. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.1.9.RELEASE</version>
</parent>
<dependencies>
    <!-- Spring Batch starter used by the job configuration. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-batch</artifactId>
    </dependency>
    <!-- No explicit version: spring-jms is a Spring Framework module whose version is managed
         by the Spring Boot parent, so it always matches the other Spring modules. -->
    <dependency>
        <groupId>org.springframework</groupId>
        <artifactId>spring-jms</artifactId>
    </dependency>
    <!-- Database driver, needed at runtime only. -->
    <dependency>
        <groupId>org.hsqldb</groupId>
        <artifactId>hsqldb</artifactId>
        <scope>runtime</scope>
    </dependency>
    <!-- NOTE(review): this is the "guava" artifact published under the Caffeine groupId,
         not com.google.guava:guava; confirm this is the one intended. -->
    <dependency>
        <groupId>com.github.ben-manes.caffeine</groupId>
        <artifactId>guava</artifactId>
    </dependency>
    <!-- Test-only dependencies: scoped to "test" so they stay off the runtime classpath
         and out of the packaged application. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.springframework.batch</groupId>
        <artifactId>spring-batch-test</artifactId>
        <scope>test</scope>
    </dependency>
</dependencies>
<!-- Build section: registers the Spring Boot Maven plugin. No plugin version is declared here,
     so it is expected to be resolved through the parent POM. -->
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Application entry point. Starts Spring Boot with this class as the primary
 * configuration source.
 */
@SpringBootApplication
public class BatchApp {

    /**
     * Boots the application.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        final Class<?> primarySource = BatchApp.class;
        SpringApplication.run(primarySource, args);
    }
}
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.file.FlatFileItemReader;
import org.springframework.batch.item.file.FlatFileItemWriter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.io.FileSystemResource;
import org.springframework.util.StringUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.StringTokenizer;
/**
 * Spring Batch configuration for a single-step job that reads {@code test.txt} line by line,
 * splits each line into whitespace-separated tokens, strips a few punctuation marks from
 * every token, and writes the result to {@code output.txt} with one token per line.
 */
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {
    @Autowired
    public JobBuilderFactory jobBuilderFactory;
    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    /** Number of items (input lines) handled per chunk. */
    private static final int CHUNK_SIZE = 5;
    /** Input file, resolved against the working directory. */
    private static final String MESSAGE_FILE = "test.txt";
    /** Output file, resolved against the working directory. */
    private static final String OUTPUT_FILE = "output.txt";

    // job --> step --> read, [process], write

    /**
     * Defines the job {@code myJob}, which consists of the single given step.
     *
     * @param myStep the step to run
     * @return the assembled job
     */
    @Bean
    public Job messageMigrationJob(Step myStep) {
        return jobBuilderFactory.get("myJob")
                .start(myStep)
                .build();
    }

    /**
     * Defines the chunk-oriented step {@code messageMigrationStep}: read a line as a token list,
     * drop token lists that are empty, and write the remaining ones.
     *
     * @param dataReader reader producing one token list per input line
     * @param dataWriter writer emitting one output line per token
     * @return the assembled step
     */
    @Bean
    public Step myStep(FlatFileItemReader<List<String>> dataReader,
                       FlatFileItemWriter<List<String>> dataWriter) {
        // A processor that returns null filters the item out of the chunk. Without this,
        // blank or punctuation-only input lines reach the writer as empty items and show up
        // as blank lines in the output. Declared with an explicit type so the call below
        // binds to the ItemProcessor overload of processor(...).
        ItemProcessor<List<String>, List<String>> dropEmptyLines =
                tokens -> tokens.isEmpty() ? null : tokens;

        return stepBuilderFactory.get("messageMigrationStep")
                .<List<String>, List<String>>chunk(CHUNK_SIZE)
                .reader(dataReader)
                .processor(dropEmptyLines)
                .writer(dataWriter)
                .build();
    }

    /**
     * Creates the reader for {@code test.txt}; every input line is mapped to its cleaned tokens.
     *
     * @return the configured reader
     */
    @Bean
    public FlatFileItemReader<List<String>> dataReader() {
        FlatFileItemReader<List<String>> reader = new FlatFileItemReader<>();
        reader.setResource(new FileSystemResource(new File(MESSAGE_FILE)));
        reader.setLineMapper((line, lineNumber) -> tokenize(line));
        return reader;
    }

    /**
     * Creates the writer for {@code output.txt}; every token list is written one token per line.
     *
     * @return the configured writer
     */
    @Bean
    public FlatFileItemWriter<List<String>> dataWriter() {
        FlatFileItemWriter<List<String>> writer = new FlatFileItemWriter<>();
        writer.setResource(new FileSystemResource(new File(OUTPUT_FILE)));
        writer.setLineAggregator(BatchConfiguration::joinTokens);
        return writer;
    }

    /**
     * Splits a line on whitespace and removes {@code "}, {@code ,}, {@code .} and {@code 's}
     * from each token. Tokens that are empty after this clean-up are discarded.
     *
     * @param line the raw input line; {@code null} is treated like an empty line
     * @return the cleaned, non-empty tokens in input order (possibly an empty list)
     */
    private static List<String> tokenize(String line) {
        List<String> tokens = new ArrayList<>();
        if (line == null) {
            return tokens;
        }
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            String cleaned = tokenizer.nextToken()
                    .replace("\"", "")
                    .replace(",", "")
                    .replace(".", "")
                    .replace("'s", "");
            // Check after cleaning: a token such as "." or "," is only empty at this point.
            if (!cleaned.isEmpty()) {
                tokens.add(cleaned);
            }
        }
        return tokens;
    }

    /**
     * Joins the non-empty tokens with the platform line separator.
     *
     * <p>The writer treats the returned string as one "line" and appends its own line separator
     * after it, so no trailing separator is added here. The platform separator is used between
     * tokens so that the line endings inside an item match the one the writer appends.
     *
     * @param tokens the tokens of one item
     * @return the tokens, one per line, without a trailing line separator
     */
    private static String joinTokens(List<String> tokens) {
        List<String> nonEmpty = new ArrayList<>(tokens.size());
        for (String token : tokens) {
            if (token != null && !token.isEmpty()) {
                nonEmpty.add(token);
            }
        }
        return String.join(System.lineSeparator(), nonEmpty);
    }
}