Spring Boot & Spring Batch 实现批处理
需求领域
- 自动,复杂地处理大量信息,无需用户交互即可最有效地进行处理。这些操作通常包括基于时间的事件(例如月末计算,通知或通信)。
- 定期应用非常大的数据集(例如,保险利益确定或费率调整)重复处理复杂的业务规则。
- 从内部和外部系统接收的信息的集成,通常需要格式化,验证和以事务方式进行的处理到记录系统中。批处理每天用于为企业处理数十亿笔事务
Spring Batch介绍
核心流程图如下:
Spring Batch 是一个轻量级的、全面的批处理框架,旨在支持开发对企业系统的日常运行至关重要的强大的批处理应用程序。它提供了处理大量记录所必需的可重用功能,既可用于简单的用例(例如,将文件读入数据库或运行存储过程),也可用于复杂的大量用例(例如,在数据库之间移动大量数据并对其进行转换等)。大量批处理作业可以以高度可扩展的方式利用该框架来处理海量信息
总结起来就是:量大,复杂,流程化数据处理利器!
实现需求
1、通过远程接口访问得到告警信息的原始数据
2、对数据进行清洗,聚合进行本地化操作,如:发送MQ,保存本地库等
核心依赖
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven descriptor for the alarm batch-processing service. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Spring Boot parent: supplies managed versions for the starters below. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.5.1</version>
        <relativePath />
    </parent>
    <groupId>***</groupId>
    <artifactId>***</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <description>***</description>
    <properties>
        <java.version>17</java.version>
        <spring-cloud.version>2020.0.3</spring-cloud.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <!-- Import the Spring Cloud BOM so the cloud starters need no explicit versions. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.cloud</groupId>
                <artifactId>spring-cloud-dependencies</artifactId>
                <version>${spring-cloud.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>
    <dependencies>
        <!-- Spring Batch core: Job/Step/Reader/Processor/Writer infrastructure. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-batch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-actuator</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-starter-config</artifactId>
        </dependency>
        <!-- AMQP bus: used to publish alarm messages to RabbitMQ. -->
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-starter-bus-amqp</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
        </dependency>
        <!-- JdbcTemplate access plus the datasource that backs the Batch metadata tables. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jdbc</artifactId>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <scope>runtime</scope>
        </dependency>
        <!-- Apache HttpClient: used by Step 1's reader to fetch the raw alarm JSON. -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
        </dependency>
        <!-- Aliyun OSS SDK: stores oversized job lists as JSON objects. -->
        <dependency>
            <groupId>com.aliyun.oss</groupId>
            <artifactId>aliyun-sdk-oss</artifactId>
            <version>3.10.2</version>
            <exclusions>
                <exclusion>
                    <groupId>org.codehaus.jettison</groupId>
                    <artifactId>jettison</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.baomidou</groupId>
            <artifactId>mybatis-plus-boot-starter</artifactId>
            <version>3.4.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.11</version>
        </dependency>
    </dependencies>
</project>
Spring 配置
spring:
  application:
    name: ***
  batch:
    job:
      # Do not auto-run the defined Jobs at startup (default true); with false,
      # jobs must be triggered explicitly via jobLauncher.run(...)
      enabled: false
    jdbc:
      # Let Spring Batch create its metadata tables in the database; anything
      # other than "always" fails with "table does not exist" on first run
      initialize-schema: always
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: ***
    username: ***
    password: ***
    hikari:
      connection-test-query: SELECT 1
  rabbitmq:
    listener:
      simple:
        # Do not treat a missing queue as fatal at listener startup
        missing-queues-fatal: false
    addresses: ***
    port: ***
    username: ***
    password: ***
    virtual-host: ***
#-------------------- MyBatis-Plus configuration ----------------------------------
mybatis-plus:
  mapper-locations:
    - classpath*:mapper/*.xml
  # Everything below has sensible defaults and may be omitted
  global-config:
    db-config:
      # Primary-key strategy. auto: database auto-increment; 1: user-supplied id;
      # 2: globally unique numeric id; 3: globally unique UUID
      id-type: auto
      # Field strategy. IGNORED: no check; NOT_NULL: non-null check; NOT_EMPTY: non-empty check
      # field-strategy: NOT_EMPTY
      # Database type
      # db-type: MYSQL
  configuration:
    # Map snake_case columns to camelCase Java properties automatically
    map-underscore-to-camel-case: true
    # Invoke setters even for NULL result columns so those fields still get mapped
    call-setters-on-nulls: true
    # Print executed SQL — useful during development/testing only
    # log-impl: org.apache.ibatis.logging.stdout.StdOutImpl
Bean
/**
 * Raw "job running over time" alarm payload (VO), deserialized from the remote
 * alert endpoint's JSON.
 *
 * <p>All nested classes are declared {@code static}: Jackson cannot instantiate
 * a non-static inner class (its implicit constructor requires an enclosing
 * instance), so deserializing {@code labels}, {@code annotations} and
 * {@code status} would fail with the original non-static declarations.
 *
 * @author huxiang
 * @date 2022-01-05
 */
@Data
public class RunningOverVO implements Serializable {
    /** Serialization version. */
    private static final long serialVersionUID = 1L;
    private Annotations annotations;
    private Date endsAt;
    private String fingerprint;
    private List<RunningOverVO.Receivers> receivers;
    private Date startsAt;
    private Status status;
    private Date updatedAt;
    private String generatorURL;
    private Labels labels;

    /** Prometheus-style labels attached to the alert (job/cluster attributes). */
    @Data
    public static class Labels {
        private String acpus;
        private String alertname;
        private String comment;
        private String dc;
        private String to;
        private String instance;
        private String job;
        private String jobid;
        private String jobname;
        private String lc;
        private String lnodes_s;
        private String nnodes;
        private String priority;
        private String queue;
        private String service;
        private String severity;
        private String ss;
        private Date start;
        private String state;
        private Date submit;
        private String user;
    }

    /** One alert receiver entry. */
    @Data
    static class Receivers {
        private String name;
    }

    /** Free-text annotations attached to the alert. */
    @Data
    static class Annotations {
        private String description;
        private String summary;
    }

    /** Alert state (inhibition/silence bookkeeping). */
    @Data
    static class Status {
        private List<String> inhibitedBy;
        private List<String> silencedBy;
        private String state;
    }
}
/**
 * Intermediate (staging) table row for alarm information, produced by Step 1
 * and aggregated/deleted by Step 2.
 * @author huxiang
 * @date 2022-01-05
 */
@Data
public class WarningInfoItem implements Serializable {
    private static final long serialVersionUID = 1L;
    private Integer id;
    /**
     * Alarm type. 1: storage; 2: long-running job; 3: insufficient balance;
     * 4: insufficient available resources; 5: large expenditure
     */
    private Integer typeId;
    /**
     * HPC (supercomputing) center id
     */
    private String clusterId;
    /**
     * HPC account name
     */
    private String clusterUser;
    /**
     * Job id
     */
    private String jobId;
    /**
     * Job start time
     */
    private Date startTime;
    /**
     * Job attributes as a JSON string
     */
    private String job;
    /**
     * Record creation time
     */
    private Date createAt;
}
/**
 * Alarm information entity, persisted to warning_info and published to MQ.
 * @author huxiang
 */
@Data
public class WarningInfo implements java.io.Serializable {
    /** Serialization version. */
    private static final long serialVersionUID = 1L;
    @TableId(type = IdType.AUTO)
    private BigInteger id;
    /** Target user id */
    private String userId;
    /** Alarm type
     * 1: storage alarm
     * 2: long-running job alarm
     * 3: insufficient balance alarm
     * 4: insufficient resources alarm
     * 5: large expenditure alarm
     * 6: abnormally terminated job alarm
     * 7: job node count exceeds user quota alarm
     * 8: login-node CPU usage too high / process suspended alarm
     * 9: job performance below expectation alarm
     */
    private Integer typeId;
    /** Message body */
    private String msg;
    /** Alarm time */
    private Date datetime;
    /** Creation time */
    private Date createAt;
    /** Alarm acknowledgement time */
    private Date ackAt;
    /**
     * 0: fewer than 5 jobs — "jobs" holds the job list itself
     * 1: 5 or more jobs — "jobs" holds the Aliyun OSS URL of the uploaded list
     */
    private String isOss;
    /**
     * Job list JSON, or — when isOss = 1 — the Aliyun OSS URL where the list
     * was stored (see RunningOverSecondWriter). Not used by every alarm type.
     */
    private String jobs;
    /** Alarm details (pre-rendered HTML table) */
    private String remark;
    /** Alarm note shown to the user */
    private String desc = "详细内容进入系统查看";
    /**
     * Whether to show the details view
     */
    private Boolean isDetailInfo;
    /**
     * Builds a JSON-shaped string. Downstream message consumers rely on this
     * exact format — do NOT change!
     * NOTE(review): null string fields are emitted as the quoted literal "null";
     * confirm consumers tolerate this.
     * @return JSON representation of this alarm
     */
    @Override
    public String toString() {
        return "{" +
                "\"id\":" + id +
                ", \"userId\":\"" + userId + '\"' +
                ", \"typeId\":" + typeId +
                ", \"msg\":\"" + msg + '\"' +
                ", \"datetime\":\"" + datetime + '\"' +
                ", \"createAt\":\"" + createAt + '\"' +
                ", \"ackAt\":\"" + ackAt + '\"' +
                ", \"jobs\":\"" + jobs + '\"' +
                '}';
    }
}
Batch配置
/**
 * Spring Batch infrastructure configuration: metadata repository and launcher.
 */
@Configuration
@EnableBatchProcessing
public class BatchConfig {
    /**
     * JobRepository: registry for Job/Step execution metadata, persisted to the
     * database under the supplied transaction manager.
     *
     * @param dataSource         data source holding the Batch metadata tables
     * @param transactionManager transaction manager used by the repository
     * @return a fully initialized JobRepository
     * @throws Exception if the factory cannot build the repository
     */
    @Bean
    public JobRepository batchJobRepository(DataSource dataSource, PlatformTransactionManager transactionManager) throws Exception {
        JobRepositoryFactoryBean jobRepositoryFactoryBean = new JobRepositoryFactoryBean();
        jobRepositoryFactoryBean.setDatabaseType("mysql");
        jobRepositoryFactoryBean.setTransactionManager(transactionManager);
        jobRepositoryFactoryBean.setDataSource(dataSource);
        // The factory is constructed manually (it is not a Spring-managed bean),
        // so Spring never runs its InitializingBean lifecycle; without this call
        // getObject() is used on an un-initialized factory.
        jobRepositoryFactoryBean.afterPropertiesSet();
        return jobRepositoryFactoryBean.getObject();
    }

    /**
     * JobLauncher bound to the repository above. Jobs are started manually via
     * jobLauncher.run(...) because spring.batch.job.enabled=false.
     *
     * @param dataSource         forwarded to the repository bean
     * @param transactionManager forwarded to the repository bean
     * @return the launcher (Spring invokes its afterPropertiesSet as it is a bean)
     * @throws Exception if the underlying repository cannot be created
     */
    @Bean
    public SimpleJobLauncher batchJobLauncher(DataSource dataSource, PlatformTransactionManager transactionManager) throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        // Calling the sibling @Bean method is safe: @Configuration proxies it to
        // return the singleton repository, not a new instance.
        jobLauncher.setJobRepository(batchJobRepository(dataSource, transactionManager));
        return jobLauncher;
    }
}
Step定义
Step1
Reader
读取远程接口,将原始数据转换为原始告警Bean,JsonItemReader为ItemReader接口的实现类,泛型为原始数据转换后的实体bean类型
/**
 * Step 1 reader: fetches the raw alarm payload from the remote HTTP endpoint
 * and exposes it as a JsonItemReader over the returned JSON.
 *
 * @return a reader over RunningOverVO items, or null when the remote call fails
 *         or the body is empty (the step then reads nothing)
 */
@Bean("runningOverFirstReader")
@StepScope
public JsonItemReader<RunningOverVO> runningOverReaderFirst() {
    // Map 2xx responses to their body; anything else is a protocol error.
    ResponseHandler<String> responseHandler = response -> {
        int status = response.getStatusLine().getStatusCode();
        if (status >= 200 && status < 300) {
            HttpEntity entity = response.getEntity();
            return entity != null ? EntityUtils.toString(entity) : null;
        }
        throw new ClientProtocolException("Unexpected response status: " + status);
    };
    HttpGet httpGet = new HttpGet(warningRunningOverUrl);
    httpGet.addHeader("Accept", "application/json;charset=UTF-8");
    String response;
    // try-with-resources: the original leaked the CloseableHttpClient.
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        response = httpclient.execute(httpGet, responseHandler);
    } catch (IOException e) {
        // Best-effort: a failed fetch simply yields no items for this run.
        e.printStackTrace();
        return null;
    }
    if (!StringUtils.hasText(response)) {
        return null;
    }
    // Wrap the body so JsonItemReader can stream RunningOverVO objects from it.
    ByteArrayResource byteArrayResource = new ByteArrayResource(response.getBytes());
    JacksonJsonObjectReader jsonObjectReader = new JacksonJsonObjectReader(RunningOverVO.class);
    jsonObjectReader.setMapper(new JsonMapper());
    JsonItemReader<RunningOverVO> jsonItemReader = new JsonItemReader<>(byteArrayResource, jsonObjectReader);
    jsonItemReader.setName("runningOverReaderFirst");
    return jsonItemReader;
}
Process
对原始数据进行清洗,得到告警数据临时表对象,注意**ItemProcessor<RunningOverVO, WarningInfoItem>**中RunningOverVO为清洗前类型,WarningInfoItem为清洗后类型
/**
 * Step 1 processor: cleans a raw alarm (RunningOverVO) into a staging-table row
 * (WarningInfoItem), including a compact JSON summary of the job.
 *
 * @return the processor bean
 */
@Bean("runningOverFirstProcess")
public ItemProcessor<RunningOverVO, WarningInfoItem> runningOverProcessFirst() {
    return rawAlarm -> {
        RunningOverVO.Labels labels = rawAlarm.getLabels();
        WarningInfoItem warningInfoItem = new WarningInfoItem();
        warningInfoItem.setClusterId(labels.getDc());
        warningInfoItem.setClusterUser(labels.getUser());
        warningInfoItem.setJobId(labels.getJobid());
        // Type 2 = "job running too long" alarm.
        warningInfoItem.setTypeId(2);
        warningInfoItem.setStartTime(labels.getStart());
        warningInfoItem.setCreateAt(Calendar.getInstance().getTime());
        // Assemble the job JSON string (StringBuilder: no synchronization needed,
        // unlike the original StringBuffer).
        StringBuilder sb = new StringBuilder();
        sb.append("{")
          .append("\"job_id\":\"").append(labels.getJobid()).append("\",")
          .append("\"job_name\":\"").append(labels.getJobname()).append("\",")
          .append("\"partition\":\"").append(labels.getQueue()).append("\",")
          .append("\"node_num\":\"").append(labels.getNnodes()).append("\",")
          .append("\"cores_num\":\"").append(labels.getAcpus()).append("\",")
          .append("\"run_time\":\"").append(getRunningDay(labels.getStart())).append("\"")
          .append("}");
        warningInfoItem.setJob(sb.toString());
        return warningInfoItem;
    };
}
/**
 * Number of whole days the job has been running: (now - start), truncated to days.
 *
 * <p>The original implementation round-tripped both dates through a
 * SimpleDateFormat (a non-thread-safe legacy class) only to drop the
 * millisecond part; plain epoch-second arithmetic is equivalent — truncating
 * each instant to whole seconds and dividing by 86400 yields exactly the same
 * integer result — and allocation-free.
 *
 * @param sTime job start time
 * @return elapsed whole days
 * @throws ParseException no longer actually thrown; kept so existing callers'
 *         signatures/handlers remain valid
 */
private static long getRunningDay(Date sTime) throws ParseException {
    // Truncate to whole seconds, matching the old "yyyy-MM-dd HH:mm:ss" round-trip.
    long nowSeconds = System.currentTimeMillis() / 1000;
    long startSeconds = sTime.getTime() / 1000;
    return (nowSeconds - startSeconds) / (3600 * 24);
}
Writer
自定义的writer,实现ItemWriter接口,完成临时表数据入库批量操作,实现方法public void write(List list) throws Exception中,List即批量处理的数据集合,在后面Step对象创建中配置;也可以使用ItemWriter很多现成的实现类,基本上你能想到的都已经提供:
/**
 * Step 1 writer: bulk-inserts cleaned staging rows into warning_info_item.
 * Uses INSERT IGNORE so re-runs do not fail on duplicate keys.
 */
@Component
public class RunningOverFirstWriter implements ItemWriter<WarningInfoItem> {

    // Constructor injection only (the original redundantly combined a field
    // @Autowired with a constructor); final makes the dependency immutable.
    private final JdbcTemplate jdbcTemplate;

    public RunningOverFirstWriter(JdbcTemplate jdbcTemplate) {
        this.jdbcTemplate = jdbcTemplate;
    }

    /**
     * Writes one chunk of staging rows in a single batched statement.
     *
     * @param list the chunk produced by the Step 1 processor
     * @throws Exception on any JDBC failure (Spring Batch rolls the chunk back)
     */
    @Override
    public void write(List<? extends WarningInfoItem> list) throws Exception {
        String batchSQL = "insert IGNORE into warning_info_item(type_id,cluster_id,cluster_user,job_id,start_time,job,create_at) values (?,?,?,?,?,?,?)";
        List<Object[]> batchArgs = new ArrayList<>(list.size());
        for (WarningInfoItem item : list) {
            batchArgs.add(new Object[] {
                    item.getTypeId(), item.getClusterId(), item.getClusterUser(),
                    item.getJobId(), item.getStartTime(), item.getJob(), item.getCreateAt()
            });
        }
        jdbcTemplate.batchUpdate(batchSQL, batchArgs);
    }
}
Step2
Reader
将Step1落库的临时表数据根据业务进行聚合,这一步根据自己的需要来,没必要看这么长的SQL
/**
 * Step 2 reader: aggregates the staged alarms per paying user. The alarm must
 * reach both the user owning the HPC center + HPC account and the owner of
 * that user's organization — hence the UNION ALL of two aggregations over the
 * same cluster_user/warning_info_item join.
 * @return an in-memory reader over the aggregated rows
 */
@Bean("runningOverSecondReader")
@StepScope
public ListItemReader<Map<String, Object>> runningOverReaderSecond(){
    // Raise GROUP_CONCAT's length limit for this session only, so large job
    // lists are not silently truncated.
    jdbcTemplate.execute("SET SESSION group_concat_max_len=1024000");
    // cluster_user LEFT JOIN warning_info_item: collect, per payer id, every job
    // alarm belonging to that payer's HPC center + HPC account.
    List<Map<String, Object>> lstWarning = jdbcTemplate.queryForList("SELECT \n" +
            "e.user_id,\n" +
            "e.type_id,\n" +
            "e.clusterId,\n" +
            "e.clusterUser,\n" +
            "CONCAT('[',GROUP_CONCAT(e.jobs),']') jobs,\n" +
            "e.date_time,\n" +
            "e.createAt \n" +
            "FROM (SELECT\n" +
            " a.pay_user_id as user_id,\n" +
            " b.type_id,\n" +
            " b.cluster_id clusterId,\n" +
            " b.cluster_user clusterUser,\n" +
            " GROUP_CONCAT( b.job ) jobs,\n" +
            " b.start_time date_time,\n" +
            " b.create_at createAt \n" +
            "FROM\n" +
            " cluster_user a\n" +
            " LEFT JOIN warning_info_item b ON a.cluster_id = b.cluster_id AND a.`user` = b.cluster_user \n" +
            "WHERE\n" +
            " b.type_id = 2 \n" +
            "GROUP BY\n" +
            " a.pay_user_id\n" +
            " \n" +
            "UNION ALL \n" +
            "SELECT\n" +
            " d.pay_user_id as user_id,\n" +
            " b.type_id,\n" +
            " b.cluster_id clusterId,\n" +
            " b.cluster_user clusterUser,\n" +
            " GROUP_CONCAT( b.job ) jobs,\n" +
            " b.start_time date_time,\n" +
            " b.create_at createAt \n" +
            "FROM\n" +
            " cluster_user a\n" +
            " LEFT JOIN warning_info_item b ON a.cluster_id = b.cluster_id AND a.`user` = b.cluster_user \n" +
            " LEFT JOIN user_info c ON a.pay_user_id=c.id\n" +
            " LEFT JOIN account_group d ON c.group_id = d.id\n" +
            "WHERE\n" +
            " b.type_id = 2\n" +
            "GROUP BY d.pay_user_id\n" +
            ")e\n" +
            "GROUP BY e.user_id");
    ListItemReader<Map<String, Object>> listItemReader = new ListItemReader<Map<String, Object>>(lstWarning);
    return listItemReader;
}
Process
数据进行二次清洗,根据业务来,Process阶段本来就是可选的,主要是照顾流程化作业需要
/**
 * Step 2 processor: second-stage cleaning — converts an aggregated row into
 * the console-biz alarm message format (formats the timestamps and derives the
 * alarm text from the first job in the list).
 *
 * NOTE(review): the shared SimpleDateFormat instance is captured by the lambda;
 * SimpleDateFormat is not thread-safe, so this is only safe while the step runs
 * single-threaded — confirm before adding a TaskExecutor.
 * NOTE(review): date_time/createAt are converted from LocalDateTime via a fixed
 * +8 offset and then formatted in the JVM default time zone; this reproduces the
 * original wall-clock text only when the JVM default is UTC+8 — confirm the
 * deployment time zone.
 * @return the processor bean
 */
@Bean("runningOverSecondProcess")
public ItemProcessor<Map<String, Object>, Map<String, Object>> runningOverProcessSecond() {
    Format format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    ObjectMapper mapper = new ObjectMapper();
    ItemProcessor<Map<String, Object>, Map<String, Object>> itemProcessor = ehrOrg -> {
        // Reformat the JDBC LocalDateTime values as "yyyy-MM-dd HH:mm:ss" strings.
        ehrOrg.put("date_time",ehrOrg.get("date_time") != null?format.format(Date.from(((LocalDateTime)ehrOrg.get("date_time")).toInstant(ZoneOffset.of("+8")))):"");
        ehrOrg.put("createAt",ehrOrg.get("createAt") != null?format.format(Date.from(((LocalDateTime)ehrOrg.get("createAt")).toInstant(ZoneOffset.of("+8")))):"");
        // Build the alarm description from the jobs JSON.
        String jobStr = (String) ehrOrg.get("jobs");
        if(StringUtils.hasText(jobStr)){
            List<Map<String,String>> list = mapper.readValue(jobStr, List.class);
            // Only the first job id is quoted in the message ("job X and others...");
            // the loop breaks after the first element by design.
            for (Map<String, String> map : list) {
                String msg = "您的作业" + map.get("job_id") +"等已经运行超过1 天,请注意检查运行情况,避免机时浪费。 如有问题,可咨询在线支持工程师或您的客户经理,感谢您的支持";
                ehrOrg.put("msg",msg);
                break;
            }
        }
        return ehrOrg;
    };
    return itemProcessor;
}
Writer
自定义Writer,可以同时实现多个业务
/**
 * Step 2 writer for the long-running-job alarm:
 *  - publishes each alarm to RabbitMQ (AMQP)
 *  - persists alarms to MySQL
 *  - offloads oversized job lists to Aliyun OSS
 */
@Component
@Slf4j
public class RunningOverSecondWriter implements ItemWriter {
    /** Queue that receives external alarm messages */
    private static String QUEUE_CONSOLE_NOTICE = "***";
    @Autowired
    private RabbitTemplate rabbitTemplate;
    @Autowired
    private JdbcTemplate jdbcTemplate;
    @Autowired
    private WarningMapper warningMapper;
    /** Aliyun access key */
    @Value("${console.aliyun.ak}")
    private String ossAK;
    /** Aliyun secret key */
    @Value("${console.aliyun.sk}")
    private String ossSK;
    /** OSS endpoint */
    @Value("${console.aliyun.endpoint}")
    private String endpoint;
    /** OSS bucket that stores offloaded job lists */
    @Value("${console.warning.bucket.name}")
    private String bucketName;
    public RunningOverSecondWriter(RabbitTemplate rabbitTemplate) {
        this.rabbitTemplate = rabbitTemplate;
    }
    /**
     * Writes one chunk of aggregated alarm rows: renders the HTML detail table,
     * offloads large job lists to OSS, publishes each alarm to MQ, batch-inserts
     * into warning_info and finally clears the staging table.
     *
     * NOTE(review): ossClient.shutdown() is not in a finally block — an exception
     * mid-loop leaks the client; consider try/finally.
     */
    @Override
    public void write(List list) throws Exception {
        OSS ossClient = new OSSClientBuilder().build(endpoint, ossAK, ossSK);
        ObjectMapper mapper = new ObjectMapper();
        List<WarningInfo> lstWarning = new ArrayList<>();
        DateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (Object o : list) {
            Map m = (Map)o;
            /**
             * Assemble the batch insert for warning_info.
             * Check the number of jobs:
             * 1. more than 5: upload the list to Aliyun OSS, store the returned
             *    URL in the jobs column and set isOss to 1
             * 2. otherwise: store the list directly in the jobs column, isOss stays 0
             */
            String jobs = (String) m.get("jobs");
            String isOss = "0";
            StringBuffer remark = new StringBuffer();
            // Alarm id: current nanosecond counter.
            // NOTE(review): System.nanoTime() is an arbitrary JVM-relative counter,
            // not a wall-clock value; it can repeat across JVM restarts — confirm it
            // is acceptable as a unique id.
            long id = System.nanoTime();
            m.put("id",id);
            if(StringUtils.isNotEmpty(jobs)){
                List<Map> lstMap = mapper.readValue(jobs,List.class);
                if(!CollectionUtils.isEmpty(lstMap) && lstMap.size() > 5){
                    // Mark that the list lives on OSS; used again when persisting.
                    isOss = "1";
                    // Upload to OSS and keep the resulting (pre-signed) URL instead.
                    jobs = uploadJobs2Oss(ossClient,jobs,id).toString();
                }
                /**
                 * Build the alarm detail table. Columns shown: job id, job name,
                 * queue/partition, node count, core count, running days, e.g.
                 * {job_id=21309, job_name=vasp-bingxing.sh, partition=v6_384, node_num=1, cores_num=24,run_time=5}
                 */
                remark.append("<div style=\"display: flex; border-bottom: 2px solid #ccc;text-align: center;align-items: center;font-size:12px;\">\n" +
                        " <div style=\"flex:1; padding: 10px 5px\">\n" +
                        " 作业号\n" +
                        " </div>\n" +
                        " <div style=\"flex:1; padding: 10px 5px\">\n" +
                        " 作业名\n" +
                        " </div>\n" +
                        " <div style=\"flex:1; padding: 10px 5px\">\n" +
                        " 队列\n" +
                        " </div>\n" +
                        " <div style=\"width: 34px; padding: 10px 5px\">\n" +
                        " 节点数目\n" +
                        " </div>\n" +
                        " <div style=\"width: 34px; padding: 10px 5px\">\n" +
                        " 占用核数\n" +
                        " </div>\n" +
                        " <div style=\"width: 34px; padding: 10px 5px\">\n" +
                        " 运行天数\n" +
                        " </div>\n" +
                        " </div>");
                for (Map map : lstMap) {
                    remark.append("<div style=\"display: flex; border-bottom: 1px solid #eee;text-align: center;align-items: center;color:#333;\">\n");
                    remark.append("<div style=\"flex:1; padding: 10px 5px;word-break: break-all;color: rgb(0, 76, 161);\">\n");
                    remark.append(map.get("job_id")).append("\n");
                    remark.append("</div>\n");
                    remark.append("<div style=\"flex:1; padding: 10px 5px; word-break: break-all;\">\n");
                    remark.append(map.get("job_name")).append("\n");
                    remark.append("</div>\n");
                    remark.append("<div style=\"flex:1; padding: 10px 5px;word-break: break-all;\">\n");
                    remark.append(map.get("partition")).append("\n");;
                    remark.append("</div>\n");
                    remark.append("<div style=\"width: 34px; padding: 10px 5px\">");
                    remark.append(map.get("node_num")).append("\n");;
                    remark.append("</div>\n");
                    remark.append("<div style=\"width: 34px; padding: 10px 5px\">");
                    remark.append(map.get("cores_num")).append("\n");;
                    remark.append("</div>\n");
                    remark.append("<div style=\"width: 34px; padding: 10px 5px\">");
                    remark.append(map.get("run_time")).append("\n");;
                    remark.append("</div>\n");
                    remark.append("</div>");
                }
                // Attach the rendered detail table.
                m.put("remark",remark.toString());
            }
            WarningInfo warningInfo = settingWarningInfo(format, m, jobs, isOss);
            // Publish to MQ.
            rabbitTemplate.convertAndSend(QUEUE_CONSOLE_NOTICE,mapper.writeValueAsString(warningInfo));
            lstWarning.add(warningInfo);
        }
        // Close the OSS client connection.
        ossClient.shutdown();
        // Batch-insert into warning_info.
        warningMapper.batchInsert(lstWarning);
        // Delete the raw rows from the staging table.
        jdbcTemplate.execute("delete from warning_info_item");
    }
private WarningInfo settingWarningInfo(DateFormat format, Map m, String jobs, String isOss) throws ParseException {
WarningInfo warningInfo = new WarningInfo();
warningInfo.setId(BigInteger.valueOf((Long) m.get("id")));
warningInfo.setUserId((String) m.get("user_id"));
warningInfo.setTypeId((Integer) m.get("type_id"));
warningInfo.setMsg((String) m.get("msg"));
warningInfo.setIsOss(isOss);
warningInfo.setJobs(jobs);
warningInfo.setIsDetailInfo(true);
warningInfo.setRemark((String) m.get("remark"));
warningInfo.setDatetime(format.parse((String) m.get("date_time")));
warningInfo.setCreateAt(format.parse((String) m.get("createAt")));
return warningInfo;
}
private URL uploadJobs2Oss(OSS ossClient, String content, long fileName){
PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, "warning/" + fileName + ".json", new ByteArrayInputStream(content.getBytes()));
// 上传字符串。
ossClient.putObject(putObjectRequest);
//获取文件签名路径用于保存到warning_info表
// 设置签名URL过期时间为3600秒(1小时)* 24 * 30,我擦,为什么会是减法
Date expiration = new Date(Calendar.getInstance().getTime().getTime() - 3600 * 1000 * 24 * 30);
// 生成以GET方法访问的签名URL,访客可以直接通过浏览器访问相关内容。
URL url = ossClient.generatePresignedUrl(bucketName, "warning/" + fileName + ".json", expiration);
return url;
}
Step
// --- Batch wiring: builder factories contributed by @EnableBatchProcessing ---
@Autowired
private JobBuilderFactory jobBuilderFactory;
@Autowired
private StepBuilderFactory stepBuilderFactory;
// Step 1 components. Typed with generics (the originals were raw types) so the
// compiler checks they line up with the <RunningOverVO, WarningInfoItem> chunk.
@Autowired
@Qualifier("runningOverFirstReader")
private JsonItemReader<RunningOverVO> runningOverFirstReader;
@Autowired
@Qualifier("runningOverFirstProcess")
private ItemProcessor<RunningOverVO, WarningInfoItem> runningOverFirstProcess;
@Autowired
private RunningOverFirstWriter runningOverFirstWriter;
// Step 2 components.
@Autowired
@Qualifier("runningOverSecondReader")
private ListItemReader<Map<String, Object>> runningOverSecondReader;
@Autowired
@Qualifier("runningOverSecondProcess")
private ItemProcessor<Map<String, Object>, Map<String, Object>> runningOverSecondProcess;
@Autowired
private RunningOverSecondWriter runningOverSecondWriter;
/**
 * Step 1 of the long-running-job alarm pipeline: fetch raw alarms, clean them,
 * and stage them into warning_info_item.
 * @return the configured step
 */
public Step runningOverFirstStep() {
    // Chunk size 100: items are read/processed one by one but written and
    // committed 100 at a time — effectively "pagination" of the transaction.
    // Bigger chunks mean fewer commits at the cost of more memory per commit.
    var builder = stepBuilderFactory.get("runningOverFirstStep")
            .<RunningOverVO, WarningInfoItem>chunk(100);
    builder.reader(runningOverFirstReader);
    builder.processor(runningOverFirstProcess);
    builder.writer(runningOverFirstWriter);
    return builder.build();
}
/**
 * Step 2 of the long-running-job alarm pipeline: aggregate the staged rows per
 * user, build the alarm message, then publish/persist/offload via the second writer.
 * @return the configured step
 */
public Step runningOverSecondStep() {
    return stepBuilderFactory.get("runningOverSecondStep")
            // Typed chunk (the original used a raw chunk(50)): keeps the
            // reader/processor/writer generics aligned, consistent with
            // runningOverFirstStep's typed chunk.
            .<Map<String, Object>, Map<String, Object>>chunk(50)
            .reader(runningOverSecondReader)
            .processor(runningOverSecondProcess)
            .writer(runningOverSecondWriter)
            .build();
}
Flow
这个非常类似于工作流,我甚至觉得可以当成工作流来应用!
/**
 * Flow definition: runs Step 1 then Step 2 sequentially — essentially a tiny
 * two-node workflow.
 * @return the composed flow
 */
@Bean
public Flow runningOverFlow() {
    FlowBuilder<SimpleFlow> flowBuilder = new FlowBuilder<>("runningOverFlow");
    return flowBuilder
            .start(runningOverFirstStep())
            .next(runningOverSecondStep())
            .build();
}
Job
最小执行单元
/**
 * Job definition — the unit handed to the JobLauncher; it simply wraps the flow.
 * The first build() produces the FlowJobBuilder, the second the Job itself.
 * @return the runnable job
 */
@Bean
public Job runningOverJob() {
    return jobBuilderFactory.get("runningOverJob").start(runningOverFlow()).build().build();
}
测试执行
通过Job执行器执行Job
// A timestamp parameter makes every run unique and lets Spring Batch record
// per-step timings; an empty JobParameters object would also work.
JobParameters jobParameters = new JobParametersBuilder()
        .addLong("time", System.currentTimeMillis())
        .toJobParameters();
try {
    jobLauncher.run(runningOverJob, jobParameters);
} catch (Exception e) {
    // Fixed: the original snippet did not compile — "catch (Exception e.printStack())"
    // is invalid Java and printStack() does not exist; log-and-continue is the intent.
    e.printStackTrace();
}
初次启动服务会生成Spring Batch相关的表,用于驱动Job执行:
测试结果: