flink任务管理yarn perJob模式代码实现
使用场景:flink云平台或者是公司内部flink任务管理平台,提交任务到yarn的流程代码
代码来源:分析yarn submit job 流程整合
功能:
- 支持动态提交flink任务到yarn
- 支持关闭flink任务
- 支持实时获取任务状态
- 支持实时获取任务jobId
- 支持实时获取flink WebInterfaceUrl
- 支持获取yarn ClusterClient
1. java代码实现
1.1 maven依赖
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<hadoop.version>2.8.5</hadoop.version>
<scala.binary.version>2.11</scala.binary.version>
<flink.version>1.13.6</flink.version>
<fastjson.version>1.2.75</fastjson.version>
<cn.hutool.all.version>5.5.2</cn.hutool.all.version>
</properties>
<dependencies>
<!-- hadoop 相关依赖 -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- flink 相关依赖 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-yarn_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>flink-shaded-hadoop-2</artifactId>
<groupId>org.apache.flink</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- 其他工具包 -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>${cn.hutool.all.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
</dependencies>
1.2 相关封装的实体类
- JobConfig.java
/**
* yarn flink 任务提交参数
*/
@Data
public class JobConfig {
// YARN application name shown in the ResourceManager UI
private String applicationName;
// Flink job name
private String jobName;
// Flink configuration directory, e.g. flink/conf
private String flinkConfDir;
// Hadoop configuration directory, e.g. hadoop/conf
private String hadoopConfDir;
// Directory holding the Flink cluster jars (HDFS path)
private String flinkLibs;
// Flink plugins directory (HDFS path)
private String flinkPlugins;
// Path of the user job jar to execute (HDFS path)
private String userJarPath;
// Path of the flink-dist jar, e.g. flink-dist_2.11-1.13.6.jar (HDFS path)
private String flinkDistJar;
// Flink configuration overrides passed at submission time
private Map<String, String> flinkArgs;
// Runtime arguments forwarded to the user program's main method
private String[] executeArgs;
// Fully qualified entry-point class of the user program
private String mainClass;
// Paths of user-defined function (UDF) jars the job depends on
private List<String> udfList;
}
- JobStatus.java
/**
* flink 任务状态枚举
* 复制源码org.apache.flink.api.common.JobStatus
*/
public enum JobStatus {
    /** Job was submitted (custom state, not part of the Flink source enum). */
    SUBMITTED,
    /** The dispatcher has received the job and is waiting for the job manager to be created. */
    INITIALIZING,
    /** Job is newly created, no task has started to run. */
    CREATED,
    /** Some tasks are scheduled or running, some may be pending, some may be finished. */
    RUNNING,
    /** The job has failed and is currently waiting for the cleanup to complete. */
    FAILING,
    /** The job has failed with a non-recoverable task failure. */
    FAILED,
    /** Job is being cancelled. */
    CANCELLING,
    /** Job has been cancelled. */
    CANCELED,
    /** All of the job's tasks have successfully finished. */
    FINISHED,
    /** The job is currently undergoing a reset and total restart. */
    RESTARTING,
    /**
     * The job has been suspended which means that it has been stopped but not been removed from a
     * potential HA job store.
     */
    SUSPENDED,
    /** The job is currently reconciling and waits for task execution report to recover state. */
    RECONCILING;

    /**
     * Returns the Chinese display name for the given state.
     *
     * <p>Fix: the original switch was missing the {@code FAILING} and {@code RECONCILING}
     * constants, so both silently fell through to the default "未知" (unknown) branch.
     *
     * @param state the job state to render; must not be {@code null}
     * @return the human-readable Chinese name of the state
     */
    public static String showName(JobStatus state) {
        switch (state) {
            case CREATED:
                return "创建";
            case SUBMITTED:
                return "提交";
            case CANCELED:
                return "取消";
            case RUNNING:
                return "运行";
            case FAILED:
                return "失败";
            case FAILING:
                return "失败中";
            case RESTARTING:
                return "重启";
            case CANCELLING:
                return "取消中";
            case SUSPENDED:
                return "暂停";
            case INITIALIZING:
                return "初始化";
            case FINISHED:
                return "结束";
            case RECONCILING:
                return "协调中";
            default:
                return "未知";
        }
    }
}
- JobStatusInfo.java
import lombok.AllArgsConstructor;
import lombok.Data;
/**
* 任务提交后-状态信息
*/
@Data
@AllArgsConstructor
public class JobStatusInfo {
// YARN applicationId of the submitted job
private String applicationId;
// Flink job name
private String jobName;
// Flink jobId assigned after submission
private String jobId;
// Current job state; see the JobStatus enum (mirrors Flink's JobStatus)
private JobStatus jobStatus;
// Job start time (epoch millis presumably — confirm against the producer of this value)
private Long startTime;
// URL of the Flink web UI for this job
private String webInterfaceUrl;
}
1.3 API-METHOD 功能实现
- YarnUtils.java
import com.google.common.base.Throwables;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import java.io.IOException;
@Slf4j
public class YarnUtils {
/**
* 创建yarn客户端
*
* @param hadoopConfig
* @return
*/
public static YarnClient createYarnClient(String hadoopConfig) {
if (StringUtils.isBlank(hadoopConfig)) {
return null;
}
YarnClient yarnClient = YarnClient.createYarnClient();
yarnClient.init