JobGraph的构建
进入env.execute()方法
/**
 * Triggers the program execution under the given job name.
 *
 * <p>Builds the {@code StreamGraph} and executes it. If the execution fails and the failure
 * contains a {@code ClusterDatasetCorruptedException}, the cache transformations are
 * invalidated and the job is executed once more with a stream graph rebuilt from the
 * transformations captured at the start of this call. Any other failure is rethrown as is.
 *
 * @param jobName name to set on the stream graph; skipped when {@code null}
 * @return the result of the job execution
 * @throws Exception if the execution (or the retry) fails
 */
public JobExecutionResult execute(String jobName) throws Exception {
    // Copy of the current transformations; used to rebuild the graph if a retry is needed.
    final List<Transformation<?>> originalTransformations = new ArrayList<>(transformations);

    // Build the StreamGraph.
    final StreamGraph initialGraph = getStreamGraph();
    if (jobName != null) {
        initialGraph.setJobName(jobName);
    }

    try {
        return execute(initialGraph);
    } catch (Throwable t) {
        final boolean causedByCorruptedDataset =
                ExceptionUtils.findThrowable(t, ClusterDatasetCorruptedException.class)
                        .isPresent();
        if (causedByCorruptedDataset) {
            // Retry without cache if it is caused by corrupted cluster dataset.
            invalidateCacheTransformations(originalTransformations);
            // Rebuild the StreamGraph from the original transformations and execute it
            // (see execute(StreamGraph)).
            return execute(getStreamGraph(originalTransformations));
        }
        throw t;
    }
}
进入execute()方法
/**
 * Submits the given stream graph and returns the job execution result.
 *
 * <p>When {@code DeploymentOptions.ATTACHED} is set, this waits for the job's
 * {@code JobExecutionResult}; otherwise a {@code DetachedJobExecutionResult} carrying only
 * the job id is returned. All registered job listeners are notified with either the result
 * or the (stripped) failure.
 *
 * @param streamGraph the stream graph to execute
 * @return the result of the job execution
 * @throws Exception if submission or execution fails
 */
public JobExecutionResult execute(StreamGraph streamGraph) throws Exception {
    // Submission itself is delegated to executeAsync().
    final JobClient jobClient = executeAsync(streamGraph);

    try {
        final boolean attached = configuration.getBoolean(DeploymentOptions.ATTACHED);
        final JobExecutionResult result =
                attached
                        ? jobClient.getJobExecutionResult().get()
                        : new DetachedJobExecutionResult(jobClient.getJobID());

        jobListeners.forEach(listener -> listener.onJobExecuted(result, null));
        return result;
    } catch (Throwable t) {
        // get() on the JobExecutionResult Future will throw an ExecutionException. This
        // behaviour was largely not there in Flink versions before the PipelineExecutor
        // refactoring so we should strip that exception.
        final Throwable failure = ExceptionUtils.stripExecutionException(t);
        jobListeners.forEach(listener -> listener.onJobExecuted(null, failure));
        ExceptionUtils.rethrowException(failure);

        // never reached, only make javac happy
        return null;
    }
}
/**
 * Submits the given stream graph through the pipeline executor and returns the resulting
 * {@code JobClient}.
 *
 * <p>Job listeners are notified of the submission outcome. On success, the job client is
 * also handed to every pending collect iterator, after which the iterator list is cleared.
 *
 * @param streamGraph the stream graph to submit; must not be {@code null}
 * @return the client of the submitted job
 * @throws Exception a {@code FlinkException} wrapping the stripped cause when the submission
 *     future completes exceptionally; other exceptions propagate unchanged
 */
public JobClient executeAsync(StreamGraph streamGraph) throws Exception {
    checkNotNull(streamGraph, "StreamGraph cannot be null.");

    final PipelineExecutor pipelineExecutor = getPipelineExecutor();

    // Asynchronous submission: the executor hands back a future of the JobClient.
    final CompletableFuture<JobClient> submission =
            pipelineExecutor.execute(streamGraph, configuration, userClassloader);

    try {
        // Block until the submission has completed and the JobClient is available.
        final JobClient client = submission.get();

        jobListeners.forEach(listener -> listener.onJobSubmitted(client, null));
        collectIterators.forEach(collectIterator -> collectIterator.setJobClient(client));
        collectIterators.clear();

        return client;
    } catch (ExecutionException executionException) {
        final Throwable cause = ExceptionUtils.stripExecutionException(executionException);
        jobListeners.forEach(listener -> listener.onJobSubmitted(null, cause));

        final String message =
                String.format("Failed to execute job '%s'.", streamGraph.getJobName());
        throw new FlinkException(message, cause);
    }
}
executor.execute()内部会调用getJobGraph()来构建JobGraph,下面getJobGraph()方法中的pipeline参数其实就是StreamGraph
进入getJobGraph()方法 查看具体构建流程
/**
 * Translates the given pipeline into a {@code JobGraph} and applies the execution-related
 * settings found in the configuration (fixed job id, client heartbeat timeout, jars,
 * classpaths and savepoint restore settings).
 *
 * @param pipeline the pipeline to translate; must not be {@code null}
 * @param configuration the configuration to read the execution settings from; must not be
 *     {@code null}
 * @param userClassloader the user class loader passed on to the translation
 * @return the configured job graph
 * @throws MalformedURLException declared by this method; presumably raised while reading
 *     jar/classpath URLs from the configuration (not visible here)
 */
public static JobGraph getJobGraph(
        @Nonnull final Pipeline pipeline,
        @Nonnull final Configuration configuration,
        @Nonnull ClassLoader userClassloader)
        throws MalformedURLException {
    checkNotNull(pipeline);
    checkNotNull(configuration);

    final ExecutionConfigAccessor accessor =
            ExecutionConfigAccessor.fromConfiguration(configuration);

    // Build the JobGraph; the actual translation happens in FlinkPipelineTranslationUtil.
    final int parallelism = accessor.getParallelism();
    final JobGraph jobGraph =
            FlinkPipelineTranslationUtil.getJobGraph(
                    userClassloader, pipeline, configuration, parallelism);

    // Use the fixed job id from the configuration, if one is set.
    configuration
            .getOptional(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID)
            .ifPresent(hexJobId -> jobGraph.setJobID(JobID.fromHexString(hexJobId)));

    // The client heartbeat timeout is only set for attached jobs with shutdown-if-attached.
    final boolean attachedWithShutdown =
            configuration.getBoolean(DeploymentOptions.ATTACHED)
                    && configuration.getBoolean(DeploymentOptions.SHUTDOWN_IF_ATTACHED);
    if (attachedWithShutdown) {
        final long heartbeatTimeout =
                configuration.getLong(ClientOptions.CLIENT_HEARTBEAT_TIMEOUT);
        jobGraph.setInitialClientHeartbeatTimeout(heartbeatTimeout);
    }

    jobGraph.addJars(accessor.getJars());
    jobGraph.setClasspaths(accessor.getClasspaths());
    jobGraph.setSavepointRestoreSettings(accessor.getSavepointRestoreSettings());

    return jobGraph;
}
进入 FlinkPipelineTranslationUtil.getJobGraph()
进入pipelineTranslator.translateToJobGraph()具体实现类StreamGraphTranslator
/**
 * A {@code FlinkPipelineTranslator} that translates a {@code StreamGraph} pipeline into a
 * {@code JobGraph}.
 */
public class StreamGraphTranslator implements FlinkPipelineTranslator {

    private static final Logger LOG = LoggerFactory.getLogger(StreamGraphTranslator.class);

    /** Class loader handed to the stream graph when the job graph is generated. */
    private final ClassLoader userClassloader;

    /**
     * Creates a translator that uses the given user class loader.
     *
     * @param userClassloader the class loader passed to {@code StreamGraph#getJobGraph}
     */
    public StreamGraphTranslator(ClassLoader userClassloader) {
        this.userClassloader = userClassloader;
    }

    /**
     * Converts the given pipeline, which must be a {@code StreamGraph}, into a
     * {@code JobGraph}.
     *
     * @param pipeline the pipeline to translate
     * @param optimizerConfiguration not used by this method
     * @param defaultParallelism not used by this method
     * @return the job graph produced by the stream graph
     */
    @Override
    public JobGraph translateToJobGraph(
            Pipeline pipeline, Configuration optimizerConfiguration, int defaultParallelism) {
        final boolean isStreamGraph = pipeline instanceof StreamGraph;
        checkArgument(isStreamGraph, "Given pipeline is not a DataStream StreamGraph.");

        // The StreamGraph itself produces the JobGraph; no explicit job id is passed.
        return ((StreamGraph) pipeline).getJobGraph(userClassloader, null);
    }
}
进入streamGraph.getJobGraph(userClassloader, null)方法
/**
 * Generates the {@code JobGraph} for this stream graph by delegating to
 * {@code StreamingJobGraphGenerator.createJobGraph}.
 *
 * @param userClassLoader the user class loader passed to the generator
 * @param jobID the job id passed to the generator; may be {@code null}
 * @return the generated job graph
 */
public JobGraph getJobGraph(ClassLoader userClassLoader, @Nullable JobID jobID) {
    final JobGraph generated =
            StreamingJobGraphGenerator.createJobGraph(userClassLoader, this, jobID);
    return generated;
}
/**
 * Builds the {@code JobGraph} from this generator's {@code streamGraph}.
 *
 * <p>Main steps, in order: validate, copy job-level settings, hash the stream nodes, chain
 * operators ({@link #setChaining}), configure non-chained outputs and physical edges, set
 * slot sharing / co-location and managed memory fractions, configure checkpointing, register
 * user artifacts, set the execution config, and finally wait for the serialization of the
 * vertex configs.
 *
 * @return the fully configured job graph
 * @throws IllegalConfigurationException if the execution config cannot be serialized
 * @throws FlinkRuntimeException if serializing the vertex configs fails
 */
private JobGraph createJobGraph() {
    preValidate();
    jobGraph.setJobType(streamGraph.getJobType());
    jobGraph.setDynamic(streamGraph.isDynamic());

    jobGraph.enableApproximateLocalRecovery(
            streamGraph.getCheckpointConfig().isApproximateLocalRecoveryEnabled());

    // Generate deterministic hashes for the nodes in order to identify them across
    // submission iff they didn't change.
    Map<Integer, byte[]> hashes =
            defaultStreamGraphHasher.traverseStreamGraphAndGenerateHashes(streamGraph);

    // Generate legacy version hashes for backwards compatibility
    List<Map<Integer, byte[]>> legacyHashes = new ArrayList<>(legacyStreamGraphHashers.size());
    for (StreamGraphHasher hasher : legacyStreamGraphHashers) {
        legacyHashes.add(hasher.traverseStreamGraphAndGenerateHashes(streamGraph));
    }

    // Merge operator chains: StreamNodes that can be chained together are combined into
    // an operator chain. According to the original notes, this step is also where the
    // corresponding JobVertex, JobEdge and IntermediateDataSet objects are generated.
    setChaining(hashes, legacyHashes);

    if (jobGraph.isDynamic()) {
        setVertexParallelismsForDynamicGraphIfNecessary();
    }

    // Note that we set all the non-chainable outputs configuration here because the
    // "setVertexParallelismsForDynamicGraphIfNecessary" may affect the parallelism of job
    // vertices and partition-reuse
    final Map<Integer, Map<StreamEdge, NonChainedOutput>> opIntermediateOutputs =
            new HashMap<>();
    setAllOperatorNonChainedOutputsConfigs(opIntermediateOutputs);
    setAllVertexNonChainedOutputsConfigs(opIntermediateOutputs);

    // Set the physical edges: per the original notes, the in-edges of each JobVertex are
    // also stored in that JobVertex's StreamConfig.
    setPhysicalEdges();

    markSupportingConcurrentExecutionAttempts();

    validateHybridShuffleExecuteInBatchMode();

    setSlotSharingAndCoLocation();

    setManagedMemoryFraction(
            Collections.unmodifiableMap(jobVertices),
            Collections.unmodifiableMap(vertexConfigs),
            Collections.unmodifiableMap(chainedConfigs),
            id -> streamGraph.getStreamNode(id).getManagedMemoryOperatorScopeUseCaseWeights(),
            id -> streamGraph.getStreamNode(id).getManagedMemorySlotScopeUseCases());

    configureCheckpointing();

    jobGraph.setSavepointRestoreSettings(streamGraph.getSavepointRestoreSettings());

    final Map<String, DistributedCache.DistributedCacheEntry> distributedCacheEntries =
            JobGraphUtils.prepareUserArtifactEntries(
                    streamGraph.getUserArtifacts().stream()
                            .collect(Collectors.toMap(e -> e.f0, e -> e.f1)),
                    jobGraph.getJobID());

    for (Map.Entry<String, DistributedCache.DistributedCacheEntry> entry :
            distributedCacheEntries.entrySet()) {
        jobGraph.addUserArtifact(entry.getKey(), entry.getValue());
    }

    // set the ExecutionConfig last when it has been finalized
    try {
        jobGraph.setExecutionConfig(streamGraph.getExecutionConfig());
    } catch (IOException e) {
        // Keep the IOException as the cause so the offending type can be diagnosed.
        throw new IllegalConfigurationException(
                "Could not serialize the ExecutionConfig. "
                        + "This indicates that non-serializable types (like custom serializers) were registered",
                e);
    }

    jobGraph.setChangelogStateBackendEnabled(streamGraph.isChangelogStateBackendEnabled());

    addVertexIndexPrefixInVertexName();

    setVertexDescription();

    // Wait for the serialization of operator coordinators and stream config.
    try {
        FutureUtils.combineAll(
                        vertexConfigs.values().stream()
                                .map(
                                        config ->
                                                config.triggerSerializationAndReturnFuture(
                                                        serializationExecutor))
                                .collect(Collectors.toList()))
                .get();

        waitForSerializationFuturesAndUpdateJobVertices();
    } catch (Exception e) {
        throw new FlinkRuntimeException("Error in serialization.", e);
    }

    if (!streamGraph.getJobStatusHooks().isEmpty()) {
        jobGraph.setJobStatusHooks(streamGraph.getJobStatusHooks());
    }

    return jobGraph;
}
进行StreamNode的合并 如果判断相邻的两个StreamNode可以合并,则会合并为一个Operatorchain
1、如果该节点是一个chain的头节点,就会生成一个JobVertex
2、如果不是头节点,就要把自身配置并入头节点,然后把头节点和自己的输出边相连;对于不能chain的节点,当做只有头节点处理即可。
作用:减少线程之间切换带来的性能消耗,减少数据缓冲区的交换,降低序列化反序列化的压力,同时也能减少延迟、提升吞吐量。
直接进入setChaining()
进入createChain()
/**
 * Recursively builds the operator chain that contains {@code currentNodeId}.
 *
 * <p>The out edges of the current node are split into chainable and non-chainable edges.
 * Targets of chainable edges are visited recursively as part of the same chain
 * ({@code chainIndex + 1}, same {@code chainInfo}); targets of non-chainable edges are
 * visited recursively as the entry point of their own chain (index 1). After the recursion
 * the current node's names, resources and {@code StreamConfig} are set up; the chain's
 * start node additionally creates the JobVertex via {@code createJobVertex}.
 *
 * @param currentNodeId id of the stream node being processed
 * @param chainIndex position of the current node within its chain
 * @param chainInfo info of the chain the current node belongs to
 * @param chainEntryPoints chain infos keyed by the id of the chain's entry node; new entries
 *     are added for targets of non-chainable edges
 * @return the non-chainable out edges collected from this node and the nodes chained after
 *     it, or an empty list if the chain's start node is already in {@code builtVertices}
 */
private List<StreamEdge> createChain(
final Integer currentNodeId,
final int chainIndex,
final OperatorChainInfo chainInfo,
final Map<Integer, OperatorChainInfo> chainEntryPoints) {
Integer startNodeId = chainInfo.getStartNodeId();
// Only build the chain if its start node has not been built already.
if (!builtVertices.contains(startNodeId)) {
List<StreamEdge> transitiveOutEdges = new ArrayList<StreamEdge>();
// Out edges whose two endpoints can be chained together.
List<StreamEdge> chainableOutputs = new ArrayList<StreamEdge>();
// Out edges whose two endpoints cannot be chained together.
List<StreamEdge> nonChainableOutputs = new ArrayList<StreamEdge>();
// The StreamNode currently being processed.
StreamNode currentNode = streamGraph.getStreamNode(currentNodeId);
// Walk over the out edges of the current StreamNode and classify each one by whether
// the nodes on both ends of the edge can be merged.
for (StreamEdge outEdge : currentNode.getOutEdges()) {
// Can the upstream and downstream operators (StreamNodes) of this edge be chained?
if (isChainable(outEdge, streamGraph)) {
// Chainable edge.
chainableOutputs.add(outEdge);
} else {
// Non-chainable edge.
nonChainableOutputs.add(outEdge);
}
}
// Chain the operators on both ends of every chainable edge into one operator chain.
for (StreamEdge chainable : chainableOutputs) {
// Recursive call: the target joins the same chain (same chainInfo) at chainIndex + 1.
// E.g. once A and B have been chained, the recursion goes on to check whether the
// next operator C can be chained to them as well.
transitiveOutEdges.addAll(
createChain(
chainable.getTargetId(),
chainIndex + 1,
chainInfo,
chainEntryPoints));
}
// A non-chainable edge is an output of this chain; its target becomes the entry point of
// a chain of its own. The return value of this recursive call is not used.
for (StreamEdge nonChainable : nonChainableOutputs) {
transitiveOutEdges.add(nonChainable);
createChain(
nonChainable.getTargetId(),
1, // operators start at position 1 because 0 is for chained source inputs
chainEntryPoints.computeIfAbsent(
nonChainable.getTargetId(),
(k) -> chainInfo.newChain(nonChainable.getTargetId())),
chainEntryPoints);
}
chainedNames.put(
currentNodeId,
createChainedName(
currentNodeId,
chainableOutputs,
Optional.ofNullable(chainEntryPoints.get(currentNodeId))));
chainedMinResources.put(
currentNodeId, createChainedMinResources(currentNodeId, chainableOutputs));
chainedPreferredResources.put(
currentNodeId,
createChainedPreferredResources(currentNodeId, chainableOutputs));
OperatorID currentOperatorId =
chainInfo.addNodeToChain(
currentNodeId,
streamGraph.getStreamNode(currentNodeId).getOperatorName());
// Input/output formats are registered on the format container of the chain's start node.
if (currentNode.getInputFormat() != null) {
getOrCreateFormatContainer(startNodeId)
.addInputFormat(currentOperatorId, currentNode.getInputFormat());
}
if (currentNode.getOutputFormat() != null) {
getOrCreateFormatContainer(startNodeId)
.addOutputFormat(currentOperatorId, currentNode.getOutputFormat());
}
// If the current node is the head of the chain, create the JobVertex for it; any other
// node in the chain only gets a fresh StreamConfig.
StreamConfig config =
currentNodeId.equals(startNodeId)
? createJobVertex(startNodeId, chainInfo)
: new StreamConfig(new Configuration());
tryConvertPartitionerForDynamicGraph(chainableOutputs, nonChainableOutputs);
setOperatorConfig(currentNodeId, config, chainInfo.getChainedSources());
setOperatorChainedOutputsConfig(config, chainableOutputs);
// we cache the non-chainable outputs here, and set the non-chained config later
opNonChainableOutputsCache.put(currentNodeId, nonChainableOutputs);
if (currentNodeId.equals(startNodeId)) {
// Head of the chain: record the chain's transitive out edges and attach the configs of
// the operators chained behind it.
chainInfo.setTransitiveOutEdges(transitiveOutEdges);
chainInfos.put(startNodeId, chainInfo);
config.setChainStart();
config.setChainIndex(chainIndex);
config.setOperatorName(streamGraph.getStreamNode(currentNodeId).getOperatorName());
config.setTransitiveChainedTaskConfigs(chainedConfigs.get(startNodeId));
} else {
// Chained (non-head) operator: store its config under the chain's start node.
chainedConfigs.computeIfAbsent(
startNodeId, k -> new HashMap<Integer, StreamConfig>());
config.setChainIndex(chainIndex);
StreamNode node = streamGraph.getStreamNode(currentNodeId);
config.setOperatorName(node.getOperatorName());
chainedConfigs.get(startNodeId).put(currentNodeId, config);
}
config.setOperatorID(currentOperatorId);
// A node without chainable outputs is the end of its chain.
if (chainableOutputs.isEmpty()) {
config.setChainEnd();
}
return transitiveOutEdges;
} else {
return new ArrayList<>();
}
}
1、首先初始化了两个集合,来存储可chain和不可chain的StreamEdge,
2、然后获取到当前要处理的StreamNode
3、遍历当前StreamNode的边,来判断边两边上下游的StreamNode能否chain在一起,
4、将可以chain和不能chain的StreamEdge分别放入各自的集合
5、然后将可以chain的StreamNode,chain在一起形成一个OperatorChain,然后继续递归调用,判断chain完成后再下游的StreamNode能否继续chain在一起
6、将不能chain在一起的StreamNode取出,同样向下递归调用,判断下游的StreamNode能否和再下游的StreamNode合并。
7、在递归完成后判断当前节点是否是chain中的第一个StreamNode,如果是则开始构建JobVertex
8、同样判断当前节点是否是chain中的第一个StreamNode,如果是则开始构建JobEdge和IntermediateDataSet
进入isChainable() 查看合并条件(核心判断逻辑在下面的isChainableInput()中)
/**
 * Checks whether the upstream and downstream nodes connected by the given edge can be
 * chained together.
 *
 * <p>All of the following must hold, checked in this order:
 *
 * <ol>
 *   <li>both nodes are in the same slot sharing group;
 *   <li>the operators are chainable (see {@code areOperatorsChainable});
 *   <li>the edge uses a {@code ForwardPartitioner};
 *   <li>the edge's shuffle mode is not {@code ShuffleMode.BATCH};
 *   <li>both nodes have the same parallelism;
 *   <li>chaining is enabled on the stream graph;
 *   <li>no other in-edge of the downstream node has the same type number as this edge.
 * </ol>
 *
 * @param edge the edge to check
 * @param streamGraph the stream graph the edge belongs to
 * @return {@code true} if the two nodes can be chained
 */
private static boolean isChainableInput(StreamEdge edge, StreamGraph streamGraph) {
    // Resolve the two endpoints of the edge.
    final StreamNode upstream = streamGraph.getSourceVertex(edge);
    final StreamNode downstream = streamGraph.getTargetVertex(edge);

    // Both operator instances must be in the same slot sharing group.
    if (!upstream.isSameSlotSharingGroup(downstream)) {
        return false;
    }
    // Operator factories and chaining strategies must allow chaining.
    if (!areOperatorsChainable(upstream, downstream, streamGraph)) {
        return false;
    }
    // The partitioning between the two operators must be forward.
    if (!(edge.getPartitioner() instanceof ForwardPartitioner)) {
        return false;
    }
    // The shuffle mode between the two operators must not be BATCH.
    if (edge.getShuffleMode() == ShuffleMode.BATCH) {
        return false;
    }
    // Upstream and downstream must have the same parallelism.
    if (upstream.getParallelism() != downstream.getParallelism()) {
        return false;
    }
    // Chaining must be enabled.
    if (!streamGraph.isChainingEnabled()) {
        return false;
    }

    // check that we do not have a union operation, because unions currently only work
    // through the network/byte-channel stack.
    // we check that by testing that each "type" (which means input position) is used only once
    for (StreamEdge otherInEdge : downstream.getInEdges()) {
        if (otherInEdge == edge) {
            continue;
        }
        if (otherInEdge.getTypeNumber() == edge.getTypeNumber()) {
            return false;
        }
    }
    return true;
}
进入areOperatorsChainable()
/**
 * Decides from the operator factories and their chaining strategies whether the downstream
 * operator may be chained to the upstream operator.
 *
 * <p>Chaining is possible only if both factories are present, the downstream operator is not
 * a yielding operator behind a legacy-source head operator, the upstream strategy is
 * {@code ALWAYS}, {@code HEAD} or {@code HEAD_WITH_SOURCES}, and the downstream strategy is
 * {@code ALWAYS}, or {@code HEAD_WITH_SOURCES} with a {@code SourceOperatorFactory} upstream.
 *
 * @param upStreamVertex the upstream stream node
 * @param downStreamVertex the downstream stream node
 * @param streamGraph the stream graph, used to look up the head operator of the upstream node
 * @return {@code true} if the two operators can be chained
 * @throws RuntimeException if either operator reports an unknown chaining strategy
 */
@VisibleForTesting
static boolean areOperatorsChainable(
        StreamNode upStreamVertex, StreamNode downStreamVertex, StreamGraph streamGraph) {
    // Neither the upstream nor the downstream operator factory may be null.
    StreamOperatorFactory<?> upStreamOperator = upStreamVertex.getOperatorFactory();
    StreamOperatorFactory<?> downStreamOperator = downStreamVertex.getOperatorFactory();
    if (downStreamOperator == null || upStreamOperator == null) {
        return false;
    }

    // yielding operators cannot be chained to legacy sources
    // unfortunately the information that vertices have been chained is not preserved at this
    // point
    if (downStreamOperator instanceof YieldingOperatorFactory
            && getHeadOperator(upStreamVertex, streamGraph).isLegacySource()) {
        return false;
    }

    // we use switch/case here to make sure this is exhaustive if ever values are added to the
    // ChainingStrategy enum
    boolean isChainable;

    // Upstream side: every strategy except NEVER allows a downstream operator to be chained
    // to this operator.
    switch (upStreamOperator.getChainingStrategy()) {
        case NEVER:
            // Never chained to a preceding or succeeding operator.
            isChainable = false;
            break;
        case ALWAYS:
            // Chained whenever possible.
        case HEAD:
            // Not chained to its upstream, but downstream operators may chain to it.
        case HEAD_WITH_SOURCES:
            isChainable = true;
            break;
        default:
            throw new RuntimeException(
                    "Unknown chaining strategy: " + upStreamOperator.getChainingStrategy());
    }

    // Downstream side: only ALWAYS (or HEAD_WITH_SOURCES behind a source) may be chained to
    // its upstream operator.
    switch (downStreamOperator.getChainingStrategy()) {
        case NEVER:
        case HEAD:
            isChainable = false;
            break;
        case ALWAYS:
            // keep the value from upstream
            break;
        case HEAD_WITH_SOURCES:
            // only if upstream is a source
            isChainable &= (upStreamOperator instanceof SourceOperatorFactory);
            break;
        default:
            // Report the downstream strategy: that is the value this switch failed to handle.
            throw new RuntimeException(
                    "Unknown chaining strategy: " + downStreamOperator.getChainingStrategy());
    }

    return isChainable;
}
总结:
1、首先初始化了两个集合,来存储可chain和不可chain的StreamEdge,
2、然后获取到当前要处理的StreamNode
3、遍历当前StreamNode的边,来判断边两边上下游的StreamNode能否chain在一起,
4、将可以chain和不能chain的StreamEdge分别放入各自的集合
5、然后将可以chain的StreamNode,chain在一起形成一个OperatorChain,然后继续递归调用,判断chain完成后再下游的StreamNode能否继续chain在一起
6、将不能chain在一起的StreamNode取出,同样向下递归调用,判断下游的StreamNode能否和再下游的StreamNode合并。
7、在递归完成后判断当前节点是否是chain中的第一个StreamNode,如果是则开始构建JobVertex
8、同样判断当前节点是否是chain中的第一个StreamNode,如果是则开始构建JobEdge和IntermediateDataSet