ExecutionGraph生成

ExecutionGraph

jobManager在接收到submitJob消息之后,会先根据jobGraph生成ExecutionGraph。

1,使用ExecutionGraphBuilder的buildGraph方法生成ExecutionGraph;

// Build the ExecutionGraph for the submitted jobGraph. The current executionGraph value is passed in
// as the first argument and the returned graph is assigned back to the same variable.
executionGraph = ExecutionGraphBuilder.buildGraph(
          executionGraph,
          jobGraph,
          flinkConfiguration,
          futureExecutor,
          ioExecutor,
          scheduler,
          userCodeLoader,
          checkpointRecoveryFactory,
          Time.of(timeout.length, timeout.unit),
          restartStrategy,
          jobMetrics,
          numSlots,
          blobServer,
          resultPartitionLocationTrackerProxy,
          Time.milliseconds(allocationTimeout),
          log.logger)

2,深入到buildGraph方法,如果传入的ExecutionGraph(prior)不存在,则new一个新的ExecutionGraph;

// Reuse the prior ExecutionGraph when one was handed in; otherwise construct a fresh one.
executionGraph = (prior != null) ? prior :
				new ExecutionGraph(
					jobInformation,
					futureExecutor,
					ioExecutor,
					rpcTimeout,
					restartStrategy,
					failoverStrategy,
					slotProvider,
					classLoader,
					blobWriter,
					resultPartitionLocationTrackerProxy,
					allocationTimeout,
					metrics,
					jobManagerConfig);

3,设置一些基础属性:allowQueuedScheduling,jsonPlan;

// Copy the queued-scheduling flag from the job graph onto the execution graph.
executionGraph.setQueuedSchedulingAllowed(jobGraph.getAllowQueuedScheduling());
// Generate a JSON plan from the job graph and store it on the execution graph.
executionGraph.setJsonPlan(JsonPlanGenerator.generatePlan(jobGraph));

4,attachJobGraph,生成Graph的节点和边

// topologically sort the job vertices and attach the graph to the existing one;
// attachJobGraph receives the vertices in exactly this sorted order
		List<JobVertex> sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources();
		// only log the summary line when debug logging is enabled
		if (log.isDebugEnabled()) {
			log.debug("Adding {} vertices from job graph {} ({}).", sortedTopology.size(), jobName, jobId);
		}
		executionGraph.attachJobGraph(sortedTopology);

深入到attachJobGraph方法,将JobVertex封装成ExecutionJobVertex,然后再用edge将节点连起来;

public void attachJobGraph(List<JobVertex> topologicallySorted) throws JobException {

		final ArrayList<ExecutionJobVertex> newExecJobVertices = new ArrayList<>(topologicallySorted.size());
		inal long createTimestamp = System.currentTimeMillis();

		for (JobVertex jobVertex : topologicallySorted) {

			if (jobVertex.isInputVertex() && !jobVertex.isStoppable()) {
				this.isStoppable = false;
			}

			// 在新建ExecutionJobVertex时已经把中间结果集都初始化好。
			ExecutionJobVertex ejv = new ExecutionJobVertex(
				this,
				jobVertex,
				1,
				rpcTimeout,
				globalModVersion,
				createTimestamp);
            //把节点用edge相连
			ejv.connectToPredecessors(this.intermediateResults);
			
			//ExecutionJobVertex建好并与input建立好edge,存入map中
			ExecutionJobVertex previousTask = this.tasks.putIfAbsent(jobVertex.getID(), ejv);
			for (IntermediateResult res : ejv.getProducedDataSets()) {
				IntermediateResult previousDataSet = 						this.intermediateResults.putIfAbsent(res.getId(), res);
			}
			this.verticesInCreationOrder.add(ejv);
			this.numVerticesTotal += ejv.getParallelism();
			newExecJobVertices.add(ejv);
		}

		terminationFuture = new CompletableFuture<>();
		failoverStrategy.notifyNewVertices(newExecJobVertices);
	}

IntermediateResult 对应一个 Job Edge 的输出下游结果集,一个 IntermediateResult 包含多个 IntermediateResultPartition,一个 IntermediateResultPartition 对应一个并行任务 ExecutionVertex 的输出结果。

深入到connectToPredecessors,看看是如何连接ExecutionJobVertex的;

/**
 * Connects this job vertex to the intermediate results consumed through its input edges.
 *
 * <p>For every input {@code JobEdge} the source {@code IntermediateResult} is looked up,
 * recorded as an input, and every parallel {@code ExecutionVertex} of this vertex is
 * connected to it.
 *
 * @param intermediateDataSets the intermediate results registered so far, keyed by data set ID
 * @throws JobException if no intermediate result is registered for the source of an input edge
 */
public void connectToPredecessors(Map<IntermediateDataSetID, IntermediateResult> intermediateDataSets) throws JobException {
		// all input edges of the underlying job vertex
		List<JobEdge> inputs = jobVertex.getInputs();
		for (int num = 0; num < inputs.size(); num++) {
			JobEdge edge = inputs.get(num);
			// look up the intermediate result that is the source of this edge
			IntermediateResult ires = intermediateDataSets.get(edge.getSourceId());
			if (ires == null) {
				// fail with a descriptive error before any state is mutated, instead of
				// storing null in the inputs and hitting a NullPointerException below
				throw new JobException("Cannot connect this job graph to the previous graph. No previous intermediate result found for ID "
						+ edge.getSourceId());
			}

			this.inputs.add(ires);
			// register this vertex as a consumer of the result and keep the returned consumer index
			int consumerIndex = ires.registerConsumer();

			for (int i = 0; i < parallelism; i++) {
				ExecutionVertex ev = taskVertices[i];
				// let each parallel subtask build its own edges to the partitions of the result
				ev.connectSource(num, ires, edge, consumerIndex);
			}
		}
	}

深入到connectSource方法,

/**
 * Wires one input of this execution vertex to the partitions of the given intermediate result.
 *
 * @param inputNumber index of the input being connected; also the slot written in inputEdges
 * @param source the intermediate result whose partitions are consumed
 * @param edge the job edge that supplies the distribution pattern
 * @param consumerNumber the consumer index passed on to each source partition
 * @throws RuntimeException if the distribution pattern is neither POINTWISE nor ALL_TO_ALL
 */
public void connectSource(int inputNumber, IntermediateResult source, JobEdge edge, int consumerNumber) {

		final DistributionPattern distribution = edge.getDistributionPattern();
		final IntermediateResultPartition[] partitions = source.getPartitions();

		// the distribution pattern decides which partitions this subtask reads from
		final ExecutionEdge[] created;
		switch (distribution) {
			case ALL_TO_ALL:
				created = connectAllToAll(partitions, inputNumber);
				break;

			case POINTWISE:
				created = connectPointwise(partitions, inputNumber);
				break;

			default:
				throw new RuntimeException("Unrecognized distribution pattern.");
		}

		this.inputEdges[inputNumber] = created;

		// Register every new edge as a consumer on its source partition. For now (until the
		// receiver initiated handshake is in place) the execution graph does this registration.
		for (int i = 0; i < created.length; i++) {
			final ExecutionEdge current = created[i];
			current.getSource().addConsumer(current, consumerNumber);
		}
	}

如果pattern是POINTWISE的话,看下connectPointwise方法:

/**
 * Builds the input edges of this subtask for a POINTWISE connection.
 *
 * <p>Which source partitions are picked depends on how the number of source partitions
 * compares to the parallelism of this vertex: equal (one-to-one), fewer sources than subtasks
 * (one source feeds several subtasks), or more sources than subtasks (one subtask reads
 * several sources).
 *
 * @param sourcePartitions the partitions of the consumed intermediate result
 * @param inputNumber the input index handed to every created edge
 * @return the edges from the selected source partitions to this subtask
 */
private ExecutionEdge[] connectPointwise(IntermediateResultPartition[] sourcePartitions, int inputNumber) {
		// number of source partitions
		final int numSources = sourcePartitions.length;
		// parallelism of the consuming vertex
		final int parallelism = getTotalNumberOfParallelSubtasks();

		// same count on both sides: one-to-one, pick the partition at this subtask's own index
		if (numSources == parallelism) {
			return new ExecutionEdge[] { new ExecutionEdge(sourcePartitions[subTaskIndex], this, inputNumber) };
		}
		else if (numSources < parallelism) { // more subtasks than partitions: one source feeds several subtasks
			int sourcePartition;
			// parallelism is a multiple of numSources: every source feeds the same number of subtasks,
			// e.g. 2 sources and 6 subtasks -> subtasks 3, 4, 5 read from source 1
			if (parallelism % numSources == 0) {
				// same number of targets per source
				int factor = parallelism / numSources;
				sourcePartition = subTaskIndex / factor;
			}
			else {
				// uneven split, resolved by truncating a float quotient,
				// e.g. 2 sources and 7 subtasks -> factor 3.5: subtasks 0, 1, 2, 3 read from source 0
				// and subtasks 4, 5, 6 read from source 1
				float factor = ((float) parallelism) / numSources;
				sourcePartition = (int) (subTaskIndex / factor);
			}

			return new ExecutionEdge[] { new ExecutionEdge(sourcePartitions[sourcePartition], this, inputNumber) };
		}
		else {// more partitions than subtasks: one subtask reads several sources
			// numSources is a multiple of parallelism: every subtask reads numSources / parallelism sources,
			// e.g. 6 sources and 2 subtasks -> sources 3, 4, 5 feed subtask 1
			if (numSources % parallelism == 0) {
				int factor = numSources / parallelism;
				int startIndex = subTaskIndex * factor;

				ExecutionEdge[] edges = new ExecutionEdge[factor];
				for (int i = 0; i < factor; i++) {
					edges[i] = new ExecutionEdge(sourcePartitions[startIndex + i], this, inputNumber);
				}
				return edges;
			}
			else {
				// uneven split, e.g. 7 sources and 2 subtasks -> sources 0, 1, 2 feed subtask 0
				// and sources 3, 4, 5, 6 feed subtask 1
				float factor = ((float) numSources) / parallelism;
				int start = (int) (subTaskIndex * factor);
				// the last subtask takes every remaining partition so that none is left unassigned
				int end = (subTaskIndex == getTotalNumberOfParallelSubtasks() - 1) ?
						sourcePartitions.length :
						(int) ((subTaskIndex + 1) * factor);

				ExecutionEdge[] edges = new ExecutionEdge[end - start];
				for (int i = 0; i < edges.length; i++) {
					edges[i] = new ExecutionEdge(sourcePartitions[start + i], this, inputNumber);
				}

				return edges;
			}
		}
	}

如果pattern是ALL_TO_ALL的话,看下connectAllToAll方法:

/**
 * Builds the input edges of this subtask for an ALL_TO_ALL connection: one edge per source
 * partition, in the order of the given array.
 *
 * @param sourcePartitions the partitions of the consumed intermediate result
 * @param inputNumber the input index handed to every created edge
 * @return an array with exactly one edge for each source partition
 */
private ExecutionEdge[] connectAllToAll(IntermediateResultPartition[] sourcePartitions, int inputNumber) {
		final ExecutionEdge[] result = new ExecutionEdge[sourcePartitions.length];
		int slot = 0;
		for (IntermediateResultPartition partition : sourcePartitions) {
			result[slot++] = new ExecutionEdge(partition, this, inputNumber);
		}
		return result;
	}

把所有的source都指向每个task。

5,配置checkpoint

// Checkpointing is enabled only when the job graph carries checkpointing settings.
JobCheckpointingSettings snapshotSettings = jobGraph.getCheckpointingSettings();
		if (snapshotSettings != null) {
			// "..." marks code elided from this excerpt; chkConfig, the vertex lists, hooks,
			// checkpointIdCounter, completedCheckpoints, rootBackend and checkpointStatsTracker
			// used below are presumably prepared there
			...
			executionGraph.enableCheckpointing(
				chkConfig.getCheckpointInterval(),
				chkConfig.getCheckpointTimeout(),
				chkConfig.getMinPauseBetweenCheckpoints(),
				chkConfig.getMaxConcurrentCheckpoints(),
				chkConfig.getCheckpointRetentionPolicy(),
				triggerVertices,
				ackVertices,
				confirmVertices,
				hooks,
				checkpointIdCounter,
				completedCheckpoints,
				rootBackend,
				checkpointStatsTracker);
		}

checkpoint之后专门讨论

6,设置metrics

// Register one gauge per graph-level metric; each gauge is constructed from the execution graph.
metrics.gauge(RestartTimeGauge.METRIC_NAME, new RestartTimeGauge(executionGraph));
		metrics.gauge(DownTimeGauge.METRIC_NAME, new DownTimeGauge(executionGraph));
		metrics.gauge(UpTimeGauge.METRIC_NAME, new UpTimeGauge(executionGraph));
		metrics.gauge(NumberOfFullRestartsGauge.METRIC_NAME, new NumberOfFullRestartsGauge(executionGraph));

		// the failover strategy registers its own metrics on the same metric group
		executionGraph.getFailoverStrategy().registerMetrics(metrics);
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值