StreamTask 执行

画画的老顽童

已于 2022-01-19 14:09:29 修改

阅读量306

点赞数

分类专栏： flink 文章标签：大数据 1024程序员节

于 2021-04-17 23:29:42 首次发布

本文链接：https://blog.csdn.net/m0_46449152/article/details/115803741

版权

flink 专栏收录该内容

17 篇文章 5 订阅

订阅专栏

1、TaskExecutor 执行一个 Task

TaskExecutor.submitTask
  Task task = new Task(
  // TODO   taskExecutorServices.createShuffleEnvironment
  taskExecutorServices.getShuffleEnvironment()

创建ShuffleEnvironment T
TaskManager启动时创建ShuffleEnvironment
startTaskManager->  TaskManagerServices.fromConfiguration ->createShuffleEnvironment 
    -> NettyShuffleServiceFactory.createShuffleEnvironment -> createNettyShuffleEnvironment(){
    		NettyConfig nettyConfig = config.nettyConfig();

		/*************************************************
		 * TODO_MA  
		 *  注释： 返回： FileChannelManagerImpl
		 */
		FileChannelManager fileChannelManager = new FileChannelManagerImpl(config.getTempDirs(), DIR_NAME_PREFIX);

		/*************************************************
		 * TODO_MA  
		 *  注释： 返回： NettyConnectionManager
		 */
		ConnectionManager connectionManager = nettyConfig != null ?
			new NettyConnectionManager(resultPartitionManager, taskEventPublisher, nettyConfig)
			:new LocalConnectionManager();

		/*************************************************
		 * TODO_MA  
		 *  注释： 返回： NetworkBufferPool
		 */
		NetworkBufferPool networkBufferPool = new NetworkBufferPool(config.numNetworkBuffers(), config.networkBufferSize(),
			config.networkBuffersPerChannel(), config.getRequestSegmentsTimeout());

		registerShuffleMetrics(metricGroup, networkBufferPool);

		/*************************************************
		 * TODO_MA  
		 *  注释： 构建 ResultPartitionFactory
		 */
		ResultPartitionFactory resultPartitionFactory = new ResultPartitionFactory(resultPartitionManager, fileChannelManager, networkBufferPool,
			config.getBlockingSubpartitionType(), config.networkBuffersPerChannel(), config.floatingNetworkBuffersPerGate(),
			config.networkBufferSize(), config.isForcePartitionReleaseOnConsumption(), config.isBlockingShuffleCompressionEnabled(),
			config.getCompressionCodec(), config.getMaxBuffersPerChannel());

		/*************************************************
		 * TODO_MA  
		 *  注释： 构建 SingleInputGateFactory
		 */
		SingleInputGateFactory singleInputGateFactory = new SingleInputGateFactory(taskExecutorResourceId, config, connectionManager,
			resultPartitionManager, taskEventPublisher, networkBufferPool);

		/*************************************************
		 * TODO_MA  
		 *  注释： NettyShuffleEnvironment
		 */
		return new NettyShuffleEnvironment(taskExecutorResourceId, config, networkBufferPool, connectionManager, resultPartitionManager,
			fileChannelManager, resultPartitionFactory, singleInputGateFactory, ioExecutor);
    }
 --> NettyConnectionManager
	/**
	 * Creates the connection manager and all of its collaborators: a {@code NettyServer}, a
	 * {@code NettyClient}, a {@code NettyBufferPool}, a {@code PartitionRequestClientFactory}
	 * and the {@code NettyProtocol}.
	 *
	 * @param partitionProvider handed to the {@code NettyProtocol}; passed through {@code checkNotNull}
	 * @param taskEventPublisher handed to the {@code NettyProtocol}; passed through {@code checkNotNull}
	 * @param nettyConfig configuration used to build the server, the client and to size the buffer pool arenas
	 */
	public NettyConnectionManager(ResultPartitionProvider partitionProvider, TaskEventPublisher taskEventPublisher, NettyConfig nettyConfig) {

		/*************************************************
		 * TODO
		 *  Note: initialize a NettyServer
		 */
		this.server = new NettyServer(nettyConfig);

		/*************************************************
		 * TODO
		 *  Note: initialize a NettyClient
		 */
		this.client = new NettyClient(nettyConfig);

		// The buffer pool is sized by the configured number of arenas; the request-client factory wraps the client created above.
		this.bufferPool = new NettyBufferPool(nettyConfig.getNumberOfArenas());
		this.partitionRequestClientFactory = new PartitionRequestClientFactory(client);

		// TODO: the protocol object is built last; both of its arguments are null-checked at this point,
		// i.e. only after the server, client and pools above have already been constructed.
		this.nettyProtocol = new NettyProtocol(checkNotNull(partitionProvider), checkNotNull(taskEventPublisher));
	}

-------------------------------
NettyServer 绑定handler PartitionRequestServerHandler
NettyClient 绑定handler CreditBasedPartitionRequestClientHandler
/**
 * Supplies the ordered channel handlers for the server side and the client side.
 *
 * <p>A single message encoder instance is created per protocol object and is placed first in
 * both handler arrays.
 */
public class NettyProtocol {

	/** Encoder instance reused in every handler array returned by this object. */
	private final NettyMessage.NettyMessageEncoder messageEncoder;

	private final ResultPartitionProvider partitionProvider;
	private final TaskEventPublisher taskEventPublisher;

	/**
	 * @param partitionProvider forwarded to each server request handler
	 * @param taskEventPublisher forwarded to each server request handler
	 */
	NettyProtocol(ResultPartitionProvider partitionProvider, TaskEventPublisher taskEventPublisher) {
		this.messageEncoder = new NettyMessage.NettyMessageEncoder();
		this.partitionProvider = partitionProvider;
		this.taskEventPublisher = taskEventPublisher;
	}

	/**
	 * Returns the server channel handlers.
	 *
	 * <pre>
	 * +-------------------------------------------------------------------+
	 * |                        SERVER CHANNEL PIPELINE                    |
	 * |                                                                   |
	 * |    +----------+----------+ (3) write  +----------------------+    |
	 * |    | Queue of queues     +----------->| Message encoder      |    |
	 * |    +----------+----------+            +-----------+----------+    |
	 * |              /|\                                 \|/              |
	 * |               | (2) enqueue                       |               |
	 * |    +----------+----------+                        |               |
	 * |    | Request handler     |                        |               |
	 * |    +----------+----------+                        |               |
	 * |              /|\                                  |               |
	 * |               |                                   |               |
	 * |   +-----------+-----------+                       |               |
	 * |   | Message+Frame decoder |                       |               |
	 * |   +-----------+-----------+                       |               |
	 * |              /|\                                  |               |
	 * +---------------+-----------------------------------+---------------+
	 * |               | (1) client request               \|/
	 * +---------------+-----------------------------------+---------------+
	 * |               |                                   |               |
	 * |       [ Socket.read() ]                    [ Socket.write() ]     |
	 * |                                                                   |
	 * |  Netty Internal I/O Threads (Transport Implementation)            |
	 * +-------------------------------------------------------------------+
	 * </pre>
	 *
	 * <p>A fresh queue, request handler and decoder are created on every call; only the encoder is shared.
	 *
	 * @return channel handlers, in the order: encoder, decoder, request handler, queue
	 */
	public ChannelHandler[] getServerChannelHandlers() {
		final PartitionRequestQueue outboundQueue = new PartitionRequestQueue();

		// Article annotation (not verifiable from this snippet): this handler serves the PartitionRequest
		// and AddCredit messages that the consuming side sends through its PartitionRequestClient.
		final PartitionRequestServerHandler requestHandler =
			new PartitionRequestServerHandler(partitionProvider, taskEventPublisher, outboundQueue);

		final NettyMessage.NettyMessageDecoder inboundDecoder = new NettyMessage.NettyMessageDecoder();

		final ChannelHandler[] handlers = new ChannelHandler[4];
		handlers[0] = messageEncoder;
		handlers[1] = inboundDecoder;
		handlers[2] = requestHandler;
		handlers[3] = outboundQueue;
		return handlers;
	}

	/**
	 * Returns the client channel handlers (the handlers used by the NettyClient, per the article).
	 *
	 * <pre>
	 *     +-----------+----------+            +----------------------+
	 *     | Remote input channel |            | request client       |
	 *     +-----------+----------+            +-----------+----------+
	 *                 |                                   | (1) write
	 * +---------------+-----------------------------------+---------------+
	 * |               |     CLIENT CHANNEL PIPELINE       |               |
	 * |               |                                  \|/              |
	 * |    +----------+----------+            +----------------------+    |
	 * |    | Request handler     +            | Message encoder      |    |
	 * |    +----------+----------+            +-----------+----------+    |
	 * |              /|\                                 \|/              |
	 * |               |                                   |               |
	 * |    +----------+------------+                      |               |
	 * |    | Message+Frame decoder |                      |               |
	 * |    +----------+------------+                      |               |
	 * |              /|\                                  |               |
	 * +---------------+-----------------------------------+---------------+
	 * |               | (3) server response              \|/ (2) client request
	 * +---------------+-----------------------------------+---------------+
	 * |               |                                   |               |
	 * |       [ Socket.read() ]                    [ Socket.write() ]     |
	 * |                                                                   |
	 * |  Netty Internal I/O Threads (Transport Implementation)            |
	 * +-------------------------------------------------------------------+
	 * </pre>
	 *
	 * @return channel handlers, in the order: encoder, decoder delegate, client handler
	 */
	public ChannelHandler[] getClientChannelHandlers() {
		final NetworkClientHandler clientHandler = new CreditBasedPartitionRequestClientHandler();
		// The decoder delegate is constructed around the very same client handler instance.
		final NettyMessageClientDecoderDelegate decoderDelegate = new NettyMessageClientDecoderDelegate(clientHandler);

		return new ChannelHandler[] {messageEncoder, decoderDelegate, clientHandler};
	}

}

public Task()
 1、初始化ResultPartition 和 ResultSubPartition
 /*************************************************
		 * TODO  一个task的执行有输入和输出，关于输出的抽象  ResultPartition 和 ResultSubpartition
		 *  注释： 初始化 ResultPartitionWriter 具体实现是 ResultPartition
		 */
		// produced intermediate result partitions
		final ResultPartitionWriter[] resultPartitionWriters = shuffleEnvironment
			// TODO NettyShuffleEnvironment
			.createResultPartitionWriters(taskShuffleContext, resultPartitionDeploymentDescriptors).toArray(new       ResultPartitionWriter[]{});

2、初始化InputGate
        /*************************************************
		 * TODO  一个task的执行有输入和输出，关于输入的抽象  InputGate InputChannel(从上游一个task节点拉取数据) 
		 *       LocalRecoveredInputChannel 或 RemoteRecoveredInputChannel 本地或远程拉取
		 *  注释： 初始化 InputGate
		 */
		// consumed intermediate result partitions
		final IndexedInputGate[] gates = shuffleEnvironment
			// TODO NettyShuffleEnvironment
			.createInputGates(taskShuffleContext, this, inputGateDeploymentDescriptors)
			.toArray(new IndexedInputGate[0]);
	 --> createInputGates -> singleInputGateFactory.create -> createBufferPoolFactory -> createBufferPool
	   -> internalCreateBufferPool ->  new LocalBufferPool(创建一个 LocalBufferPool)

3、包装
        /*************************************************
		 * TODO  包装
		 *  注释： 对上述生成的 ResultPartition 再根据是否需要发回反馈信息等，进行进一步对象的处理
		 */
		this.consumableNotifyingPartitionWriters = ConsumableNotifyingResultPartitionWriterDecorator
			.decorate(resultPartitionDeploymentDescriptors, resultPartitionWriters, this, jobId, resultPartitionConsumableNotifier);

4、 执行 Task 的线程 实例化
		/*************************************************
		 * TODO
		 *  注释： 执行 Task 的线程 实例化 ，TaskExecutor的 task.startTaskThread(); 
		 *  启动线程 转到 Task 的 run() 方法
		 */
		// finally, create the executing thread, but do not start it
		executingThread = new Thread(TASK_THREADS_GROUP, this, taskNameWithSubtask);

-> Task.run

2. SourceStreamTask 和 StreamTask 初始化

首先需要了解的第一个知识点：在最开始一个 job 提交到 Flink standalone 集群运行的时候，在 client
构建 StreamGraph（顶点是 StreamNode，边是 StreamEdge）的时候，会根据用户调用的算子生成
的 Transformation 为 StreamGraph 生成 StreamNode，在生成 StreamNode 的时候，会通过
OperatorFactory 执行判断，如果该 StreamOperator 是 StreamSource 的话，就会指定该
StreamTask 的 invokableClass 为 SourceStreamTask，否则为（OneInputStreamTask，
TwoInputStreamTask， StreamTask）。核心代码是：

StreamGraph.addOperator(....){
invokableClass = operatorFactory.isStreamSource() ? SourceStreamTask.class :
OneInputStreamTask.class;
}
-> SourceStreamTask
	/**
	 * Private constructor that wires a SourceStreamTask to an explicit lock object.
	 *
	 * <p>Article note: SourceStreamTask is the very first task of a Flink job, i.e. the task that
	 * connects to the source; a dedicated thread, LegacySourceFunctionThread, is used to receive data.
	 *
	 * @param env environment forwarded to the parent constructor
	 * @param lock lock object; used to build the synchronized action executor and stored (null-checked) in {@code this.lock}
	 * @throws Exception declared by this constructor; the visible code does not show which call raises it
	 */
	private SourceStreamTask(Environment env, Object lock) throws Exception {

		/*************************************************
		 * TODO
		 *  Note: the action executor handed to the parent comes from
		 *  StreamTaskActionExecutor.synchronizedExecutor(lock); the article labels it
		 *  SynchronizedStreamTaskActionExecutor. The timer service argument is passed as null.
		 */
		super(env, null, FatalExitExceptionHandler.INSTANCE, StreamTaskActionExecutor.synchronizedExecutor(lock));

		this.lock = Preconditions.checkNotNull(lock);

		// TODO_MA Note: create the LegacySourceFunctionThread (only constructed here, not started).
		// TODO_MA Note: per the article, this is the thread in which the source produces data;
		// it runs inside the task and receives data on behalf of this SourceStreamTask.
		this.sourceThread = new LegacySourceFunctionThread();
	}

-> OneInputStreamTask
	/**
	 * Creates the task by delegating directly to the parent constructor.
	 *
	 * @param env environment forwarded unchanged to the parent
	 * @throws Exception propagated from the parent constructor
	 */
	public OneInputStreamTask(Environment env) throws Exception {

		// TODO_MA Note: call the parent constructor
		super(env);
	}
// 以上两个super都进入 StreamTask
super -> StreamTask
	/**
	 * The constructor that all other StreamTask constructors end up in (per the article).
	 *
	 * <p>In order, it creates: the stream configuration, the record writer delegate, the mailbox
	 * processor and its main executor, the async exception handler and thread pool, the state
	 * backend, the subtask checkpoint coordinator, the timer service and the channel I/O executor.
	 *
	 * @param environment task environment, forwarded to the parent and used for metrics and job information
	 * @param timerService optional timer service; when null a SystemProcessingTimeService is created
	 * @param uncaughtExceptionHandler handler given to the thread factory of the async operations pool
	 * @param actionExecutor executor for task actions; null-checked before being stored
	 * @param mailbox mailbox handed to the MailboxProcessor
	 * @throws Exception declared by this constructor; the visible code does not show which call raises it
	 */
	protected StreamTask(Environment environment, @Nullable TimerService timerService, Thread.UncaughtExceptionHandler uncaughtExceptionHandler,
						 StreamTaskActionExecutor actionExecutor, TaskMailbox mailbox) throws Exception {

		super(environment);

		this.configuration = new StreamConfig(getTaskConfiguration());

		/*************************************************
		 * TODO
		 *  Note: create the RecordWriter delegate. Article notes (not verifiable from this snippet):
		 *  - it is most likely a ChannelSelectorRecordWriter, possibly a BroadcastRecordWriter;
		 *  - when a task runs, input is actually handled by a RecordReader (batch) and output
		 *    by a RecordWriter;
		 *  - if the task is an operator chain with several operators, the last operator writes
		 *    to a RecordWriterOutput while the preceding ones write to a ChainingOutput:
		 *
		 *  T1(ChainingOutput) ---> T2(ChainingOutput) ---> T3(RecordWriterOutput)
		 */
		this.recordWriter = createRecordWriterDelegate(configuration, environment);

		// TODO_MA Note: per the article this is a SynchronizedStreamTaskActionExecutor
		this.actionExecutor = Preconditions.checkNotNull(actionExecutor);

		/*************************************************
		 * TODO
		 *  Note: create the MailboxProcessor. processInput is only passed as a method reference
		 *  here (first argument); it is not invoked by this line.
		 *  Article notes (not verifiable from this snippet):
		 *  1. for a SourceStreamTask, processInput starts the task's sourceThread;
		 *  2. for other tasks, input is handled by a StreamOneInputProcessor or StreamTwoInputProcessor;
		 *  -
		 *  second argument: TaskMailboxImpl
		 *  third argument: SynchronizedStreamTaskActionExecutor
		 *
		 *  Every piece of work the task receives is wrapped as a mail and put into the mailbox,
		 *  and some component polls the mailbox to fetch and process those mails.
		 */
		this.mailboxProcessor = new MailboxProcessor(this::processInput, mailbox, actionExecutor);

		/*************************************************
		 * TODO
		 *  Article note (unverified): once this has run, the SourceStreamTask's receiving thread
		 *  is parked waiting to receive data.
		 */
		this.mailboxProcessor.initMetric(environment.getMetricGroup());
		this.mainMailboxExecutor = mailboxProcessor.getMainMailboxExecutor();     // TODO_MA Note: per the article, a MailboxExecutorImpl
		this.asyncExceptionHandler = new StreamTaskAsyncExceptionHandler(environment);
		this.asyncOperationsThreadPool = Executors.newCachedThreadPool(new ExecutorThreadFactory("AsyncOperations", uncaughtExceptionHandler));

		/*************************************************
		 * TODO
		 *  Note: create the StateBackend.
		 *  Article notes (not verifiable from this snippet): the backend is chosen from the
		 *  state.backend setting:
		 *  -
		 *  1. MemoryStateBackend keeps state in the JobManager's memory
		 *  2. FsStateBackend keeps state in a file system (local, or distributed such as HDFS or S3)
		 *  3. RocksDBStateBackend keeps state in RocksDB
		 *  -
		 *  With the author's configuration the result is usually an FsStateBackend.
		 */
		this.stateBackend = createStateBackend();

		/*************************************************
		 * TODO
		 *  Note: create the SubtaskCheckpointCoordinatorImpl. It needs the state backend created
		 *  just above, the action executor and the async operations thread pool.
		 */
		this.subtaskCheckpointCoordinator = new SubtaskCheckpointCoordinatorImpl(

			/*************************************************
			 * TODO
			 *  Note: the first argument creates the CheckpointStorage for this job id.
			 *  1. Per the article: FsStateBackend yields an FsCheckpointStorage
			 */
			stateBackend.createCheckpointStorage(getEnvironment().getJobID()), getName(), actionExecutor, getCancelables(),
			getAsyncOperationsThreadPool(), getEnvironment(), this, configuration.isUnalignedCheckpointsEnabled(), this::prepareInputSnapshot);

		// TODO_MA Note: initialize the timer service
		// TODO_MA Note: article lists the time semantics ProcessingTime, EventTime, IngestionTime
		// if the clock is not already set, then assign a default TimeServiceProvider
		if(timerService == null) {
			ThreadFactory timerThreadFactory = new DispatcherThreadFactory(TRIGGER_THREAD_GROUP, "Time Trigger for " + getName());
			this.timerService = new SystemProcessingTimeService(this::handleTimerException, timerThreadFactory);
		} else {
			this.timerService = timerService;
		}

		/*************************************************
		 * TODO
		 *  Note: create the single-threaded executor used for channel I/O
		 */
		this.channelIOExecutor = Executors.newSingleThreadExecutor(new ExecutorThreadFactory("channel-state-unspilling"));
	}

其中在 SourceStreamTask 的 processInput() 方法中，主要是启动接收数据的线程
LegacySourceFunctionThread。
当构造方法完毕的时候，LegacySourceFunctionThread 已经初始化好了，但是 headOperator 还是
null，所以，LegacySourceFunctionThread 还未真正启动。

所以当 ExecutionVertex 真正被提交到 TaskExecutor 中运行的时候，被封装的 Execution 对应的 Task
类的启动类 AbstractInvokable 就是在构建 StreamGraph 的时候指定的对应的 invokableClass。所以
1、如果启动 SourceStreamTask，则启动类是：SourceStreamTask
2、如果启动非 SourceStreamTask，则启动类是：StreamTask

SourceStreamTask 的构造过程。核心入口：

 Task.run -> doRun()
 	/**
	 * 重点 13 步
	 */
	private void doRun() {
		// ----------------------------
		//  Initial State transition
		// ----------------------------
		while(true) {
			ExecutionState current = this.executionState;

			// 第一步 将task状态 由 CREATED 改成：DEPLOYING， 然后退出
			if(current == ExecutionState.CREATED) {
				if(transitionState(ExecutionState.CREATED, ExecutionState.DEPLOYING)) {
					// success, we can start our work
					break;
				}
			} else if(current == ExecutionState.FAILED) {
				// we were immediately failed. tell the TaskManager that we reached our final state
				notifyFinalState();
				if(metrics != null) {
					metrics.close();
				}
				return;
			} else if(current == ExecutionState.CANCELING) {
				if(transitionState(ExecutionState.CANCELING, ExecutionState.CANCELED)) {
					// we were immediately canceled. tell the TaskManager that we reached our final state
					notifyFinalState();
					if(metrics != null) {
						metrics.close();
					}
					return;
				}
			} else {
				if(metrics != null) {
					metrics.close();
				}
				throw new IllegalStateException("Invalid state for beginning of operation of task " + this + '.');
			}
		}

		// all resource acquisitions and registrations from here on
		// need to be undone in the end
		Map<String, Future<Path>> distributedCacheEntries = new HashMap<>();

		// TODO_MA 注释： 当时在构建 ExecutionGraph 的时候，会帮我们把每一个 ExecutionVertex 的启动类都初始化好，
		// TODO_MA 注释： 设置在 ExecutionVertex 里面
		// TODO_MA 注释： Slot ===> Task ===> ExecutionVertex ===> 启动类
		AbstractInvokable invokable = null;

		try {
			// ----------------------------
			//  Task Bootstrap - We periodically
			//  check for canceling as a shortcut
			// ----------------------------

			// activate safety net for task thread
			LOG.debug("Creating FileSystem stream leak safety net for task {}", this);
			FileSystemSafetyNet.initializeSafetyNetForThread();

			// first of all, get a user-code classloader
			// this may involve downloading the job's JAR files and/or classes
			LOG.info("Loading JAR files for task {}.", this);

			/*************************************************
			 * TODO
			 *  注释： 创建一个用户加载用户代码的类加载器
			 */
			userCodeClassLoader = createUserCodeClassloader();

			/*************************************************
			 * TODO 第二步、准备 ExecutionConfig
			 *  注释：  通过反序列化得到 ExecutionConfig，从 ExecutionConfig 中可以得到所有算子相关的信息
			 */
			final ExecutionConfig executionConfig = serializedExecutionConfig.deserializeValue(userCodeClassLoader);

			if(executionConfig.getTaskCancellationInterval() >= 0) {
				// override task cancellation interval from Flink config if set in ExecutionConfig
				taskCancellationInterval = executionConfig.getTaskCancellationInterval();
			}

			if(executionConfig.getTaskCancellationTimeout() >= 0) {
				// override task cancellation timeout from Flink config if set in ExecutionConfig
				taskCancellationTimeout = executionConfig.getTaskCancellationTimeout();
			}

			if(isCanceledOrFailed()) {
				throw new CancelTaskException();
			}

			// ----------------------------------------------------------------
			// register the task with the network stack
			// this operation may fail if the system does not have enough
			// memory to run the necessary data exchanges
			// the registration must also strictly be undone
			// ----------------------------------------------------------------

			LOG.info("Registering task at network: {}.", this);

			/*************************************************
			 * TODO 第三步 ： 注册输入和输出组件 启动 ResultPartitionWriter 和 InputGate
			 *
			 *  注释： 启动 ResultPartitionWriter 和 InputGate
			 *  向网络栈中注册 Task,为 ResultPartition 和 InputGate 分配缓冲池
			 *  原来在初始化 Task 的时候，就已经把  ResultPartition 和 InputGate 给初始化
			 *  原来在构造 Task 对象的时候，关于输入 和 输出的抽象对象，都已经创建完毕
			 *  其实就是初始化 BufferPool
			 */
			setupPartitionsAndGates(consumableNotifyingPartitionWriters, inputGates);

			/**
			 * 第四步 注册 ResultPartitionWriter 到 taskEventDispatcher
			 *
			 */
			for(ResultPartitionWriter partitionWriter : consumableNotifyingPartitionWriters) {
				taskEventDispatcher.registerPartition(partitionWriter.getPartitionId());
			}

			// next, kick off the background copying of files for the distributed cache
			/**
			 * 第五步：  DistributedCache.readFileInfoFromConfig
			 *  从分布式缓存中 ，拷贝下来一些运行 Task 所需要的资源文件
			 */
			try {
				for(Map.Entry<String, DistributedCache.DistributedCacheEntry> entry : DistributedCache.readFileInfoFromConfig(jobConfiguration)) {
					LOG.info("Obtaining local cache file for '{}'.", entry.getKey());
					Future<Path> cp = fileCache.createTmpFile(entry.getKey(), entry.getValue(), jobId, executionId);
					distributedCacheEntries.put(entry.getKey(), cp);
				}
			} catch(Exception e) {
				throw new Exception(String.format("Exception while adding files to distributed cache of task %s (%s).", taskNameWithSubtask, executionId), e);
			}

			if(isCanceledOrFailed()) {
				throw new CancelTaskException();
			}

			// ----------------------------------------------------------------
			//  call the user code initialization methods
			// ----------------------------------------------------------------

			TaskKvStateRegistry kvStateRegistry = kvStateService.createKvStateTaskRegistry(jobId, getJobVertexId());

			/*************************************************
			 * TODO 第六步：
			 *  注释： 构建一个环境对象RuntimeEnvironment ，包装Task 执行过程中所需要的各种组件
			 */
			Environment env = new RuntimeEnvironment(jobId, vertexId, executionId, executionConfig, taskInfo, jobConfiguration, taskConfiguration,
				userCodeClassLoader, memoryManager, ioManager, broadcastVariableManager, taskStateManager, aggregateManager, accumulatorRegistry,
				kvStateRegistry, inputSplitProvider, distributedCacheEntries, consumableNotifyingPartitionWriters, inputGates, taskEventDispatcher,
				checkpointResponder, operatorCoordinatorEventGateway, taskManagerConfig, metrics, this, externalResourceInfoProvider);

			// Make sure the user code classloader is accessible thread-locally.
			// We are setting the correct context class loader before instantiating the invokable
			// so that it is available to the invokable during its entire lifetime.
			executingThread.setContextClassLoader(userCodeClassLoader);

			/*************************************************
			 * TODO -> 第七步：通过反射 获取启动类实例
			 *  这句代码里会涉及 SourceStreamTask或者StreamTask的初始化
			 *  取决于当前这个 ExecutionVertex 是属于哪一个Operator的
			 *
			 *  当前这个Task 必定属于某一个 ExecutionVertex ， 都有一个启动类的成员变量
			 *  将来这个Task启动的到底是哪种具体的Task
			 *
			 *  invokable 类别：SourceStreamTask ， OneInputStreamTask 。。。。。。
			 *  要去找： SourceStreamTask 和 OneInputStreamTask 的带 RuntimeEnvironment 参数的构造方法
			 *  -> SourceStreamTask OneInputStreamTask
			 *
			 *
			 *  注释： 获取到代码运行主类
			 *  AbstractInvokable = invokable
			 *
			 *  nameOfInvokableClass 在生成 StreamGraph 的时候，就已经确定了,见StreamGraph.addOperator 方法
			 * TODO  Class<? extends AbstractInvokable> invokableClass =
			 * 			operatorFactory.isStreamSource() ? SourceStreamTask.class : OneInputStreamTask.class;
			 *
			 * nameOfInvokableClass 是 JobVertex 的 invokableClassName， AbstractInvokable = invokable
			 * 每一个 StreamNode 在添加的时候都会有一个 jobVertexClass 属性
			 * 对于一个 operator chain，就是 head operator 对应的 invokableClassName，见 StreamingJobGraphGenerator.createChain
			 * 通过反射创建 AbstractInvokable 对象
			 * 对于 Stream 任务而言，就是 StreamTask 的子类，SourceStreamTask、OneInputStreamTask、TwoInputStreamTask 等
			 *
			 */
			// now load and instantiate the task's invokable code
			invokable = loadAndInstantiateInvokable(userCodeClassLoader, nameOfInvokableClass, env);

			// ----------------------------------------------------------------
			//  actual task core work
			// ----------------------------------------------------------------

			// we must make strictly sure that the invokable is accessible to the cancel() call
			// by the time we switched to running.
			/**
			 * 第八步： 保存该启动实例
			 */
			this.invokable = invokable;

			/*************************************************
			 * TODO 第九步
			 *  注释： 切换task状态 由 DEPLOYING 状态改成： RUNNING
			 */
			// switch to the RUNNING state, if that fails, we have been canceled/failed in the meantime
			if(!transitionState(ExecutionState.DEPLOYING, ExecutionState.RUNNING)) {
				throw new CancelTaskException();
			}

			/**
			 * TODO
			 * 第十步：Task 切换进入 RUNNING 状态，并告知 JobMaster
			 */
			// notify everyone that we switched to running
			taskManagerActions.updateTaskExecutionState(new TaskExecutionState(jobId, executionId, ExecutionState.RUNNING));

			// make sure the user code classloader is accessible thread-locally
			executingThread.setContextClassLoader(userCodeClassLoader);

			/*************************************************
			 * TODO -> 第十一步 ： 启动Task的执行
			 *  注释： 运行任务  在流式应用程序中，都是 StreamTask 的子类
			 *  1、DataSourceTask
			 *  2、Operator
			 *  3、DataSinkTask
			 *
			 *  -
			 * AbstractInvokable 是 Task 执行的主要逻辑，也是所有被执行的任务的基类，包括 Streaming 模式和 Batch 模式。
			 * 在 Streaming 模式下，所有任务都继承自 StreamTask，
			 * 包括 StreamTask 的子类包括 SourceStreamTask, OneInputStreamTask, TwoInputStreamTask,
			 * 以及用于迭代模式下的 StreamIterationHead 和 StreamIterationTail。
			 * -
			 * 每一个 StreamNode 在添加到 StreamGraph 的时候都会有一个关联的 jobVertexClass 属性，
			 * 这个属性就是该 StreamNode 对应的 StreamTask 类型；对于一个 OperatorChain 而言，它所对应的
			 * StreamTask 就是其 head operator 对应的 StreamTask。
			 *
			 *  -> StreamTask
			 */
			// run the invokable
			invokable.invoke();

			// make sure, we enter the catch block if the task leaves the invoke() method due
			// to the fact that it has been canceled
			if(isCanceledOrFailed()) {
				throw new CancelTaskException();
			}

			// ----------------------------------------------------------------
			//  finalization of a successful execution
			// ----------------------------------------------------------------

			/**
			 * 第十二步： ResultPartitionWriter 完成 所有 还未 flush 的数据 flush动作
			 */
			// finish the produced partitions. if this fails, we consider the execution failed.
			for(ResultPartitionWriter partitionWriter : consumableNotifyingPartitionWriters) {
				if(partitionWriter != null) {
					partitionWriter.finish();
				}
			}

			/*************************************************
			 * TODO 第十三步 ： 状态更新
			 *  注释： 由 RUNNING 状态改成： FINISHED 状态
			 */
			// try to mark the task as finished
			// if that fails, the task was canceled/failed in the meantime
			if(!transitionState(ExecutionState.RUNNING, ExecutionState.FINISHED)) {
				throw new CancelTaskException();
			}
------------------------------
重点步骤： 第三步 ： 注册输入和输出组件 启动 ResultPartitionWriter 和 InputGate
setupPartitionsAndGates
	/**
	 * Calls {@code setup()} on every produced partition and then on every input gate.
	 *
	 * <p>The two passes are strictly ordered: all partitions are set up before the first gate,
	 * because partitions are requested during gate setup.
	 *
	 * @param producedPartitions partition writers of the task, set up in array order
	 * @param inputGates input gates of the task, set up in array order after all partitions
	 * @throws IOException declared by this method; presumably raised by one of the setup calls
	 */
	public static void setupPartitionsAndGates(ResultPartitionWriter[] producedPartitions, InputGate[] inputGates) throws IOException {

		// Article note: this registers the task's ResultPartitions with the ResultPartitionManager of the
		// TaskManager that started the task. The concrete writer type is annotated as
		// ConsumableNotifyingResultPartitionWriterDecorator wrapping a ResultPartition.
		for(int partitionIndex = 0; partitionIndex < producedPartitions.length; partitionIndex++) {
			producedPartitions[partitionIndex].setup();
		}

		// Article note: this assigns a BufferPool to the InputChannels of each InputGate
		// (concrete gate type annotated as SingleInputGate).
		// InputGates must be initialized after the partitions, since during InputGate#setup
		// we are requesting partitions
		for(int gateIndex = 0; gateIndex < inputGates.length; gateIndex++) {
			inputGates[gateIndex].setup();
		}
	}

3. SourceStreamTask 和 StreamTask 执行

在 beforeInvoke() 中，主要是初始化 OperatorChain，然后调用 init() 执行初始化，然后恢复状态，更
改 Task 自己的状态为 isRunning = true
在 runMailboxLoop() 中，主要是不停的处理 mail，这里是 Flink-1.10 的一项改进，使用了 mailbox
模型来处理任务
在 afterInvoke() 中，主要是完成 Task 要结束之前需要完成的一些细节，比如，把 Buffer 中还未 flush 的
数据 flush 出来
最后，在 cleanUpInvoke() 主要做一些资源的释放，执行各种关闭动作：set false，interrupt，
shutdown，close，cleanup，dispose 等

invokable.invoke(); ->  StreamTask.invoke
	public final void invoke() throws Exception {
		try {

			/*************************************************
			 * TODO
			 *  注释： 第一步：初始化OperatorChain，然后调用init初始化，然后恢复状态，更改task的状态为 isRunning
			 *
			 *  如果是 SourceStreamTask 则启动 对接数据源的线程，执行相应的初始化
			 *  如果是 OneInputStreamTask 则需要对接上游的 Task 的 ResultPartition
			 *
			 */
			beforeInvoke();

			// final check to exit early before starting to run
			if(canceled) {
				throw new CancelTaskException();
			}

			/*************************************************
			 * TODO  第二步 、 不停的处理mail 使用mailbox模型来处理任务
			 *  注释： Task 开始工作
			 *  执行这句代码的时候，还是在 Task 所在的那个线程中执行的。
			 */
			// let the task do its work
			runMailboxLoop();

			// if this left the run() method cleanly despite the fact that this was canceled,
			// make sure the "clean shutdown" is not attempted
			if(canceled) {
				throw new CancelTaskException();
			}

			/*************************************************
			 * TODO
			 *  注释： 第三步： 完成task要结束之前需要完成的一些细节，比如：将Buffer中未flush的数据flush出来
			 */
			afterInvoke();

3、1 beforeInvoke()

	/**
	 * Prepares the task for running: builds the operator chain, fetches its head operator, runs the
	 * task-specific {@code init()}, then initializes state, opens the operators and reads recovered
	 * channel state through the action executor, and finally sets {@code isRunning} to true.
	 *
	 * @throws CancelTaskException if the task is found canceled right after {@code init()}
	 * @throws Exception declared by this method; the visible code does not show which other calls raise it
	 */
	protected void beforeInvoke() throws Exception {
		disposedOperators = false;
		LOG.debug("Initializing {}.", getName());

		/*************************************************
		 * TODO
		 *  Note: build the OperatorChain from this task and its record writer.
		 *  Article notes (not verifiable from this snippet): the constructor sets up the output
		 *  objects and mainly does three things:
		 *  1. calls createStreamOutput() to create the downstream RecordWriterOutput;
		 *  2. calls createOutputCollector() to wire the chained StreamConfigs to the
		 *     RecordWriterOutput created in step 1;
		 *  3. calls getChainedOutputs() to obtain the resulting RecordWriterOutput.
		 */
		operatorChain = new OperatorChain<>(this, recordWriter);

		/**
		 *  TODO Note: fetch the head operator of the OperatorChain.
		 *
		 *  Article notes (not verifiable from this snippet): only after this assignment does
		 *    controller.suspendDefaultAction() in SourceStreamTask.processInput get released
		 *    and LegacySourceFunctionThread.run actually execute.
		 *
		 * In other words, the headOperator needed by the receiving thread is now initialized;
		 * from here on every component used by the current OperatorChain exists, so data can be
		 * received and stream processing can begin.
		 */
		headOperator = operatorChain.getHeadOperator();

		/*************************************************
		 * TODO
		 *  Note: run the subclass-specific initialization (SourceStreamTask or OneInputStreamTask).
		 *  Article notes (not verifiable from this snippet):
		 *  - SourceStreamTask: checks whether the source is an ExternallyInducedSource and, if so,
		 *    registers a savepoint hook;
		 *  - OneInputStreamTask: creates the CheckpointedInputGate, StreamTaskNetworkOutput,
		 *    StreamTaskNetworkInput and StreamOneInputProcessor used for shuffle-related data transfer.
		 */
		// task specific initialization
		init();

		// save the work of reloading state, etc, if the task is already canceled
		if(canceled) {
			throw new CancelTaskException();
		}

		// -------- Invoke --------
		LOG.debug("Invoking {}", getName());

		// we need to make sure that any triggers scheduled in open() cannot be
		// executed before all operators are opened
		actionExecutor.runThrowing(() -> {

			/*************************************************
			 * TODO
			 *  Note: entry point of state restore
			 */
			// both the following operations are protected by the lock
			// so that we avoid race conditions in the case that initializeState()
			// registers a timer, that fires before the open() is called.
			operatorChain.initializeStateAndOpenOperators(createStreamTaskStateInitializer());

			/*************************************************
			 * TODO core step
			 *  Article note (not verifiable from this snippet): this mainly sets up InputGate and
			 *  other input-side details, ending in a partition request enqueued as a mail.
			 */
			readRecoveredChannelState();
		});

		// Set only after state initialization and channel-state recovery have completed.
		isRunning = true;
	}

readRecoveredChannelState -> mainMailboxExecutor.execute(this::requestPartitions
 -> inputGate.requestPartitions(); -> SingleInputGate.requestPartitions -> internalRequestPartitions
  -> inputChannel.requestSubpartition
   -> RemoteInputChannel.requestSubpartition
    -> partitionRequestClient.requestSubpartition -> NettyPartitionRequestClient.requestSubpartition
    /*************************************************
		 * TODO
		 *  注释： 发送请求： tcpChannel.writeAndFlush(request);
		 *
		 *   ->  NettyProtocol.PartitionRequestServerHandler.channelRead0 做对应处理
		 */
		if(delayMs == 0) {
			ChannelFuture f = tcpChannel.writeAndFlush(request);
			f.addListener(listener);
		} else {

 -> 服务端 PartitionRequestServerHandler.channelRead0
   -> reader.requestSubpartitionView( ->  partitionProvider.createSubpartitionView( 
    -> partition.createSubpartitionView -> subpartitions[index].createReadView(availabilityListener)
     -> readView = new PipelinedSubpartitionView(this, availabilityListener);

OperatorChain 的初始化，首先会为每个 Operator 创建一个 RecordWriterOutput，再为每个
Operator 创建一个 OutputCollector。然后把每一个 Operator 都包装成 OperatorWrapper 放入
List allOpWrappers 集合中。最后调用linkOperatorWrappers(allOpWrappers);
方法以逻辑正序的方式来构建 StreamOperator 的链式关系。
在这里插入图片描述
然后是 init() 方法，对于 SourceStreamTask 来说，就是看 Source 是不是
ExternallyInducedSource，如果是，则注册一个 savepoint 钩子。对于 OneInputStreamTask 来说，
主要就是创建 CheckpointedInputGate，StreamTaskNetworkOutput，StreamTaskNetworkInput，
StreamOneInputProcessor 用来进行 Shuffle 相关的数据传输。
到此为止，Task 初始化和预执行相关的，都基本到位了，然后就开始从我们的 SourceStreamTask 的
HeadOperator 的数据接收线程，开始流式处理。

3、2 runMailboxLoop （Task 开始工作）

StreamTask.invoke -> runMailboxLoop -> mailboxProcessor.runMailboxLoop() 
 -> runMailboxStep -> runDefaultAction
  -> StreamTask 构造时： this.mailboxProcessor = new MailboxProcessor(this::processInput, mailbox, actionExecutor);
   -> this::processInput (SourceStreamTask|OneInputStreamTask)
      -> SourceStreamTask.processInput -> sourceThread.start() -> LegacySourceFunctionThread.run
       -> headOperator.run(){
             /*************************************************
			 * TODO
			 *  注释： 真正运行用户的 Operator
			 *  1、如果你使用：env.socketTextStream() 则调用： SocketTextStreamFunction
			 *  2、如果你使用：Kafka数据源， 则调用： FlinkKafkaConsumerBase
			 *  ......
			 *  function --> transformation ---> streamOperator
			 *  headOperator.run();
			 */
			userFunction.run(ctx);
        }
         -> SocketTextStreamFunction.run -> ctx.collect(record); -> processAndCollect
          -> output.collect( -> pushToOperator -> processElement
           -> StreamMap.processElement
           	public void processElement(StreamRecord<IN> element) throws Exception {
		/**
		 * element.getValue() is the value to be processed.
		 * 1. userFunction.map(element.getValue()) runs the user-defined map logic and yields the mapped result.
		 * 2. element.replace(...) is handed that result (presumably swapping it into the same record -- confirm).
		 * 3. The resulting record is then passed on through output.collect(...).
		 *
		 * -- Author's note: inside a chain, output is the OperatorChain's collector.
		 * --
		 * Example pipeline: env.socketTextStream.map.keyby.sum
		 * map -> keyby
		 * Per the author: if this operator is the last one in its OperatorChain, output here is a RecordWriterOutput
		 * -> RecordWriterOutput
		 */
		output.collect(element.replace(userFunction.map(element.getValue())));
	}
	 -> pushToRecordWriter -> emit -> RecordWriter.emit(){
	 	// Serializes the record, copies the serialized result to the given target channel,
	 	// and prunes the serializer when that copy step returns true.
	 	protected void emit(T record, int targetChannel) throws IOException, InterruptedException {
		checkErroneous();

		// TODO_MA note: serialize the record (into a ByteBuffer, per the author)
		serializer.serializeRecord(record);

		// TODO_MA note: write the serializer's result to the target channel -> copyFromSerializerToTargetChannel
		// Make sure we don't hold onto the large intermediate serialization buffer for too long
		if (copyFromSerializerToTargetChannel(targetChannel)) {

			// TODO_MA note: prune the buffer used for serialization (the temporary byte[] written
			// while serializing, per the author) to reduce memory usage
			serializer.prune();
		}
	}
	 }
	 -> copyFromSerializerToTargetChannel -> flushTargetPartition -> flush 	
	  -> PipelinedSubpartition
	  	// Decides, while holding the lock on buffers, whether the reader must be told that data
	  	// is available, then performs the notification after the synchronized block.
	  	public void flush() {

		final boolean notifyDataAvailable;

		// TODO_MA note: first check whether the "data available" conditions are met
		synchronized(buffers) {
			if(buffers.isEmpty() || flushRequested) {
				return;
			}
			// if there is more than 1 buffer, we already notified the reader
			// (at the latest when adding the second buffer)
			// TODO_MA note: notify only when not blocked by a checkpoint, exactly one buffer is queued,
			// and that buffer has data available (per the author, data is held back during a checkpoint)
			notifyDataAvailable = !isBlockedByCheckpoint && buffers.size() == 1 && buffers.peek().isDataAvailable();

			// TODO_MA note: more than one buffer means notifyDataAvailable() was already executed earlier
			flushRequested = buffers.size() > 1 || notifyDataAvailable;
		}

		/*************************************************
		 * TODO
		 *  Note: signal that data is available
		 */
		if(notifyDataAvailable) {

			/**
			 * TODO
			 * Notify readView that data is now available.
			 * Per the author: readView is the consumer-side view object of a ResultSubPartition.
			 *   A downstream task may consume one partition from each of several upstream tasks.
			 *   Any single partition of any upstream task is called a ResultSubPartition.
			 *   Each such ResultSubPartition has one consumer: PipelinedSubpartitionView.
			 *
			 */
			notifyDataAvailable();
		}
	}
	 -> notifyDataAvailable(){
	 	/*************************************************
		 * TODO  ->
		 *  注释：
		 *  availabilityListener = 
		 * 1.  CreditBasedSequenceNumberingViewReader
		 * 2.  LocalInputChannel
		 */
		availabilityListener.notifyDataAvailable();
	 }
	 
 /*
 *	1. CreditBasedSequenceNumberingViewReader.notifyDataAvailable()
 *	2. LocalInputChannel.notifyDataAvailable()
 */
// -> 1. CreditBasedSequenceNumberingViewReader.notifyDataAvailable()

 -> readView.notifyDataAvailable(); -> requestQueue.notifyReaderNonEmpty(this);
     加入队列后消费
     ->PartitionRequestQueue.userEventTriggered.enqueueAvailableReader -> writeAndFlushNextMessageIfPossible(){
                   // TODO 此处真正完成 从 NettyServer 写一条数据 到 NettyClient
					// TODO
					channel.writeAndFlush(msg).addListener(writeListener);
           }
          -> NettyProtocol.CreditBasedPartitionRequestClientHandler.channelRead -> decodeMsg
            -> decodeBufferOrEvent -> RemoteInputChannel.onBuffer -> notifyChannelNonEmpty
             -> queueChannel -> inputChannelsWithData.add(channel);
             注释： 加入队列中
			 既然将 有数据可用的channel 加入到 inputChannelsWithData，
			 那就证明，一定有其他的什么角色来从这个队列中获取 可用的channel 来消费数据
        -> StreamTask.new MailboxProcessor(this::processInput, mailbox, actionExecutor);
	   	-> processInput -> inputProcessor.processInput();
		  -> StreamOneInputProcessor.processInput
		   -> StreamTaskNetworkInput.emitNext(){

              processElement(deserializationDelegate.getInstance(), output); -> output.emitRecord(recordOrMark.asRecord());
                -> OneInputStreamTask.StreamTaskNetworkOutput.emitRecord 输出 计算逻辑处理
            
             /*************************************************
			 * TODO 核心
			 *  注释： 获取输入
			 */
			Optional<BufferOrEvent> bufferOrEvent = checkpointedInputGate.pollNext();
			if(bufferOrEvent.isPresent()) {
				// return to the mailbox after receiving a checkpoint barrier to avoid processing of
				// data after the barrier before checkpoint is performed for unaligned checkpoint mode
				if(bufferOrEvent.get().isEvent() && bufferOrEvent.get().getEvent() instanceof CheckpointBarrier) {
					return InputStatus.MORE_AVAILABLE;
				}

				/*************************************************
				 * TODO
				 *  注释： 处理数据（把读取到的 buffer 交给反序列化器，进行反序列化）
				 */
				processBufferOrEvent(bufferOrEvent.get());
			} else {
		  }

-----------------------------------------------------------------
//  2. LocalInputChannel.notifyDataAvailable()
  -> notifyChannelNonEmpty -> queueChannel(){
	  /*************************************************
	 * TODO
	 *  注释： 加入队列中
	 *  既然将 有数据可用的channel 加入到 inputChannelsWithData，
	 *  那就证明，一定有其他的什么角色来从这个队列中获取 可用的channel 来消费数据
	 */
	 inputChannelsWithData.add(channel);
	 
    /**
	 * ->
	 * 如果之前队列中没有channel，这个channel加入后，通知等待的线程 getChannel.inputChannelsWithData.wait
	 */
	inputChannelsWithData.notifyAll(); 
	  
  }
  -> inputChannelsWithData.wait();  // 可反推到 this.mailboxProcessor = new MailboxProcessor(this::processInput, mailbox, actionExecutor);
  StreamTask.processInput -> StreamOneInputProcessor.processInput -> StreamTaskNetworkInput.emitNext(){
	  
		/** 代码分为两个部分
		 * 先执行currentRecordDeserializer == null把数据放入buffer    processBufferOrEvent
		 * 再执行 currentRecordDeserializer != null 进行计算并发往下游    processElement
		 */
		while(true) {

			// TODO 如果可以通过 currentRecordDeserializer 反序列化得来结果
			// get the stream element from the deserializer

			if(currentRecordDeserializer != null) {

				// TODO_MA 注释： 进行 Record 的反序列化
				DeserializationResult result = currentRecordDeserializer.getNextRecord(deserializationDelegate);
				if(result.isBufferConsumed()) {
					currentRecordDeserializer.getCurrentBuffer().recycleBuffer();
					currentRecordDeserializer = null;
				}

				/*************************************************
				 * TODO
				 *  注释： 处理记录
				 */
				if(result.isFullRecord()) {

					// TODO ->  计算逻辑处理 并发往下游
					processElement(deserializationDelegate.getInstance(), output);
					return InputStatus.MORE_AVAILABLE;
				}
			}

			/*************************************************
			 * TODO
			 *  注释： 获取输入
			 *  -->
			 */
			Optional<BufferOrEvent> bufferOrEvent = checkpointedInputGate.pollNext();
			if(bufferOrEvent.isPresent()) {
				// return to the mailbox after receiving a checkpoint barrier to avoid processing of
				// data after the barrier before checkpoint is performed for unaligned checkpoint mode
				if(bufferOrEvent.get().isEvent() && bufferOrEvent.get().getEvent() instanceof CheckpointBarrier) {
					return InputStatus.MORE_AVAILABLE;
				}

				/*************************************************
				 * TODO -> 重点
				 *  注释： 处理数据（把读取到的 buffer 交给反序列化器，进行反序列化）
				 */
				processBufferOrEvent(bufferOrEvent.get());
			} else {
				if(checkpointedInputGate.isFinished()) {
					checkState(checkpointedInputGate.getAvailableFuture().isDone(), "Finished BarrierHandler should be available");
					return InputStatus.END_OF_INPUT;
				}
				return InputStatus.NOTHING_AVAILABLE;
			}
		}	  
  }
  //以上逻辑分三步
  // 第一步  获取输入               
  checkpointedInputGate.pollNext() -> inputGate.pollNext(从缓冲区或者 InputGate 中拉取数据); -> getNextBufferOrEvent
    -> waitAndGetNextData -> getChannel(){
	     	/*************************************************
			 * TODO
			 *  注释： 如果现在还没有数据，就阻塞
			 */
			while(inputChannelsWithData.size() == 0) {
				if(closeFuture.isDone()) {
					throw new IllegalStateException("Released");
				}

				/*************************************************
				 * TODO
				 *  注释： 阻塞
				 */
				if(blocking) {
					// TODO  <--
					inputChannelsWithData.wait();
				} else {
					availabilityHelper.resetUnavailable();
					return Optional.empty();
				}
			}
	}

  // 第二步  处理数据（把读取到的 buffer 交给反序列化器，进行反序列化）
  processBufferOrEvent(bufferOrEvent.get()); -> currentRecordDeserializer.setNextBuffer
  
  // 第三步  计算逻辑处理 并发往下游  
  processElement(deserializationDelegate.getInstance(), output); -> output.emitRecord

画画的老顽童

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
StreamTask 执行

TaskExecutor.submitTask Task task = new Task( // TODO taskExecutorServices.createShuffleEnvironment taskExecutorServices.getShuffleEnvironment()创建ShuffleEnvironment T TaskExecutorServices.createShuffleEnvironment -> createShuffleEnvironment
复制链接

扫一扫