主要内容
1. Task执行原理流程图
2. Task执行源码
3. Task执行结果在Driver端的处理
一、Task在Executor(worker)端执行及返回Driver流程图
图37-1 Driver端与Executor交互图
二、Executor(worker)端执行源码解析
1.接收Driver端发来的消息
Driver中的SchedulerBackend向ExecutorBackend发送LaunchTask消息,ExecutorBackend接收到该消息后,首先将其中的数据反序列化为TaskDescription。
注:Standalone模式下,SchedulerBackend具体指CoarseGrainedSchedulerBackend,ExecutorBackend具体指CoarseGrainedExecutorBackend。
//CoarseGrainedExecutorBackend#receive
case LaunchTask(data) =>
  // Handles a LaunchTask message: decodes the task description and hands it to the executor.
  if (executor != null) {
    // Deserialize the message payload into a TaskDescription.
    val description = ser.deserialize[TaskDescription](data.value)
    logInfo(s"Got assigned task ${description.taskId}")
    // Ask the executor to launch the task, passing this backend along with the task details.
    executor.launchTask(
      this,
      description.taskId,
      description.attemptNumber,
      description.name,
      description.serializedTask)
  } else {
    // No executor is available, so the task cannot be launched: log the error and exit the JVM.
    logError("Received LaunchTask command but executor was null")
    System.exit(1)
  }
2.Executor加载Task
Executor会通过launchTask来执行Task。
3.调用TaskRunner执行Task
//Executor#launchTask
/**
 * Wraps the given task in a TaskRunner, records it as running, and submits it to the thread pool.
 *
 * @param context        backend passed through to the TaskRunner
 * @param taskId         id of the task; also the key under which the runner is registered
 * @param attemptNumber  attempt number of this task
 * @param taskName       name of the task
 * @param serializedTask serialized task bytes handed to the TaskRunner
 */
def launchTask(
    context: ExecutorBackend,
    taskId: Long,
    attemptNumber: Int,
    taskName: String,
    serializedTask: ByteBuffer): Unit = {
  // Build the runnable that will execute this task.
  val runner = new TaskRunner(context, taskId, attemptNumber, taskName, serializedTask)
  // Register the runner under its task id before submitting it for execution.
  runningTasks.put(taskId, runner)
  threadPool.execute(runner)
}
class TaskRunner(
execBackend: ExecutorBackend,
val taskId: Long,
val attemptNumber: Int,
taskName: String,
serializedTask: ByteBuffer)
extends Runnable {//省略非关键代码
override def run(): Unit = {
//为我们的Task创建内存管理器
val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId)
//记录反序列化时间
val deserializeStartTime = System.currentTimeMillis()
//加载具体类时需要用到ClassLoader
Thread.currentThread.setContextClassLoader(replClassLoader)
//创建序列化器
val ser = env.closureSerializer.newInstance()
logInfo(s"Running $taskName (TID $taskId)")
//调用ExecutorBackend#statusUpdate向Driver发信息汇报当前状态
execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
//记录运行时间和GC信息
var taskStart: Long = 0
startGCTime = computeTotalGcTime()
try {
//反序列化Task的依赖信息,得到taskFiles(Task运行所需的文件)、taskJars(Task依赖的jar包)
//以及taskBytes(序列化后的Task本身的字节数据,随后用于反序列化出Task对象)
val (taskFiles, taskJars, taskBytes) =
Task.deserializeWithDependencies(serializedTask)
//下载Task运行缺少的依赖。
updateDependencies(taskFiles, taskJars)
//反序列化Task
task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader)
//设置Task运行时的MemoryManager
task.setTaskMemoryManager(taskMemoryManager)
//如果Task在反序列化完成前就已经被killed,则抛出异常;否则,继续正常执行
if (killed) {
throw new TaskKilledException
}
logDebug("Task " + taskId + "'s epoch is " + task.epoch)
env.mapOutputTracker.updateEpoch(task.epoch)
//运行的实际任务,并测量它的运行时间。
taskStart = System.currentTimeMillis()
var threwException = true
val (value, accumUpdates) = try {
//调用task#run方法,得到task运行的结果
val res = task.run(
taskAttemptId = taskId,
attemptNumber = attemptNumber,
metricsSystem = env.metricsSystem)
threwException = false
res
} finally {
//清理所有分配的内存和分页,并检测是否有内存泄漏
val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()
if (freedMemory > 0) {
val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, TID = $taskId"
if (conf.getBoolean("spark.unsafe.exceptionOnMemoryLeak", false) && !threwException) {
throw new SparkException(errMsg)
} else {
logError(errMsg)
}
}
}
//记录Task完成时间
val taskFinish = System.currentTimeMillis()
//如果Task killed,则报错。
if (task.killed) {
throw new TaskKilledException
}
//否则序列化得到的Task执行的结果
val resultSer = env.serializer.newInstance()
val beforeSerialization = System.currentTimeMillis()
val valueBytes = resultSer.serialize(value)
val afterSerialization = System.currentTimeMillis()
//记录相关的metrics
for (m <- task.metrics) {
m.setExecutorDeserializeTime(
(taskStart - deserializeStartTime) + task.executorDeserializeTime)
m.setExecutorRunTime((taskFinish - taskStart) - task.executorDeserializeTime)
m.setJvmGCTime(computeTotalGcTime() - startGCTime)
m.setResultSerializationTime(afterSerialization - beforeSerialization)
m.updateAccumulators()
}
//创建直接返回给Driver的结果对象DirectTaskResult
val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull)
val serializedDirectResult = ser.serialize(directResult)
val resultSize = serializedDirectResult.limit
val serializedResult: ByteBuffer = {
//对直接返回的结果对象大小进行判断
if (maxResultSize > 0 && resultSize > maxResultSize) {
//大于最大限制1G,直接丢弃ResultTask
logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " +
s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " +
s"dropping it.")
ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
} else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {
//结果大小大于设定的阈值,则放入BlockManager中
val blockId = TaskResultBlockId(taskId)
env.blockManager.putBytes(