Executor.launchTask.threadPool.execute(tr) invokes TaskRunner.run
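For context, Executor.launchTask roughly does the following (paraphrased from the Spark 1.x source, slightly simplified): it wraps the serialized task in a TaskRunner, registers it in runningTasks, and hands it to the executor's thread pool, which eventually calls TaskRunner.run below.
def launchTask(
    context: ExecutorBackend, taskId: Long, attemptNumber: Int,
    taskName: String, serializedTask: ByteBuffer): Unit = {
  val tr = new TaskRunner(context, taskId, attemptNumber, taskName, serializedTask)
  runningTasks.put(taskId, tr)   // track the task so it can be killed later
  threadPool.execute(tr)         // TaskRunner.run executes on a pool thread
}
// TODO This is where the task's actual work is executed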
override def run() {
val deserializeStartTime = System.currentTimeMillis()
Thread.currentThread.setContextClassLoader(replClassLoader)
// Get the closure serializer
val ser = env.closureSerializer.newInstance()
logInfo(s"Running $taskName (TID $taskId)")
execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
var taskStart: Long = 0
startGCTime = gcTime
try {
// Deserialize the serialized task payload into its file, jar and task-bytes parts
val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask)
// TODO Fetch the required files, resources and jars over the network
updateDependencies(taskFiles, taskJars)
// Deserialize the Task object itself from the task bytes
task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader)
// If this task has been killed before we deserialized it, let's quit now. Otherwise,
// continue executing the task.
if (killed) {
// Throw an exception rather than returning, because returning within a try{} block
// causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl
// exception will be caught by the catch block, leading to an incorrect ExceptionFailure
// for the task.
throw new TaskKilledException
}
attemptedTask = Some(task)
logDebug("Task " + taskId + "'s epoch is " + task.epoch)
env.mapOutputTracker.updateEpoch(task.epoch)
// Run the actual task and measure its runtime.
// Task start time
taskStart = System.currentTimeMillis()
/**
 * Run the task.
 * For a ShuffleMapTask, the value returned here is actually a MapStatus,
 * which encapsulates where the data computed by the ShuffleMapTask was written.
 * If another ShuffleMapTask comes next, it will contact the MapOutputTracker
 * to obtain the output locations of the previous ShuffleMapTask, and then
 * pull the data over the network. A ResultTask works the same way.
 *
 * TODO task.run
 */
val value = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber)
// Task finish time
val taskFinish = System.currentTimeMillis()
// If the task has been killed, let's fail it.
if (task.killed) {
throw new TaskKilledException
}
// Serialize and wrap the result (the MapStatus, for a ShuffleMapTask), because it will later be sent to the driver over the network
val resultSer = env.serializer.newInstance()
val beforeSerialization = System.currentTimeMillis()
val valueBytes = resultSer.serialize(value)
val afterSerialization = System.currentTimeMillis()
// Compute the task-related metrics (statistics):
// how long the task ran, how long deserialization took, how long JVM GC took,
// and how long serializing the result took.
// These values are what gets displayed in the Spark UI.
for (m <- task.metrics) {
m.setExecutorDeserializeTime(taskStart - deserializeStartTime)
m.setExecutorRunTime(taskFinish - taskStart)
m.setJvmGCTime(gcTime - startGCTime)
m.setResultSerializationTime(afterSerialization - beforeSerialization)
}
val accumUpdates = Accumulators.values
val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull)
val serializedDirectResult = ser.serialize(directResult)
val resultSize = serializedDirectResult.limit
// directSend = sending directly back to the driver
val serializedResult = {
if (maxResultSize > 0 && resultSize > maxResultSize) {
logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " +
s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " +
s"dropping it.")
ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
} else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {
val blockId = TaskResultBlockId(taskId)
env.blockManager.putBytes(
blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER)
logInfo(
s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)")
ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
} else {
logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver")
serializedDirectResult
}
}
// Invoke the statusUpdate method of the CoarseGrainedExecutorBackend the Executor runs in
execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
} catch {
case ffe: FetchFailedException => {
val reason = ffe.toTaskEndReason
execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
}
case _: TaskKilledException | _: InterruptedException if task.killed => {
logInfo(s"Executor killed $taskName (TID $taskId)")
execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled))
}
case cDE: CommitDeniedException => {
val reason = cDE.toTaskEndReason
execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
}
case t: Throwable => {
// Attempt to exit cleanly by informing the driver of our failure.
// If anything goes wrong (or this was a fatal exception), we will delegate to
// the default uncaught exception handler, which will terminate the Executor.
logError(s"Exception in $taskName (TID $taskId)", t)
val serviceTime = System.currentTimeMillis() - taskStart
val metrics = attemptedTask.flatMap(t => t.metrics)
for (m <- metrics) {
m.setExecutorRunTime(serviceTime)
m.setJvmGCTime(gcTime - startGCTime)
}
val reason = new ExceptionFailure(t, metrics)
execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))
// Don't forcibly exit unless the exception was inherently fatal, to avoid
// stopping other tasks unnecessarily.
if (Utils.isFatalError(t)) {
SparkUncaughtExceptionHandler.uncaughtException(t)
}
}
} finally {
// Release memory used by this thread for shuffles
env.shuffleMemoryManager.releaseMemoryForThisThread()
// Release memory used by this thread for unrolling blocks
env.blockManager.memoryStore.releaseUnrollMemoryForThisThread()
// Release memory used by this thread for accumulators
Accumulators.clear()
runningTasks.remove(taskId)
}
}
}
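The three-way branch that builds serializedResult above is worth a worked example. Below is a minimal standalone sketch, assuming the Spark 1.x defaults (spark.driver.maxResultSize = 1g, spark.akka.frameSize = 10 MB, AkkaUtils.reservedSizeBytes = 200 KiB); the object and method names here are illustrative, not Spark API.
object ResultRouting {
  sealed trait Route
  case object Dropped extends Route          // too big even for the BlockManager path
  case object ViaBlockManager extends Route  // indirect result, fetched by the driver later
  case object DirectToDriver extends Route   // small enough to ride inside the status update

  def route(resultSize: Long,
            maxResultSize: Long = 1L << 30,       // 1 GiB default
            frameSize: Long = 10L * 1024 * 1024,  // 10 MiB default Akka frame
            reserved: Long = 200L * 1024): Route =
    if (maxResultSize > 0 && resultSize > maxResultSize) Dropped
    else if (resultSize >= frameSize - reserved) ViaBlockManager
    else DirectToDriver

  def main(args: Array[String]): Unit = {
    println(route(4 * 1024))                  // DirectToDriver
    println(route(50L * 1024 * 1024))         // ViaBlockManager
    println(route(2L * 1024 * 1024 * 1024))   // Dropped (2 GiB > 1 GiB)
  }
}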
=> task.run
final def run(taskAttemptId: Long, attemptNumber: Int): T = {
/**
 * Create a TaskContext, the execution context of the task.
 * It records some global information about the task's execution,
 * e.g. how many times the task has been retried, which stage it belongs to,
 * and which partition of the RDD it processes.
 */
context = new TaskContextImpl(stageId = stageId, partitionId = partitionId,
taskAttemptId = taskAttemptId, attemptNumber = attemptNumber, runningLocally = false)
TaskContextHelper.setTaskContext(context)
context.taskMetrics.setHostname(Utils.localHostName())
taskThread = Thread.currentThread()
if (_killed) {
kill(interruptThread = false)
}
try {
// TODO runTask is abstract and relies on the subclass implementation; the Task subclasses are ShuffleMapTask and ResultTask
runTask(context)
} finally {
context.markTaskCompleted()
TaskContextHelper.unset()
}
}
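For reference, the Task base class this method lives in looks roughly like this (trimmed down from the Spark 1.x source; most fields and modifiers omitted):
abstract class Task[T](val stageId: Int, val partitionId: Int) extends Serializable {
  // Implemented by the two concrete task types:
  // ShuffleMapTask returns a MapStatus; ResultTask returns the result of
  // applying the user function to its partition.
  def runTask(context: TaskContext): T
}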
==> runTask -> ShuffleMapTask | ResultTask
ShuffleMapTask.runTask
/**
 * A ShuffleMapTask divides the elements of one RDD into multiple buckets,
 * based on the partitioner specified in the ShuffleDependency (HashPartitioner
 * by default).
 */
private[spark] class ShuffleMapTask(
...
// ShuffleMapTask's runTask method returns a MapStatus
override def runTask(context: TaskContext): MapStatus = {
/**
 * Deserialize the RDD using the broadcast variable.
 * This deserializes the RDD (and shuffle dependency) the task needs to process.
 * How does the task get this RDD? The tasks of one stage run in parallel
 * (or concurrently) on multiple executors, possibly on different nodes,
 * yet they all process the same RDD, so each task obtains it directly
 * through a broadcast variable.
 */
val ser = SparkEnv.get.closureSerializer.newInstance()
val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
metrics = Some(context.taskMetrics)
var writer: ShuffleWriter[Any, Any] = null
try {
// Get the ShuffleManager,
// then obtain a ShuffleWriter from it
val manager = SparkEnv.get.shuffleManager
writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
/**
 * First call the RDD's iterator method, passing in the partition this task
 * should process. The core logic is in rdd.iterator: this is where the
 * operators/functions we defined are applied to that partition of the RDD.
 * After the partition has been processed, the returned data is partitioned by
 * the ShuffleWriter (through the HashPartitioner) and written into the task's
 * own bucket.
 *
 * TODO rdd.iterator(partition, context)
 */
writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
/**
 * Finally, return the MapStatus.
 * MapStatus encapsulates where the data computed by this ShuffleMapTask is
 * stored, which is essentially the relevant BlockManager information.
 * The BlockManager is Spark's low-level component for managing data in memory
 * and on disk.
 */
return writer.stop(success = true).get
} catch {
case e: Exception =>
try {
if (writer != null) {
writer.stop(success = false)
}
} catch {
case e: Exception =>
log.debug("Could not stop writer", e)
}
throw e
}
}
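Since the default bucket choice mentioned above is HashPartitioner, here is a minimal standalone re-implementation of its rule (it mirrors Spark's Utils.nonNegativeMod; written out here only for illustration):
object HashPartitionSketch {
  // Java's % can return a negative value for negative hash codes; map into [0, mod).
  def nonNegativeMod(x: Int, mod: Int): Int = {
    val raw = x % mod
    if (raw < 0) raw + mod else raw
  }

  // Spark's HashPartitioner sends null keys to bucket 0 and everything else
  // to nonNegativeMod(key.hashCode, numPartitions).
  def hashPartition(key: Any, numPartitions: Int): Int = key match {
    case null => 0
    case k    => nonNegativeMod(k.hashCode, numPartitions)
  }

  def main(args: Array[String]): Unit = {
    // Records with equal keys always land in the same bucket, so one reducer
    // can fetch all values for a key from the map outputs.
    println(hashPartition("spark", 4) == hashPartition("spark", 4)) // true
  }
}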
===> writer.write(rdd.iterator(...)) -> rdd.iterator
final def iterator(split: Partition, context: TaskContext): Iterator[T] = {
// If the storage level is not NONE, this RDD was persisted before, so instead of
// recomputing the partition from the parent RDD, first try the CacheManager
// to retrieve the persisted data
if (storageLevel != StorageLevel.NONE) {
SparkEnv.get.cacheManager.getOrCompute(this, split, context, storageLevel)
} else {
// TODO Compute the RDD partition
computeOrReadCheckpoint(split, context)
}
}
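The cached branch above delegates to CacheManager.getOrCompute. A heavily simplified sketch of what it does, paraphrased from the Spark 1.x source (locking, metrics and the real putInBlockManager logic are omitted):
def getOrCompute[T](rdd: RDD[T], split: Partition, context: TaskContext,
                    level: StorageLevel): Iterator[T] = {
  val key = RDDBlockId(rdd.id, split.index)
  blockManager.get(key) match {
    case Some(blockResult) =>
      // Cache hit: reuse the persisted partition instead of recomputing it.
      blockResult.data.asInstanceOf[Iterator[T]]
    case None =>
      // Cache miss: compute (or read from checkpoint), then persist at the
      // requested storage level so later tasks can reuse it.
      val computedValues = rdd.computeOrReadCheckpoint(split, context)
      putInBlockManager(key, computedValues, level)
  }
}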
====> computeOrReadCheckpoint -> MapPartitionsRDD.compute
/**
 * Apply the operators/functions we defined for this RDD to one of its partitions.
 * f: our user-defined operator/function; Spark wraps it internally and adds some
 * extra logic before the call reaches this point.
 * In effect, this executes our computation against the RDD's partition and
 * returns the new RDD's partition data.
 */
override def compute(split: Partition, context: TaskContext) =
f(context, split.index, firstParent[T].iterator(split, context))
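To see where that f comes from: RDD.map in the Spark 1.x source cleans the user function and wraps it into exactly this (context, index, iterator) shape, so compute above simply applies it to the parent partition's iterator:
def map[U: ClassTag](f: T => U): RDD[U] = {
  val cleanF = sc.clean(f)   // closure-clean the user function so it can be serialized
  new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.map(cleanF))
}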