1. Spark code example
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public final class JavaWordCount {
  private static final Pattern SPACE = Pattern.compile(" ");

  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      System.err.println("Usage: JavaWordCount <file>");
      System.exit(1);
    }
    // Create the SparkSession
    SparkSession spark = SparkSession
      .builder()
      .appName("JavaWordCount")
      .getOrCreate();
    // Read the input file
    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();
    // Transformations: split lines into words, pair each word with 1, sum the counts
    JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
    JavaPairRDD<String, Integer> ones = words.mapToPair(s -> new Tuple2<>(s, 1));
    JavaPairRDD<String, Integer> counts = ones.reduceByKey((i1, i2) -> i1 + i2);
    // collect() is an action and triggers the job
    List<Tuple2<String, Integer>> output = counts.collect();
    for (Tuple2<?, ?> tuple : output) {
      System.out.println(tuple._1() + ": " + tuple._2());
    }
    spark.stop();
  }
}
2. Action operators trigger job submission
- When Spark reaches an action operator, it triggers the submission of a job.
- The submission call chain is: action operator ==> SparkContext ==> DAGScheduler ==> TaskScheduler ==> SchedulerBackend
- The action operator first runs through SparkContext, which hands the job to its internal component DAGScheduler for stage splitting; finally the TaskScheduler submits the tasks to the executors for execution. The sketch below illustrates the trigger point.
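A minimal, self-contained sketch of that trigger point, assuming a local master; ActionTriggerDemo is a hypothetical class name, not part of the walkthrough:
import org.apache.spark.sql.SparkSession

object ActionTriggerDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ActionTriggerDemo")
      .master("local[*]")
      .getOrCreate()
    val rdd = spark.sparkContext.parallelize(1 to 100, numSlices = 4)
    // Transformations are lazy: map only records lineage, no job is submitted yet
    val doubled = rdd.map(_ * 2)
    // collect() is an action: it calls sc.runJob, entering the chain
    // SparkContext -> DAGScheduler -> TaskScheduler -> SchedulerBackend
    val result = doubled.collect()
    println(result.take(5).mkString(", "))
    spark.stop()
  }
}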
// Start from the collect action
counts.collect();
// Step into RDD.collect, which returns an array containing all elements of this RDD
def collect(): Array[T] = withScope {
  val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
  Array.concat(results: _*)
}
// Step into runJob; this is the runJob method of the SparkContext class
def runJob[T, U: ClassTag](rdd: RDD[T], func: Iterator[T] => U): Array[U] = {
  runJob(rdd, func, 0 until rdd.partitions.length)
}
// Step into the overloaded runJob called above
def runJob[T, U: ClassTag](
    rdd: RDD[T],
    func: Iterator[T] => U,
    partitions: Seq[Int]): Array[U] = {
  val cleanedFunc = clean(func)
  // This overload returns an array; step into the next runJob
  runJob(rdd, (ctx: TaskContext, it: Iterator[T]) => cleanedFunc(it), partitions)
}
def runJob[T, U: ClassTag](
    rdd: RDD[T],
    func: (TaskContext, Iterator[T]) => U,
    partitions: Seq[Int]): Array[U] = {
  val results = new Array[U](partitions.size)
  // index is the partition index; res is that partition's result
  runJob[T, U](rdd, func, partitions, (index, res) => results(index) = res)
  results
}
// Step into the runJob method above; the core call here is dagScheduler.runJob
def runJob[T, U: ClassTag](
    rdd: RDD[T],
    func: (TaskContext, Iterator[T]) => U,
    partitions: Seq[Int],
    resultHandler: (Int, U) => Unit): Unit = {
  val callSite = getCallSite
  val cleanedFunc = clean(func)
  // Core call: step into DAGScheduler.runJob
  dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler, localProperties.get)
  progressBar.foreach(_.finishAll())
  // Entry point for checkpointing
  rdd.doCheckpoint()
}
// Step into DAGScheduler.runJob
def runJob[T, U](
    rdd: RDD[T],
    func: (TaskContext, Iterator[T]) => U,
    partitions: Seq[Int],
    callSite: CallSite,
    resultHandler: (Int, U) => Unit,
    properties: Properties): Unit = {
  val start = System.nanoTime
  // Asynchronous submission; submitJob is the core method
  val waiter: JobWaiter[U] = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
  // Block until the waiter completes
  ThreadUtils.awaitReady(waiter.completionFuture, Duration.Inf)
  // Inspect the result
  waiter.completionFuture.value.get match {
    case scala.util.Success(_) =>
      logInfo("Job %d finished: %s, took %f s".format
        (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
    case scala.util.Failure(exception) =>
      logInfo("Job %d failed: %s, took %f s".format
        (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
      val callerStackTrace = Thread.currentThread().getStackTrace.tail
      exception.setStackTrace(exception.getStackTrace ++ callerStackTrace)
      throw exception
  }
}
// Step into submitJob
def submitJob[T, U](
    rdd: RDD[T],
    func: (TaskContext, Iterator[T]) => U,
    partitions: Seq[Int],
    callSite: CallSite,
    resultHandler: (Int, U) => Unit,
    properties: Properties): JobWaiter[U] = {
  // Allocate a job id
  val jobId = nextJobId.getAndIncrement()
  val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
  val waiter = new JobWaiter(this, jobId, partitions.size, resultHandler)
  // Post a JobSubmitted event to the eventProcessLoop; this is the core step
  eventProcessLoop.post(JobSubmitted(
    jobId, rdd, func2, partitions.toArray, callSite, waiter,
    SerializationUtils.clone(properties)))
  waiter
}
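The eventProcessLoop follows Spark's EventLoop pattern: events are queued by post and consumed one at a time by a single daemon thread, decoupling the caller from the scheduling work. A simplified sketch of that pattern (my own minimal model, not Spark's actual class):
import java.util.concurrent.LinkedBlockingDeque

// Minimal model of the post/onReceive event loop: post enqueues an event,
// a single daemon thread dequeues and dispatches it
abstract class SimpleEventLoop[E](name: String) {
  private val eventQueue = new LinkedBlockingDeque[E]()
  private val eventThread = new Thread(name) {
    setDaemon(true)
    override def run(): Unit = while (true) onReceive(eventQueue.take())
  }
  def start(): Unit = eventThread.start()
  def post(event: E): Unit = eventQueue.put(event)
  // Subclasses implement the dispatch, like DAGScheduler's doOnReceive
  protected def onReceive(event: E): Unit
}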
// Step into the handler for the JobSubmitted event
private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
  // Handling of JobSubmitted; the key call is handleJobSubmitted
  case JobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties) =>
    dagScheduler.handleJobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties)
  ...
}
// Step into handleJobSubmitted; the key parts are stage splitting and stage submission
private[scheduler] def handleJobSubmitted(jobId: Int,
    finalRDD: RDD[_],
    func: (TaskContext, Iterator[_]) => _,
    partitions: Array[Int],
    callSite: CallSite,
    listener: JobListener,
    properties: Properties) {
  var finalStage: ResultStage = null
  try {
    // Create the final stage
    finalStage = createResultStage(finalRDD, func, partitions, jobId, callSite)
  } catch {
    ...
  }
  // Job submitted, clear internal data.
  barrierJobIdToNumTasksCheckFailures.remove(jobId)
  ...
  // Submit the stage
  submitStage(finalStage)
}
3. DAGScheduler stage splitting in detail
- The core entry point of stage splitting is the creation of the last stage (createResultStage).
- In the DAG, the last stage is a ResultStage; all stages before it are ShuffleMapStages.
- Stages are split according to whether a wide (shuffle) dependency exists between RDDs.
- The splitting works backwards from the last RDD: recursively inspect the dependency between each RDD and its parent. For a narrow dependency, continue up to the parent's parent; for a wide dependency, cut a stage boundary at that RDD. The recursion bottoms out when an RDD has no parent, and the RDDs between boundaries form one stage each.
- The core of the recursion is getOrCreateShuffleMapStage, backed by getShuffleDependencies. That traversal maintains a stack of RDDs waiting to be visited and a HashSet collecting the wide (shuffle) dependencies found: it keeps popping RDDs off the stack, adds each shuffle dependency to the HashSet, and pushes the parents behind narrow dependencies back onto the stack, until no unvisited parent RDD remains. The toy model below mimics this traversal.
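A toy, self-contained model of that traversal; the Node/Narrow/Wide types are made up for illustration and are not Spark's classes:
object StageSplitSketch {
  import scala.collection.mutable

  sealed trait Dep { def parent: Node }
  case class Narrow(parent: Node) extends Dep
  case class Wide(parent: Node) extends Dep
  case class Node(name: String, deps: Seq[Dep] = Nil)

  // Walk backwards from the final node, mirroring getShuffleDependencies:
  // pop a node, record wide deps (stage boundaries), push narrow parents
  def shuffleDeps(finalNode: Node): Set[Node] = {
    val boundaries = mutable.Set[Node]()
    val visited = mutable.Set[Node]()
    val waiting = mutable.Stack[Node](finalNode)
    while (waiting.nonEmpty) {
      val n = waiting.pop()
      if (visited.add(n)) n.deps.foreach {
        case Wide(p)   => boundaries += p   // cut a stage here
        case Narrow(p) => waiting.push(p)   // same stage, keep walking up
      }
    }
    boundaries.toSet
  }

  def main(args: Array[String]): Unit = {
    // A --narrow--> B --wide--> C --narrow--> final
    val a = Node("A")
    val b = Node("B", Seq(Narrow(a)))
    val c = Node("C", Seq(Wide(b)))
    val fin = Node("final", Seq(Narrow(c)))
    println(shuffleDeps(fin).map(_.name)) // Set(B): the boundary sits at B
  }
}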
// Create the finalStage
finalStage = createResultStage(finalRDD, func, partitions, jobId, callSite)
// Step into createResultStage
private def createResultStage(
    rdd: RDD[_],
    func: (TaskContext, Iterator[_]) => _,
    partitions: Array[Int],
    jobId: Int,
    callSite: CallSite): ResultStage = {
  ...
  // Create the parent stages
  val parents: List[Stage] = getOrCreateParentStages(rdd, jobId)
  val id = nextStageId.getAndIncrement()
  // Create the result stage
  val stage = new ResultStage(id, rdd, func, partitions, parents, jobId, callSite)
  stageIdToStage(id) = stage
  // Record the mapping between the job and its stages
  updateJobIdStageIdMaps(jobId, stage)
  stage
}
// Step into getOrCreateParentStages to build all parent stages
private def getOrCreateParentStages(rdd: RDD[_], firstJobId: Int): List[Stage] = {
  // getShuffleDependencies returns the immediate parent shuffle dependencies
  getShuffleDependencies(rdd).map { shuffleDep =>
    getOrCreateShuffleMapStage(shuffleDep, firstJobId)
  }.toList
}
// Step into getShuffleDependencies, which returns the nearest dependencies at which stages can be split
private[scheduler] def getShuffleDependencies(
    rdd: RDD[_]): HashSet[ShuffleDependency[_, _, _]] = {
  // The HashSet of parent shuffle (wide) dependencies to return
  val parents = new HashSet[ShuffleDependency[_, _, _]]
  // RDDs already visited
  val visited = new HashSet[RDD[_]]
  // RDDs waiting to be visited, kept in a stack
  val waitingForVisit = new ArrayStack[RDD[_]]
  // Push the starting RDD (the final RDD of the DAG) onto the stack
  waitingForVisit.push(rdd)
  // While there are RDDs left to visit
  while (waitingForVisit.nonEmpty) {
    // Pop an RDD
    val toVisit = waitingForVisit.pop()
    // If this RDD has not been visited yet
    if (!visited(toVisit)) {
      // Mark it as visited
      visited += toVisit
      toVisit.dependencies.foreach {
        case shuffleDep: ShuffleDependency[_, _, _] =>
          // A shuffle dependency: add it to the set of parent wide dependencies
          parents += shuffleDep
        case dependency =>
          // Not a shuffle dependency: push this RDD's parent onto the stack to visit later
          waitingForVisit.push(dependency.rdd)
      }
    }
  }
  parents
}
// Step into getOrCreateShuffleMapStage(shuffleDep, firstJobId)
// This is the core of the recursion
private def getOrCreateShuffleMapStage(
    shuffleDep: ShuffleDependency[_, _, _],
    firstJobId: Int): ShuffleMapStage = {
  // shuffleIdToMapStage holds the ShuffleMapStages created so far
  shuffleIdToMapStage.get(shuffleDep.shuffleId) match {
    // If the stage already exists, return it directly
    case Some(stage) =>
      stage
    // Otherwise use getMissingAncestorShuffleDependencies to create the missing stages
    case None =>
      // Create stages for all missing ancestor shuffle dependencies.
      getMissingAncestorShuffleDependencies(shuffleDep.rdd).foreach { dep =>
        if (!shuffleIdToMapStage.contains(dep.shuffleId)) {
          createShuffleMapStage(dep, firstJobId)
        }
      }
      // Finally, create a stage for the given shuffle dependency.
      createShuffleMapStage(shuffleDep, firstJobId)
  }
}
// Step into getMissingAncestorShuffleDependencies
private def getMissingAncestorShuffleDependencies(
    rdd: RDD[_]): ArrayStack[ShuffleDependency[_, _, _]] = {
  // The ancestor shuffle (wide) dependencies found so far
  val ancestors = new ArrayStack[ShuffleDependency[_, _, _]]
  // RDDs already visited
  val visited = new HashSet[RDD[_]]
  // Stack of RDDs waiting to be visited
  val waitingForVisit = new ArrayStack[RDD[_]]
  // Push the input RDD onto the stack
  waitingForVisit.push(rdd)
  while (waitingForVisit.nonEmpty) {
    val toVisit = waitingForVisit.pop()
    if (!visited(toVisit)) {
      visited += toVisit
      getShuffleDependencies(toVisit).foreach { shuffleDep =>
        if (!shuffleIdToMapStage.contains(shuffleDep.shuffleId)) {
          ancestors.push(shuffleDep)
          waitingForVisit.push(shuffleDep.rdd)
        }
      }
    }
  }
  ancestors
}
4. TaskScheduler task submission in detail
4.1 The DAGScheduler side of task submission
The logic of submitStage is (a toy model follows this list):
- Find all parent stages of the finalStage via getMissingParentStages. Its core logic inspects rdd.dependencies: for a wide dependency it gets or creates the parent stage and adds it to the resulting HashSet; for a narrow dependency it pushes the parent RDD onto the waitingForVisit stack. Pushing and popping repeat until the stack is empty.
- The resulting parent stages, held in the missing variable, are sorted ascending by stage id.
- If missing is empty, the stage has no unfinished parents and submitMissingTasks is called to submit its tasks; otherwise submitStage is called recursively on every stage in missing, and the current stage is parked in waitingStages.
- The core job of submitMissingTasks is to hand the tasks to the taskScheduler:
  - Determine the ids of all partitions of the stage that still need computing, stored in partitionsToCompute.
  - For each partition id, compute data locality and record it in taskIdToLocations, a Map[Int, Seq[TaskLocation]] whose keys are partition ids and whose values are the locations of the partition's replicas.
  - Build the task collection tasks: ResultTasks for the final stage, ShuffleMapTasks for a ShuffleMapStage.
  - Wrap the tasks into a TaskSet and submit it via taskScheduler.submitTasks.
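A toy model of the parents-first recursion, with made-up Stage objects; note that real Spark parks the child in waitingStages and resubmits it when a parent completes, which this sketch collapses into a synchronous retry:
object SubmitStageSketch {
  case class Stage(id: Int, parents: List[Stage], var available: Boolean = false)

  // Mirrors submitStage: only a stage whose parents are all available
  // gets its tasks submitted; otherwise submit the parents first
  def submitStage(stage: Stage): Unit = {
    val missing = stage.parents.filterNot(_.available).sortBy(_.id)
    if (missing.isEmpty) {
      println(s"submitMissingTasks(stage ${stage.id})")
      stage.available = true
    } else {
      missing.foreach(submitStage)   // parents first
      submitStage(stage)             // then retry this stage
    }
  }

  def main(args: Array[String]): Unit = {
    val s0 = Stage(0, Nil)
    val s1 = Stage(1, List(s0))
    val s2 = Stage(2, List(s1))
    submitStage(s2) // prints stages 0, 1, 2 in order
  }
}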
// Entered from dagScheduler.handleJobSubmitted
// The stage submission method
submitStage(finalStage)
private def submitStage(stage: Stage) {
  val jobId = activeJobForStage(stage)
  if (jobId.isDefined) {
    logDebug("submitStage(" + stage + ")")
    // Only stages that are not already submitted
    if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
      // Get the missing parent stages, sorted ascending by stage id
      val missing: List[Stage] = getMissingParentStages(stage).sortBy(_.id)
      logDebug("missing: " + missing)
      if (missing.isEmpty) {
        logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
        // No missing parents: submit this stage's tasks
        submitMissingTasks(stage, jobId.get)
      } else {
        // There are missing parent stages
        for (parent <- missing) {
          // Submit the parents first
          submitStage(parent)
        }
        // Park this stage in the waiting set until its parents finish
        waitingStages += stage
      }
    }
  } else {
    ...
  }
}
// Step into getMissingParentStages, which finds all missing parent stages of a stage
private def getMissingParentStages(stage: Stage): List[Stage] = {
  // The set of missing parent stages
  val missing = new HashSet[Stage]
  // RDDs already visited
  val visited = new HashSet[RDD[_]]
  // RDDs waiting to be visited, kept in a stack
  val waitingForVisit = new ArrayStack[RDD[_]]
  def visit(rdd: RDD[_]) {
    if (!visited(rdd)) {
      visited += rdd
      val rddHasUncachedPartitions = getCacheLocs(rdd).contains(Nil)
      if (rddHasUncachedPartitions) {
        for (dep <- rdd.dependencies) {
          // Branch on the dependency type
          dep match {
            // Wide (shuffle) dependency
            case shufDep: ShuffleDependency[_, _, _] =>
              // Get or create the parent ShuffleMapStage
              val mapStage = getOrCreateShuffleMapStage(shufDep, stage.firstJobId)
              if (!mapStage.isAvailable) {
                missing += mapStage
              }
            // Narrow dependency: push the parent RDD to visit later
            case narrowDep: NarrowDependency[_] =>
              waitingForVisit.push(narrowDep.rdd)
          }
        }
      }
    }
  }
  waitingForVisit.push(stage.rdd)
  while (waitingForVisit.nonEmpty) {
    visit(waitingForVisit.pop())
  }
  missing.toList
}
// Hand the tasks to the taskScheduler for scheduling and execution
private def submitMissingTasks(stage: Stage, jobId: Int) {
  // Ids of all partitions of this stage that still need computing
  val partitionsToCompute: Seq[Int] = stage.findMissingPartitions()
  // Mark this stage as running
  runningStages += stage
  // Compute data locality: the TaskLocations for each task; with three replicas,
  // these are the locations of all three copies
  val taskIdToLocations: Map[Int, Seq[TaskLocation]] = try {
    stage match {
      case s: ShuffleMapStage =>
        partitionsToCompute.map { id => (id, getPreferredLocs(stage.rdd, id))}.toMap
      case s: ResultStage =>
        partitionsToCompute.map { id =>
          val p = s.partitions(id)
          (id, getPreferredLocs(stage.rdd, p))
        }.toMap
    }
  } catch {...}
  ...
  // Serialize the stage's RDD together with its shuffle dependency (for a
  // ShuffleMapStage) or result function (for a ResultStage), then broadcast
  // it to the executors
  try {
    RDDCheckpointData.synchronized {
      taskBinaryBytes = stage match {
        case stage: ShuffleMapStage =>
          JavaUtils.bufferToArray(
            closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef))
        case stage: ResultStage =>
          JavaUtils.bufferToArray(closureSerializer.serialize((stage.rdd, stage.func): AnyRef))
      }
      partitions = stage.rdd.partitions
    }
    taskBinary = sc.broadcast(taskBinaryBytes)
  } catch {
    ...
  }
  // Build the task collection:
  // ResultTasks for the final stage,
  // ShuffleMapTasks for a ShuffleMapStage
  val tasks: Seq[Task[_]] = try {
    val serializedTaskMetrics = closureSerializer.serialize(stage.latestInfo.taskMetrics).array()
    stage match {
      case stage: ShuffleMapStage =>
        stage.pendingPartitions.clear()
        partitionsToCompute.map { id =>
          val locs = taskIdToLocations(id)
          val part = partitions(id)
          stage.pendingPartitions += id
          // Build a ShuffleMapTask from the stage id, the locality info (locs), and the job id
          new ShuffleMapTask(stage.id, stage.latestInfo.attemptNumber,
            taskBinary, part, locs, properties, serializedTaskMetrics, Option(jobId),
            Option(sc.applicationId), sc.applicationAttemptId, stage.rdd.isBarrier())
        }
      case stage: ResultStage =>
        partitionsToCompute.map { id =>
          val p: Int = stage.partitions(id)
          val part = partitions(p)
          val locs = taskIdToLocations(id)
          // Build a ResultTask from the stage id, the locality info (locs), and the job id
          new ResultTask(stage.id, stage.latestInfo.attemptNumber,
            taskBinary, part, locs, id, properties, serializedTaskMetrics,
            Option(jobId), Option(sc.applicationId), sc.applicationAttemptId,
            stage.rdd.isBarrier())
        }
    }
  } catch {
    ...
  }
  if (tasks.size > 0) {
    logInfo(s"Submitting ${tasks.size} missing tasks from $stage (${stage.rdd}) (first 15 " +
      s"tasks are for partitions ${tasks.take(15).map(_.partitionId)})")
    // The actual submission: wrap the tasks into a TaskSet and hand it to the taskScheduler
    taskScheduler.submitTasks(new TaskSet(
      tasks.toArray, stage.id, stage.latestInfo.attemptNumber, jobId, properties))
  } else {
    ...
  }
}
4.2 The TaskScheduler and SchedulerBackend side of task submission
The entry point is:
taskScheduler.submitTasks(new TaskSet(
  tasks.toArray, stage.id, stage.latestInfo.attemptNumber, jobId, properties))
Go straight into the implementation class TaskSchedulerImpl:
override def submitTasks(taskSet: TaskSet) {
  // Take out the tasks
  val tasks = taskSet.tasks
  // Scheduling state is mutated under a lock
  this.synchronized {
    // The manager tracks the success or failure of every task in the set
    val manager = createTaskSetManager(taskSet, maxTaskFailures)
    val stage = taskSet.stageId
    // Register the TaskSetManager under its stage and attempt
    val stageTaskSets =
      taskSetsByStageIdAndAttempt.getOrElseUpdate(stage, new HashMap[Int, TaskSetManager])
    ...
    // Until the first task is accepted, run a timer that warns about resource starvation
    if (!isLocal && !hasReceivedTask) {
      starvationTimer.scheduleAtFixedRate(new TimerTask() {
        override def run() {
          if (!hasLaunchedTask) {
            logWarning("Initial job has not accepted any resources; " +
              "check your cluster UI to ensure that workers are registered " +
              "and have sufficient resources")
          } else {
            this.cancel()
          }
        }
      }, STARVATION_TIMEOUT_MS, STARVATION_TIMEOUT_MS)
    }
    hasReceivedTask = true
  }
  // Ask the SchedulerBackend to offer resources
  backend.reviveOffers()
}
// Step into backend.reviveOffers()
// in the CoarseGrainedSchedulerBackend class
override def reviveOffers() {
  // Send a ReviveOffers message to the driver endpoint
  driverEndpoint.send(ReviveOffers)
}
// Handling of the ReviveOffers message
case ReviveOffers =>
  makeOffers()
// Step into makeOffers
private def makeOffers() {
  // Build the task descriptions
  val taskDescs = withLock {
    // Keep only the executors that are alive
    val activeExecutors = executorDataMap.filterKeys(executorIsAlive)
    // workOffers holds each executor's details: id, host, free cores, and address
    val workOffers = activeExecutors.map {
      case (id, executorData) =>
        new WorkerOffer(id, executorData.executorHost, executorData.freeCores,
          Some(executorData.executorAddress.hostPort))
    }.toIndexedSeq
    // Resource offers: the taskScheduler maps tasks onto the offered executors here
    scheduler.resourceOffers(workOffers)
  }
  if (!taskDescs.isEmpty) {
    // Launch the matched tasks on the executors
    launchTasks(taskDescs)
  }
}
// Step into scheduler.resourceOffers(workOffers)
def resourceOffers(offers: IndexedSeq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized {
  ...
  // Shuffle the offers so tasks are not always placed on the same executors
  val shuffledOffers = shuffleOffers(filteredOffers)
  // One task buffer per offer, sized by the number of task slots the offer can host
  val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores / CPUS_PER_TASK))
  val availableCpus = shuffledOffers.map(o => o.cores).toArray
  val availableSlots = shuffledOffers.map(o => o.cores / CPUS_PER_TASK).sum
  val sortedTaskSets = rootPool.getSortedTaskSetQueue
  // Walk the TaskSets in scheduling order
  for (taskSet <- sortedTaskSets) {
    if (newExecAvail) {
      // A new executor is available: let the TaskSetManager recompute its
      // locality levels; the actual task-to-offer matching (generally driven
      // by data locality) follows and is elided here
      taskSet.executorAdded()
    }
  }
  ...
}
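A toy model of the slot-based matching that follows, with made-up types and data locality ignored for brevity; each offer contributes freeCores / CPUS_PER_TASK slots and tasks are placed round-robin:
object OfferMatchingSketch {
  case class WorkerOffer(executorId: String, freeCores: Int)
  val CPUS_PER_TASK = 1

  // Place tasks round-robin over the offers, one slot (CPUS_PER_TASK cores) per task
  def assign(taskIds: Seq[Int], offers: IndexedSeq[WorkerOffer]): Map[String, Seq[Int]] = {
    val freeCores = offers.map(_.freeCores).toArray
    val placed = Map(offers.map(o => o.executorId -> scala.collection.mutable.Buffer[Int]()): _*)
    var i = 0
    for (task <- taskIds) {
      // Probe each offer at most once looking for a free slot
      var probes = 0
      while (freeCores(i) < CPUS_PER_TASK && probes < offers.size) {
        i = (i + 1) % offers.size; probes += 1
      }
      if (freeCores(i) >= CPUS_PER_TASK) {
        placed(offers(i).executorId) += task
        freeCores(i) -= CPUS_PER_TASK
        i = (i + 1) % offers.size
      }
    }
    placed.map { case (id, ts) => id -> ts.toSeq }
  }

  def main(args: Array[String]): Unit = {
    val offers = IndexedSeq(WorkerOffer("exec-1", 2), WorkerOffer("exec-2", 1))
    // Three slots in total: tasks 0..2 are placed, task 3 finds no slot
    println(assign(Seq(0, 1, 2, 3), offers))
  }
}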
// Step into launchTasks
private def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
  ...
  // Core step: send a LaunchTask message, wrapping the serialized task, to the executor's endpoint
  executorData.executorEndpoint.send(LaunchTask(new SerializableBuffer(serializedTask)))
}
5. Executor task execution in detail
The entry point for task execution is the handling of the LaunchTask message in the CoarseGrainedExecutorBackend class.
// Handling of the LaunchTask message on the executor
case LaunchTask(data) =>
  if (executor == null) {
    exitExecutor(1, "Received LaunchTask command but executor was null")
  } else {
    // Deserialize the task description
    val taskDesc = TaskDescription.decode(data.value)
    logInfo("Got assigned task " + taskDesc.taskId)
    // The real entry point of task execution on the executor
    executor.launchTask(this, taskDesc)
  }
The executor.launchTask(this, taskDesc) call:
- First wraps the task into a TaskRunner object, which holds the task's information and environment, then submits it to a thread pool.
- The thread pool runs TaskRunner.run, which proceeds as follows:
  - Mark the task's state as RUNNING.
  - Deserialize the task information.
  - Actually run the task via task.run (analyzed separately later).
  - Release resources once the task finishes, mainly locks and memory.
  - Ship the result back according to its size: a result below maxDirectResultSize is returned directly, while a larger one is stored in the BlockManager and only the block id is returned (see the configuration sketch after this list).
  - Update the task's state according to the outcome, normally FINISHED.
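Both size thresholds are configurable. A hedged sketch of raising them for a job that legitimately collects large results; the values here are illustrative, not recommendations:
import org.apache.spark.sql.SparkSession

object LargeCollectConfig {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("LargeCollectConfig")
      // Cap on the total size of serialized results shipped to the driver
      .config("spark.driver.maxResultSize", "2g")
      // Results above this size travel through the BlockManager instead of the RPC reply
      .config("spark.task.maxDirectResultSize", "4m")
      .getOrCreate()
    // ... job code ...
    spark.stop()
  }
}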
// Step into Executor.launchTask
def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = {
  // Wrap the task into a TaskRunner
  val tr = new TaskRunner(context, taskDescription)
  runningTasks.put(taskDescription.taskId, tr)
  // Submit it to the thread pool for execution
  threadPool.execute(tr)
}
// Step into TaskRunner.run
override def run(): Unit = {
  ...
  // The TaskMemoryManager manages this task's memory
  val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId)
  // Mark the task's state as RUNNING
  execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
  // Deserialize the task information
  task = ser.deserialize[Task[Any]](
    taskDescription.serializedTask, Thread.currentThread.getContextClassLoader)
  ...
  // The actual task execution
  val value = Utils.tryWithSafeFinally {
    val res = task.run(
      taskAttemptId = taskId,
      attemptNumber = taskDescription.attemptNumber,
      metricsSystem = env.metricsSystem)
    threwException = false
    res
  }
  // Release locks and memory once execution has finished
  val releasedLocks = env.blockManager.releaseAllLocksForTask(taskId)
  val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()
  // Serialize the result value
  val valueBytes = resultSer.serialize(value)
  // Wrap it into a DirectTaskResult
  val directResult = new DirectTaskResult(valueBytes, accumUpdates)
  // Serialize the DirectTaskResult
  val serializedDirectResult = ser.serialize(directResult)
  // Measure the size of the serialized result
  val resultSize = serializedDirectResult.limit()
  // Decide how to ship the result back to the driver:
  // maxResultSize defaults to 1 GB (spark.driver.maxResultSize)
  // maxDirectResultSize defaults to 1 MB (spark.task.maxDirectResultSize)
  val serializedResult: ByteBuffer = {
    if (maxResultSize > 0 && resultSize > maxResultSize) {
      // Result too large overall: return only metadata
      ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
    } else if (resultSize > maxDirectResultSize) {
      val blockId = TaskResultBlockId(taskId)
      // Store the result in the BlockManager; only the block id travels back
      env.blockManager.putBytes(
        blockId,
        new ChunkedByteBuffer(serializedDirectResult.duplicate()),
        StorageLevel.MEMORY_AND_DISK_SER)
      ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
    } else {
      // Small result: return it directly
      serializedDirectResult
    }
  }
  // Report back to the backend: send a FINISHED statusUpdate carrying the result
  execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
}
6. Driver receives task status messages
- After a task finishes, it sends a statusUpdate to the ExecutorBackend.
- The ExecutorBackend forwards the StatusUpdate message to the driver.
- The driver processes the StatusUpdate message and enqueues each successful task into the success queue.
- From that queue, the task result is effectively handed back to the taskScheduler.
- The taskScheduler passes the returned result to the taskSetManager, which does three things with it: 1. counts the success, 2. marks the task as successful, 3. decides whether the whole taskSet is finished.
// Step into CoarseGrainedExecutorBackend.statusUpdate
override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer) {
  val msg = StatusUpdate(executorId, taskId, state, data)
  driver match {
    // Send the StatusUpdate message to the driver
    case Some(driverRef) => driverRef.send(msg)
    case None => logWarning(s"Drop $msg because has not yet connected to driver")
  }
}
// Follow the driver's handling of the StatusUpdate message
case StatusUpdate(executorId, taskId, state, data) =>
  // Let the taskScheduler process the update
  scheduler.statusUpdate(taskId, state, data.value)
  if (TaskState.isFinished(state)) {
    executorDataMap.get(executorId) match {
      case Some(executorInfo) =>
        // The finished task frees its cores; offer them out again
        executorInfo.freeCores += scheduler.CPUS_PER_TASK
        makeOffers(executorId)
      case None =>
        // Ignoring the update since we don't know about the executor.
        logWarning(s"Ignored task status update ($taskId state $state) " +
          s"from unknown executor with ID $executorId")
    }
  }
// Step into TaskSchedulerImpl.statusUpdate
def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) {
  synchronized {
    ...
    if (TaskState.isFinished(state)) {
      cleanupTaskState(tid)
      taskSet.removeRunningTask(tid)
      if (state == TaskState.FINISHED) {
        // Enqueue the successful task into the success queue
        taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData)
      } else if (Set(TaskState.FAILED, TaskState.KILLED, TaskState.LOST).contains(state)) {
        taskResultGetter.enqueueFailedTask(taskSet, tid, state, serializedData)
      }
    }
  }
}
// Step into enqueueSuccessfulTask to see how a successful task is handled
def enqueueSuccessfulTask(
    taskSetManager: TaskSetManager,
    tid: Long,
    serializedData: ByteBuffer): Unit = {
  getTaskResultExecutor.execute(new Runnable {
    override def run(): Unit = Utils.logUncaughtExceptions {
      try {
        val (result, size) = serializer.get().deserialize[TaskResult[_]](serializedData) match {
          // The result came back directly
          case directResult: DirectTaskResult[_] =>
            directResult.value(taskResultSerializer.get())
            (directResult, serializedData.limit())
          // The result is indirect: fetch the bytes from the remote BlockManager
          case IndirectTaskResult(blockId, size) =>
            scheduler.handleTaskGettingResult(taskSetManager, tid)
            val serializedTaskResult = sparkEnv.blockManager.getRemoteBytes(blockId)
            ...
            (deserializedResult, size)
        }
        ...
        // Core call: hand the successful result to the scheduler
        scheduler.handleSuccessfulTask(taskSetManager, tid, result)
      } catch {
        ...
      }
    }
  })
}
// Step into scheduler.handleSuccessfulTask
def handleSuccessfulTask(
    taskSetManager: TaskSetManager,
    tid: Long,
    taskResult: DirectTaskResult[_]): Unit = synchronized {
  taskSetManager.handleSuccessfulTask(tid, taskResult)
}
// Step into taskSetManager.handleSuccessfulTask(tid, taskResult)
def handleSuccessfulTask(tid: Long, result: DirectTaskResult[_]): Unit = {
  // Look up the task's info
  val info = taskInfos(tid)
  val index = info.index
  // If the task is not yet marked successful, bump the counter and mark it
  if (!successful(index)) {
    tasksSuccessful += 1
    successful(index) = true
    // If every task has succeeded, mark this TaskSetManager as a zombie
    if (tasksSuccessful == numTasks) {
      isZombie = true
    }
  }
  // Tell the dagScheduler the task has ended successfully
  sched.dagScheduler.taskEnded(tasks(index), Success, result.value(), result.accumUpdates, info)
  // Check whether the whole taskSet is now finished
  maybeFinishTaskSet()
}