Spark Task Division, Scheduling, and Execution

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object WordCount {
  def main(args: Array[String]): Unit = {
    //a WordCount program developed with IDEA

    //local mode
    //create the SparkConf object
    val conf = new SparkConf().setMaster("local[3]").setAppName("Word Count")
    val sc = new SparkContext(conf)
    //read the file contents
    val lines = sc.textFile("F:\\MyLearning\\SparkStudy\\A_Spark_Start\\src\\main\\resources\\word.txt")
    //flatten each line into words
    val word: RDD[String] = lines.flatMap(_.split(" "))
    //pair each word with the count 1
    val words: RDD[(String, Int)] = word.map((_, 1))
    //aggregate the counts by key
    val unit: RDD[(String, Int)] = words.reduceByKey(_ + _)
    //collect to the driver and print
    val result: Array[(String, Int)] = unit.collect()
    result.foreach(println)
  }
}

The code above is a simple word count program. Now let's look at the source of each operator:

  1. flatMap returns an RDD (a MapPartitionsRDD)
def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = withScope {
    val cleanF = sc.clean(f)
    new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.flatMap(cleanF))
}
  2. map returns an RDD (a MapPartitionsRDD)
def map[U: ClassTag](f: T => U): RDD[U] = withScope {
    val cleanF = sc.clean(f)
    new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.map(cleanF))
}
  3. reduceByKey returns a ShuffledRDD (this is the else branch of combineByKeyWithClassTag, taken when the parent RDD is not already partitioned by the target partitioner)
} else {
      new ShuffledRDD[K, V, C](self, partitioner)
        .setSerializer(serializer)
        .setAggregator(aggregator)
        .setMapSideCombine(mapSideCombine)
}

Note: the MapPartitionsRDD returned by operators such as map and flatMap depends on its parent through a OneToOneDependency, i.e. a narrow dependency.
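To see these dependency types directly, here is a minimal sketch (my own addition, not part of the original program; it assumes an already created local SparkContext named sc). It checks the dependency of each RDD in a word-count-style lineage:

import org.apache.spark.{OneToOneDependency, ShuffleDependency}

val demoLines  = sc.parallelize(Seq("a b", "b c"))
val demoPairs  = demoLines.flatMap(_.split(" ")).map((_, 1))
val demoCounts = demoPairs.reduceByKey(_ + _)

// MapPartitionsRDD -> narrow dependency on its parent
println(demoPairs.dependencies.head.isInstanceOf[OneToOneDependency[_]])        // true
// ShuffledRDD -> shuffle (wide) dependency on its parent
println(demoCounts.dependencies.head.isInstanceOf[ShuffleDependency[_, _, _]])  // true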

1. Stage division
//org.apache.spark.rdd.RDD#collect
def collect(): Array[T] = withScope {
    val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
    Array.concat(results: _*)
}

//org.apache.spark.SparkContext#runJob  (overloaded)
def runJob[T, U: ClassTag](rdd: RDD[T], func: Iterator[T] => U): Array[U] = {
    runJob(rdd, func, 0 until rdd.partitions.length)
  }

//org.apache.spark.SparkContext#runJob
def runJob[T, U: ClassTag](
      rdd: RDD[T],
      func: Iterator[T] => U,
      partitions: Seq[Int]): Array[U] = {
    val cleanedFunc = clean(func)
    runJob(rdd, (ctx: TaskContext, it: Iterator[T]) => cleanedFunc(it), partitions)
  }

//org.apache.spark.SparkContext#runJob
def runJob[T, U: ClassTag](
      rdd: RDD[T],
      func: (TaskContext, Iterator[T]) => U,
      partitions: Seq[Int],
      resultHandler: (Int, U) => Unit): Unit = {
    if (stopped.get()) {
      throw new IllegalStateException("SparkContext has been shutdown")
    }
    val callSite = getCallSite
    val cleanedFunc = clean(func)
    logInfo("Starting job: " + callSite.shortForm)
    if (conf.getBoolean("spark.logLineage", false)) {
      logInfo("RDD's recursive dependencies:\n" + rdd.toDebugString)
    }
    //dagScheduler is the DAG (directed acyclic graph) scheduler
    dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler, localProperties.get)
    progressBar.foreach(_.finishAll())
    rdd.doCheckpoint()
  }

//org.apache.spark.scheduler.DAGScheduler#runJob
def runJob[T, U](
      rdd: RDD[T],
      func: (TaskContext, Iterator[T]) => U,
      partitions: Seq[Int],
      callSite: CallSite,
      resultHandler: (Int, U) => Unit,
      properties: Properties): Unit = {
    val start = System.nanoTime
    //submit the job and get a JobWaiter back
    val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
    ThreadUtils.awaitReady(waiter.completionFuture, Duration.Inf)
        
    //awaitReady above has blocked until the job completed; now inspect the outcome
    waiter.completionFuture.value.get match {
      case scala.util.Success(_) =>
        logInfo("Job %d finished: %s, took %f s".format
          (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
      case scala.util.Failure(exception) =>
        logInfo("Job %d failed: %s, took %f s".format
          (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
        // SPARK-8644: Include user stack trace in exceptions coming from DAGScheduler.
        val callerStackTrace = Thread.currentThread().getStackTrace.tail
        exception.setStackTrace(exception.getStackTrace ++ callerStackTrace)
        throw exception
    }
  }

//org.apache.spark.scheduler.DAGScheduler#submitJob
// submitJob posts a JobSubmitted event onto eventProcessLoop
eventProcessLoop.post(JobSubmitted(
      jobId, rdd, func2, partitions.toArray, callSite, waiter,
      SerializationUtils.clone(properties)))
    
//eventProcessLoop is actually a DAGSchedulerEventProcessLoop
//to find out what DAGSchedulerEventProcessLoop does, we have to follow its source
private[spark] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
    
//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop
//DAGSchedulerEventProcessLoop extends EventLoop, so keep following the source into EventLoop
private[scheduler] class DAGSchedulerEventProcessLoop(dagScheduler: DAGScheduler)
  extends EventLoop[DAGSchedulerEvent]("dag-scheduler-event-loop") with Logging {
    
//org.apache.spark.util.EventLoop
//EventLoop holds a BlockingQueue from the java.util.concurrent package
//LinkedBlockingDeque is a blocking double-ended queue (part of j.u.c since JDK 1.6)
//a BlockingQueue blocks producers when it is full and consumers when it is empty
private val eventQueue: BlockingQueue[E] = new LinkedBlockingDeque[E]()
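To make the post/take pattern concrete, here is a stripped-down toy event loop (my own sketch, not Spark's EventLoop): a daemon thread blocks on take() while other threads post events onto the same LinkedBlockingDeque.

import java.util.concurrent.{BlockingQueue, LinkedBlockingDeque}

object TinyEventLoop {
  sealed trait Event
  case class JobSubmitted(jobId: Int) extends Event

  private val queue: BlockingQueue[Event] = new LinkedBlockingDeque[Event]()

  private val worker = new Thread("tiny-event-loop") {
    override def run(): Unit = while (true) {
      val event = queue.take()               // blocks while the queue is empty
      event match {
        case JobSubmitted(id) => println(s"handling job $id")
      }
    }
  }
  worker.setDaemon(true)

  def start(): Unit = worker.start()
  def post(e: Event): Unit = queue.put(e)    // enqueue an event (the deque is unbounded here)
}

Calling TinyEventLoop.start() and then TinyEventLoop.post(JobSubmitted(0)) prints "handling job 0" from the event thread, which mirrors how DAGScheduler.submitJob hands a JobSubmitted event to the dag-scheduler-event-loop thread.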
    
//org.apache.spark.scheduler.DAGScheduler#submitJob
//back in submitJob: eventProcessLoop.post simply puts the event onto that blocking queue
eventProcessLoop.post(JobSubmitted(
      jobId, rdd, func2, partitions.toArray, callSite, waiter,
      SerializationUtils.clone(properties)))
    
//org.apache.spark.util.EventLoop
//the event thread runs run(), which takes events off the queue and calls onReceive(event)
override def run(): Unit = {
    try {
      while (!stopped.get) {
        val event = eventQueue.take()
        try {
          //hand the event to the subclass's handler
          onReceive(event)
          ......
}
            
//EventLoop is abstract, so the concrete handling lives in its subclass
//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop#doOnReceive (called from onReceive)
private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
    //this matches the JobSubmitted event posted by eventProcessLoop.post(JobSubmitted(...)) earlier
    case JobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties) =>
      //so the next step is handleJobSubmitted()
      dagScheduler.handleJobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties)
  }
            
//org.apache.spark.scheduler.DAGScheduler#handleJobSubmitted
//create the final stage (a ResultStage)
finalStage = createResultStage(finalRDD, func, partitions, jobId, callSite)

//org.apache.spark.scheduler.DAGScheduler#createResultStage
//create or look up the parent stages
val parents = getOrCreateParentStages(rdd, jobId)
    
//org.apache.spark.scheduler.DAGScheduler#getOrCreateParentStages
private def getOrCreateParentStages(rdd: RDD[_], firstJobId: Int): List[Stage] = {
    //getShuffleDependencies() returns the nearest shuffle dependencies of the given RDD:
    //it walks up the dependency chain; when a shuffle dependency is found it is added to
    //the result HashSet, otherwise that parent RDD's own dependencies are examined in turn
    //(a condensed sketch of this traversal follows this method)
    getShuffleDependencies(rdd).map { shuffleDep =>
      getOrCreateShuffleMapStage(shuffleDep, firstJobId)
    }.toList
}
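For intuition, the traversal can be condensed into a few lines. This is only a simplified sketch of the idea (not the exact Spark implementation): follow narrow dependencies upward and stop at the first shuffle dependency found on each path.

import scala.collection.mutable
import org.apache.spark.ShuffleDependency
import org.apache.spark.rdd.RDD

def directShuffleDeps(rdd: RDD[_]): Set[ShuffleDependency[_, _, _]] = {
  val found   = mutable.HashSet[ShuffleDependency[_, _, _]]()
  val visited = mutable.HashSet[RDD[_]]()
  var waiting = List[RDD[_]](rdd)
  while (waiting.nonEmpty) {
    val current = waiting.head
    waiting = waiting.tail
    if (visited.add(current)) {
      current.dependencies.foreach {
        case shuffle: ShuffleDependency[_, _, _] => found += shuffle   // stop at the shuffle boundary
        case narrow                              => waiting = narrow.rdd :: waiting  // keep walking up
      }
    }
  }
  found.toSet
}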
            
//org.apache.spark.scheduler.DAGScheduler#getOrCreateShuffleMapStage
private def getOrCreateShuffleMapStage(
      shuffleDep: ShuffleDependency[_, _, _],
      firstJobId: Int): ShuffleMapStage = {
    shuffleIdToMapStage.get(shuffleDep.shuffleId) match {
      case Some(stage) =>
        stage

      case None =>
        // Create stages for all missing ancestor shuffle dependencies.
        getMissingAncestorShuffleDependencies(shuffleDep.rdd).foreach { dep =>
          if (!shuffleIdToMapStage.contains(dep.shuffleId)) {
            // ShuffleMapStage
            createShuffleMapStage(dep, firstJobId)
          }
        }
        createShuffleMapStage(shuffleDep, firstJobId)
    }
  }

//So in the end, this simple word count program consists of two stages:
//1. ResultStage
//2. ShuffleMapStage
//In other words, Spark splits stages wherever an RDD transformation involves a shuffle operation,
//and the split starts from the action operator and works backward through the lineage
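An easy way to see this two-stage split is to print the lineage of the final RDD (unit in the word count code above); the indentation in toDebugString marks the shuffle boundary. The output below is abbreviated, and the RDD ids and partition counts will differ on your machine.

println(unit.toDebugString)
// ShuffledRDD at reduceByKey ...          <- computed by the ResultStage
//  +- MapPartitionsRDD at map ...         <- computed by the ShuffleMapStage
//     |  MapPartitionsRDD at flatMap ...
//     |  HadoopRDD at textFile ...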
2. Job creation, task submission, and task execution
//continuing with the source
//org.apache.spark.scheduler.DAGScheduler#handleJobSubmitted
//in the end the ShuffleMapStage becomes a parent of the ResultStage, i.e. the ResultStage references the ShuffleMapStage
finalStage = createResultStage(finalRDD, func, partitions, jobId, callSite)
// finalStage is wrapped in an ActiveJob, which shows that a job holds its stages
// so saying "one job contains multiple stages" is fine, but strictly speaking the job directly
// references only the final stage, and that stage in turn references its parent stages
val job = new ActiveJob(jobId, finalStage, callSite, listener, properties)
...
... 
//submit the final stage
submitStage(finalStage)
    
//org.apache.spark.scheduler.DAGScheduler#submitStage
// getMissingParentStages() finds parent stages that have not been computed yet
val missing = getMissingParentStages(stage).sortBy(_.id)
//recursion: keep submitting parent stages until a stage with no missing parents is reached, then call submitMissingTasks() (see the toy sketch after this snippet)
if (missing.isEmpty) {
     //for the earliest stage these are ShuffleMapTasks
     submitMissingTasks(stage, jobId.get)
} else {
    for (parent <- missing) {
       submitStage(parent)
    }
   waitingStages += stage
}
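The effect of the recursion can be illustrated with a toy model (my own simplification: the types below are not Spark's, and the retry here is synchronous, whereas Spark parks the stage in waitingStages and resubmits it once its parents finish):

case class ToyStage(id: Int, parents: List[ToyStage])

def submit(stage: ToyStage, done: scala.collection.mutable.Set[Int]): Unit = {
  val missing = stage.parents.filterNot(p => done(p.id)).sortBy(_.id)
  if (missing.isEmpty) {
    println(s"submitMissingTasks for stage ${stage.id}")   // deepest ancestor runs first
    done += stage.id
  } else {
    missing.foreach(submit(_, done))   // submit parent stages first
    submit(stage, done)                // then this stage itself
  }
}

val shuffleMapStage = ToyStage(0, Nil)
val resultStage     = ToyStage(1, List(shuffleMapStage))
submit(resultStage, scala.collection.mutable.Set.empty[Int])
// prints stage 0 (the ShuffleMapStage) before stage 1 (the ResultStage)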

//org.apache.spark.scheduler.DAGScheduler#submitMissingTasks
case stage: ShuffleMapStage =>
  stage.pendingPartitions.clear()
  partitionsToCompute.map { id =>
    val locs = taskIdToLocations(id)
    val part = partitions(id)
    stage.pendingPartitions += id
    //create one ShuffleMapTask per partition to compute (the number of tasks equals the number of partitions)
    new ShuffleMapTask(stage.id, stage.latestInfo.attemptNumber,
      taskBinary, part, locs, properties, serializedTaskMetrics, Option(jobId),
      Option(sc.applicationId), sc.applicationAttemptId, stage.rdd.isBarrier())
  }

case stage: ResultStage =>
  partitionsToCompute.map { id =>
    val p: Int = stage.partitions(id)
    val part = partitions(p)
    val locs = taskIdToLocations(id)
    new ResultTask(stage.id, stage.latestInfo.attemptNumber,
      taskBinary, part, locs, id, properties, serializedTaskMetrics,
      Option(jobId), Option(sc.applicationId), sc.applicationAttemptId,
      stage.rdd.isBarrier())
  }
}


//submit the child stages that were waiting on this one
// Spark divides stages from back to front, but submits them from front to back
// e.g. stage division:   collect --> reduceByKey --> map --> flatMap
//      task submission:  flatMap --> map --> reduceByKey --> collect
submitWaitingChildStages(stage)


if (tasks.size > 0) {
  //submit the TaskSet to the TaskScheduler
  taskScheduler.submitTasks(new TaskSet(
    tasks.toArray, stage.id, stage.latestInfo.attemptNumber, jobId, properties))
}
//org.apache.spark.scheduler.TaskSchedulerImpl#submitTasks
val manager = createTaskSetManager(taskSet, maxTaskFailures)
schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)
    
    
//org.apache.spark.scheduler.FIFOSchedulableBuilder#addTaskSetManager
override def addTaskSetManager(manager: Schedulable, properties: Properties) {
    //add the TaskSetManager to the scheduling pool first; if the cluster has no free resources,
    //tasks wait in the pool and run once resources become available
    rootPool.addSchedulable(manager)
}

//org.apache.spark.scheduler.TaskSchedulerImpl#submitTasks
backend.reviveOffers()

//org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
case ReviveOffers =>
  makeOffers()
//org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.DriverEndpoint#makeOffers
//ask the scheduler for tasks that can run on the current resource offers; launch them if any were assigned
if (!taskDescs.isEmpty) {
   launchTasks(taskDescs)
}

//org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.DriverEndpoint#launchTasks
// the task has to travel from the driver to an executor, so it must be serialized first
val serializedTask = TaskDescription.encode(task)
//the driver sends the serialized task to the executor endpoint
executorData.executorEndpoint.send(LaunchTask(new SerializableBuffer(serializedTask)))
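TaskDescription.encode/decode are internal to Spark, but the requirement they reflect can be demonstrated with a plain JDK serialization round trip (my own sketch, not Spark's actual codec): everything shipped from the driver to an executor, including the closures inside a task, has to survive this step.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

def roundTrip[T](value: T): T = {
  val buffer = new ByteArrayOutputStream()
  val out = new ObjectOutputStream(buffer)
  out.writeObject(value)                                               // "driver" side: encode
  out.close()
  val in = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
  in.readObject().asInstanceOf[T]                                      // "executor" side: decode
}

// Scala function literals are serializable, so this works; a closure capturing a
// non-serializable object would throw NotSerializableException here, which is the
// same failure Spark reports as "Task not serializable" at submission time.
val f = roundTrip((x: Int) => x + 1)
println(f(41))  // 42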
    
//now look at the executor side
//org.apache.spark.executor.CoarseGrainedExecutorBackend#receive
case LaunchTask(data) =>
  if (executor == null) {
    exitExecutor(1, "Received LaunchTask command but executor was null")
  } else {
    // deserialize the TaskDescription
    val taskDesc = TaskDescription.decode(data.value)
    logInfo("Got assigned task " + taskDesc.taskId)
    // run the task (handed to the executor's thread pool)
    executor.launchTask(this, taskDesc)
  }