[spark-src-core] 2.2 job submission flow in local mode - part I

  Now we will dive into Spark internals using the simple WordCount example below (later articles will reference this example by default).

val sparkConf = new org.apache.spark.SparkConf()
  .setAppName("ScalaWordCount")
  .setMaster("local[2]") //-local[*] by default
//-leib-confs: log the full dependency chain (lineage) of every job
sparkConf.set("spark.logLineage", "true")

val sc = new org.apache.spark.SparkContext(sparkConf)
//-lineage printed once the action below runs (spark.logLineage=true):
//    (1) ShuffledRDD[4] at reduceByKey at ScalaWordCount.scala:44 []
//    +-(1) MapPartitionsRDD[3] at map at ScalaWordCount.scala:42 []
//    |  MapPartitionsRDD[2] at flatMap at ScalaWordCount.scala:41 []
//    |  MapPartitionsRDD[1] at textFile at ScalaWordCount.scala:34 []
//    |  ../spark-1.4.1/examples/src/main/resources/CHANGES.txt HadoopRDD[0] at textFile at ScalaWordCount.scala:34 []
val file = "../spark-1.4.1/examples/src/main/resources/CHANGES.txt" //-this file is about 584 KB
val rdd = sc.textFile(file) //-MapPartitionsRDD[1] --> HadoopRDD[0]

val fmrdd = rdd.flatMap(_.split(" ")) //-MapPartitionsRDD[2]
val maprdd = fmrdd.map((_, 1)) //-MapPartitionsRDD[3]
val rstrdd = maprdd.reduceByKey((x, y) => x + y) //-ShuffledRDD[4]; reduceByKey() is lazy, no real computation is spawned yet

val arr = rstrdd.collect() //-collect() is an action, so the job is actually submitted here
val max = 10 //Integer.MAX_VALUE
var count = 0
if (arr.length > max) { //-to limit the result size, rdd.take(n) or rdd.top(n) is the better choice
  println(s"*reduce output to limit ${max}, found ${arr.length}")
}
//-iterate the result (in no particular order)
for ((ele, num) <- arr if count < max) {
  count += 1
  println(ele + "," + num)
}
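
  It is worth noting what collect() really does: it is the action that submits the job. A simplified view of RDD#collect() in the Spark 1.4.x source (details trimmed) shows that it wraps every partition's iterator into an array via SparkContext#runJob(); that internal function is exactly the func parameter we will meet again in DAGScheduler#handleJobSubmitted() below.

//-RDD.scala, simplified view (Spark 1.4.x): collect() delegates to sc.runJob()
def collect(): Array[T] = withScope {
  //-(iter: Iterator[T]) => iter.toArray is the "internal collective func" that later
  //-arrives as the `func` argument of DAGScheduler#handleJobSubmitted()
  val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
  Array.concat(results: _*)
}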

 

 

  figure: overview of the job submitting flow

  figure: communication between the components for one job

  figure: sequence diagram of the map-side computation

  figure: pseudo-code view corresponding to the Spark source
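
  Since the pseudo-code figure cannot be reproduced here, the overall call path for our collect() can be summarized roughly as follows (method names are from the Spark 1.4.x source; this is a simplified outline, not exact code):

RDD.collect()
  -> SparkContext.runJob(rdd, func, partitions, allowLocal)
    -> DAGScheduler.runJob() -> DAGScheduler.submitJob()
      -> eventProcessLoop.post(JobSubmitted(...))   //-asynchronous event queue
        -> DAGScheduler.handleJobSubmitted()        //-build the ResultStage (and parent ShuffleMapStages)
          -> submitStage(finalStage)                //-recursively submit missing parent stages first
            -> submitMissingTasks(stage)            //-one ShuffleMapTask/ResultTask per missing partition
              -> taskScheduler.submitTasks(taskSet) //-hand over to TaskSchedulerImpl and the backend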

 ======

 several core code paths (Spark 1.4.1)

-the job submission method in DAGScheduler

/**-handle job submit event */
  private[scheduler] def handleJobSubmitted(jobId: Int,
      finalRDD: RDD[_],
      func: (TaskContext, Iterator[_]) => _,  //-e.g. Spark's internal collect func: (it: Iterator[_]) => it.toArray
      partitions: Array[Int], //-from most recent rdd's partitions,see SparkContext#runJob()-L1979
      allowLocal: Boolean,
      callSite: CallSite,
      listener: JobListener,
      properties: Properties) {
    var finalStage: ResultStage = null
    try {
      // New stage creation may throw an exception if, for example, jobs are run on a
      // HadoopRDD whose underlying HDFS files have been deleted.-->>parent ShuffleMapStage is created here<<
      finalStage = newResultStage(finalRDD, partitions.size, jobId, callSite)
    } catch {
      case e: Exception =>
        logWarning("Creating new stage failed due to exception - job: " + jobId, e)
        listener.jobFailed(e)
        return
    }
    if (finalStage != null) {
      val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties) //-one job per action
      clearCacheLocs()
      logInfo("Got job %s (%s) with %d output partitions (allowLocal=%s)".format(
        job.jobId, callSite.shortForm, partitions.length, allowLocal))
      logInfo("Final stage: " + finalStage + "(" + finalStage.name + "),rdd " + finalRDD)//-ShuffleRDD for ScalaWordCount
      logInfo("Parents of final stage: " + finalStage.parents)
      logInfo("Missing parents: " + getMissingParentStages(finalStage))
      val shouldRunLocally =
        localExecutionEnabled && allowLocal && finalStage.parents.isEmpty && partitions.length == 1
      val jobSubmissionTime = clock.getTimeMillis()
      if (shouldRunLocally) {
        // Compute very short actions like first() or take() with no parent stages locally.
        listenerBus.post(
          SparkListenerJobStart(job.jobId, jobSubmissionTime, Seq.empty, properties))
        runLocally(job) //-run in the same process as the action on the local host; a mini job runner with no job/task scheduling
      } else {  //-e.g. our reduceByKey() job takes this branch
        jobIdToActiveJob(jobId) = job
        activeJobs += job
        finalStage.resultOfJob = Some(job)
        val stageIds = jobIdToStageIds(jobId).toArray
        val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo)) //-one element in fact
        listenerBus.post( //-below is similar to runLocally()
          SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos, properties))
        submitStage(finalStage) //-how is the ResultStage submitted? see inside; how are a job's map operations completed? via the recursive iterator() calls in RDD#compute()
      }
    }
    submitWaitingStages() //-check whether any waiting stages to submit
  }
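
  Before stepping into submitStage(), it is worth seeing where the parent stages come from. newResultStage() (and later getMissingParentStages()) walk the final RDD's dependency graph backwards: every ShuffleDependency marks a stage boundary and yields a parent ShuffleMapStage, while narrow dependencies stay inside the current stage. The helper below is a simplified sketch of that walk (not the actual Spark method, just the idea):

import scala.collection.mutable.{ArrayBuffer, HashSet}
import org.apache.spark.ShuffleDependency
import org.apache.spark.rdd.RDD

//-simplified sketch: collect the ShuffleDependencies reachable from `rdd` without
//-crossing another shuffle; each of them becomes a parent ShuffleMapStage
def parentShuffleDeps(rdd: RDD[_]): Seq[ShuffleDependency[_, _, _]] = {
  val parents = new ArrayBuffer[ShuffleDependency[_, _, _]]
  val visited = new HashSet[RDD[_]]
  def visit(r: RDD[_]): Unit = {
    if (!visited(r)) {
      visited += r
      r.dependencies.foreach {
        case shuffleDep: ShuffleDependency[_, _, _] => parents += shuffleDep //-stage boundary
        case narrowDep => visit(narrowDep.rdd)                               //-same stage, keep walking
      }
    }
  }
  visit(rdd)
  parents
}

  For the WordCount example the walk starts at ShuffledRDD[4], finds its single ShuffleDependency on MapPartitionsRDD[3], and therefore creates one parent ShuffleMapStage covering RDDs [0..3]; the ResultStage itself contains only ShuffledRDD[4].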

 

/** Submits stage, but first *recursively* submits any missing parents. -i.e. parents run earlier in execution order */
  private def submitStage(stage: Stage) {
    val jobId = activeJobForStage(stage)
    if (jobId.isDefined) {
      logInfo("*submitStage(" + stage + ")")
      if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
        val missing = getMissingParentStages(stage).sortBy(_.id)  //-sorted from the earliest stage to the latest
        logInfo("-*missing: " + missing)
        if (missing.isEmpty) {  //-only a stage with no missing parents (a root stage) is submitted directly
          logInfo("--*Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
          submitMissingTasks(stage, jobId.get) //-no unfinished parents, so submit this stage's tasks right away
        } else {
          for (parent <- missing) { //-parents still missing: recursively submit them first
            submitStage(parent)
          }
          //-for our example the ResultStage is the one parked here (its parent ShuffleMapStage gets submitted above)
          //-since a parent stage is unfinished, this stage waits; it is resubmitted after the parents finish,
          waitingStages += stage //-see onReceive() > CompletionEvent handling > submitWaitingStages()
        }
      }
    } else {
      abortStage(stage, "No active job for stage " + stage.id)
    }
  }
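
  A stage parked in waitingStages is revisited by submitWaitingStages(), which is called at the end of handleJobSubmitted() above and again from the CompletionEvent handling once the tasks of a running stage finish. Roughly (a simplified view of the Spark 1.4.x method, log statements removed), it just drains the queue and retries submitStage() for every entry:

//-simplified view of DAGScheduler#submitWaitingStages() (Spark 1.4.x, logging removed):
//-submitStage() will either run a stage now (no missing parents left) or park it again
private def submitWaitingStages(): Unit = {
  val waitingStagesCopy = waitingStages.toArray
  waitingStages.clear()
  for (stage <- waitingStagesCopy.sortBy(_.jobId)) { //-earlier jobs first
    submitStage(stage)
  }
}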
  

   The method below turns a stage into tasks (one per missing partition) and finally submits them to the task scheduler.

/** Called when stage's parents are available and we can now do its task. */
  private def submitMissingTasks(stage: Stage, jobId: Int) {
    logDebug("submitMissingTasks(" + stage + ")")
    // Get our pending tasks and remember them in our pendingTasks entry
    stage.pendingTasks.clear()

    // First figure out the indexes of partition ids to compute.-empty or unfinished partitions
    val partitionsToCompute: Seq[Int] = { //-control how many tasks will be generated
      stage match {
        case stage: ShuffleMapStage =>
          (0 until stage.numPartitions).filter(id => stage.outputLocs(id).isEmpty)
        case stage: ResultStage =>
          val job = stage.resultOfJob.get
          (0 until job.numPartitions).filter(id => !job.finished(id))
      }
    }

    val properties = jobIdToActiveJob.get(stage.jobId).map(_.properties).orNull
    //-mark current running stage
    runningStages += stage
    // SparkListenerStageSubmitted should be posted before testing whether tasks are
    // serializable. If tasks are not serializable, a SparkListenerStageCompleted event
    // will be posted, which should always come after a corresponding SparkListenerStageSubmitted
    // event.
    stage.latestInfo = StageInfo.fromStage(stage, Some(partitionsToCompute.size))
    outputCommitCoordinator.stageStart(stage.id)
    listenerBus.post(SparkListenerStageSubmitted(stage.latestInfo, properties))

    // TODO: Maybe we can keep the taskBinary in Stage to avoid serializing it multiple times.
    // Broadcasted binary for the task, used to dispatch tasks to executors. Note that we broadcast
    // the serialized copy of the RDD and for each task we will deserialize it, which means each
    // task gets a different copy of the RDD. This provides stronger isolation between tasks that
    // might modify state of objects referenced in their closures. This is necessary in Hadoop
    // where the JobConf/Configuration object is not thread-safe.
    var taskBinary: Broadcast[Array[Byte]] = null
    try {
      // For ShuffleMapTask, serialize and broadcast (rdd, shuffleDep).
      // For ResultTask, serialize and broadcast (rdd, func).
      val taskBinaryBytes: Array[Byte] = stage match {
        case stage: ShuffleMapStage =>
          closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef).array() //-use stage's rdd as task's rdd
        case stage: ResultStage =>
          closureSerializer.serialize((stage.rdd, stage.resultOfJob.get.func): AnyRef).array()
      }

      taskBinary = sc.broadcast(taskBinaryBytes)
    } catch {
      // In the case of a failure during serialization, abort the stage.
      case e: NotSerializableException =>
        abortStage(stage, "Task not serializable: " + e.toString)
        runningStages -= stage

        // Abort execution
        return
      case NonFatal(e) =>
        abortStage(stage, s"Task serialization failed: $e\n${e.getStackTraceString}")
        runningStages -= stage
        return
    }
    //-note: task generation policy
    val tasks: Seq[Task[_]] = try {
      stage match {
        case stage: ShuffleMapStage =>  //-intermediate stage,see below
          partitionsToCompute.map { id => //-one task per missing partition of this stage's rdd (which may map back to the root rdd's partitions)
            val locs = getPreferredLocs(stage.rdd, id)//-the preferred locations of this partition, similar to hdfs block replicas
            val part = stage.rdd.partitions(id) //-the partition with index id
            new ShuffleMapTask(stage.id, taskBinary, part, locs)  //-corresponding task
          }

        case stage: ResultStage =>  //-final stage
          val job = stage.resultOfJob.get
          partitionsToCompute.map { id =>
            val p: Int = job.partitions(id) //-maps the job's partition index to a partition index of stage.rdd
            val part = stage.rdd.partitions(p)
            val locs = getPreferredLocs(stage.rdd, p) //-the replica hosts holding the same block
            //-track info
            for(par <- stage.rdd.partitions)
              logInfo("-part/rdd:" + par + "/" + stage.rdd)
            for(loc <- locs)
              logInfo("-loc %s".format(loc))
            //-the number of ResultTasks depends on the number of partitions
            new ResultTask(stage.id, taskBinary, part, locs, id)  //-corresponding task
          }
      }
    } catch {
      case NonFatal(e) =>
        abortStage(stage, s"Task creation failed: $e\n${e.getStackTraceString}")
        runningStages -= stage
        return
    }

    if (tasks.size > 0) { //-one task per missing partition; 1 in our reduceByKey() example
      logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")")
      stage.pendingTasks ++= tasks
      logDebug("New pending tasks: " + stage.pendingTasks)
      taskScheduler.submitTasks(  //-hand task scheduling over to TaskSchedulerImpl
        new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties))
      stage.latestInfo.submissionTime = Some(clock.getTimeMillis())
    } else {
      // Because we posted SparkListenerStageSubmitted earlier, we should mark
      // the stage as completed here in case there are no tasks to run
      markStageAsFinished(stage, None)

      val debugString = stage match {
        case stage: ShuffleMapStage =>
          s"Stage ${stage} is actually done; " +
            s"(available: ${stage.isAvailable}," +
            s"available outputs: ${stage.numAvailableOutputs}," +
            s"partitions: ${stage.numPartitions})"
        case stage : ResultStage =>
          s"Stage ${stage} is actually done; (partitions: ${stage.numPartitions})"
      }
      logDebug(debugString)
    }
  }
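
  What happens after taskScheduler.submitTasks() belongs to the next part of the flow, but the hand-off itself is short: TaskSchedulerImpl wraps the TaskSet in a TaskSetManager, registers it with the scheduling pool, and asks the backend (LocalBackend in local[*] mode) to revive offers, which eventually launches the tasks on the local Executor. A simplified sketch of that method in Spark 1.4.x (locking details and the starvation-check timer omitted):

//-simplified sketch of TaskSchedulerImpl#submitTasks() (Spark 1.4.x; starvation timer and
//-some bookkeeping omitted)
override def submitTasks(taskSet: TaskSet) {
  logInfo("Adding task set " + taskSet.id + " with " + taskSet.tasks.length + " tasks")
  this.synchronized {
    val manager = createTaskSetManager(taskSet, maxTaskFailures) //-wraps the TaskSet
    activeTaskSets(taskSet.id) = manager
    schedulableBuilder.addTaskSetManager(manager, manager.properties) //-FIFO/FAIR pool
  }
  backend.reviveOffers() //-LocalBackend in local mode: offers the local Executor's cores to the TaskSetManager
}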
  

 

-ShuffleMapTask core method

override def runTask(context: TaskContext): MapStatus = {
    // Deserialize the RDD using the broadcast variable.
    val deserializeStartTime = System.currentTimeMillis()
    val ser = SparkEnv.get.closureSerializer.newInstance()
    //-task deserialization, see DAGScheduler#submitMissingTasks(); the rdd here is the last rdd of this (ShuffleMap)stage
    val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])]( //-restore the stage's rdd and its ShuffleDependency
      ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
    _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime

    metrics = Some(context.taskMetrics)
    var writer: ShuffleWriter[Any, Any] = null
    try {
      val manager = SparkEnv.get.shuffleManager //-SortShuffleManager by default
      //-'SortShuffleWriter'
      writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) //-'BaseShuffleHandle'
      //-first compute this stage's rdd chain (flatMap/map in our example) via rdd.iterator(), then the writer
      //-outputs the records (the map-side combine of reduceByKey() happens inside the writer)
      logInfo("precomputing,task:" + toString + ",dep:" + dep
            + ",handle:" + dep.shuffleHandle +",part:" +partitionId + ",rdd " + rdd)
      writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) //-iterator() recurses up the rdd chain
      return writer.stop(success = true).get //-release resources then return MapStatus,see SortShuffleWriter#write()
    } catch {
      case e: Exception =>
        try {
          if (writer != null) {
            writer.stop(success = false)
          }
        } catch {
          case e: Exception =>
            log.debug("Could not stop writer", e)
        }
        throw e
    }
  }
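
  For comparison, ResultTask#runTask() (the task type of the final ResultStage) is even simpler: it deserializes (rdd, func) instead of (rdd, shuffleDep) and applies the job's function, which for collect() is the iter => iter.toArray seen earlier, directly to the partition's iterator. A simplified view of the Spark 1.4.x method:

//-ResultTask#runTask(), simplified view (Spark 1.4.x): the counterpart of the ShuffleMapTask above
override def runTask(context: TaskContext): U = {
  val ser = SparkEnv.get.closureSerializer.newInstance()
  //-taskBinary holds (rdd, func) for a ResultTask, see DAGScheduler#submitMissingTasks()
  val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
    ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
  metrics = Some(context.taskMetrics)
  //-no shuffle writer here: just apply func (e.g. iter.toArray for collect()) to this partition's records
  func(context, rdd.iterator(partition, context))
}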

 
