Structure of this article:
- Preface
- Detailed process
1. Preface
The previous posts covered the Spark RPC framework and Spark's startup-time communication; this post focuses on Spark's runtime communication.
As before, the walkthrough assumes Standalone mode:
When a user submits an application, the application's SparkContext sends an application-registration message to the Master, and the Master allocates Executors for the application; each Executor then sends a registration-success message back to the SparkContext. When an action is triggered on one of the SparkContext's RDDs, the RDD DAG is built, the DAGScheduler splits it into Stages, and each Stage is converted into a TaskSet. The TaskScheduler then sends launch messages to the registered Executors, which start and run the tasks on receipt. Finally, once all tasks have finished, the Driver processes the results and reclaims the resources.
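To make this flow concrete, here is a minimal sketch of a Standalone-mode application; spark://master:7077 is a placeholder URL, and the count() action at the end is what triggers the DAG construction, Stage splitting, and task dispatch just described.

import org.apache.spark.{SparkConf, SparkContext}

object RuntimeCommDemo {
  def main(args: Array[String]): Unit = {
    // Creating the SparkContext sends RegisterApplication to the Master,
    // which allocates Executors; they in turn register back with the Driver.
    val conf = new SparkConf()
      .setAppName("runtime-comm-demo")
      .setMaster("spark://master:7077") // placeholder Standalone master URL
    val sc = new SparkContext(conf)

    // Transformations only build the RDD DAG; nothing runs yet.
    val rdd = sc.parallelize(1 to 100, numSlices = 4).map(_ * 2)

    // The action triggers DAGScheduler -> Stages -> TaskSets, and the
    // TaskScheduler dispatches the tasks to the registered Executors.
    println(s"count = ${rdd.count()}")

    // The Driver collects the result; stop() releases the application's resources.
    sc.stop()
  }
}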
2. Detailed Process
2.1 ClientEndpoint sends RegisterApplication to the Master
Execution starts from the application's main method, which creates the SparkContext. SparkContext creation is fairly involved; one of its steps is instantiating the SchedulerBackend (the scheduling backend interface behind the TaskScheduler), as shown below:
// Create and start the scheduler
val (sched, ts) = SparkContext.createTaskScheduler(this, master, deployMode)
_schedulerBackend = sched
_taskScheduler = ts
private def createTaskScheduler(
    sc: SparkContext,
    master: String,
    deployMode: String): (SchedulerBackend, TaskScheduler) = {
  import SparkMasterRegex._

  // When running locally, don't try to re-execute tasks on failure.
  val MAX_LOCAL_TASK_FAILURES = 1

  master match {
    case "local" =>
      val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
      val backend = new LocalSchedulerBackend(sc.getConf, scheduler, 1)
      scheduler.initialize(backend)
      (backend, scheduler)

    case LOCAL_N_REGEX(threads) =>
      def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
      // local[*] estimates the number of cores on the machine; local[N] uses exactly N threads.
      val threadCount = if (threads == "*") localCpuCount else threads.toInt
      if (threadCount <= 0) {
        throw new SparkException(s"Asked to run locally with $threadCount threads")
      }
      val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
      val backend = new LocalSchedulerBackend(sc.getConf, scheduler, threadCount)
      scheduler.initialize(backend)
      (backend, scheduler)

    case LOCAL_N_FAILURES_REGEX(threads, maxFailures) =>
      def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
      // local[*, M] means the number of cores on the computer with M failures
      // local[N, M] means exactly N threads with M failures
      val threadCount = if (threads == "*") localCpuCount else threads.toInt
      val scheduler = new TaskSchedulerImpl(sc, maxFailures.toInt, isLocal = true)
      val backend = new LocalSchedulerBackend(sc.getConf, scheduler, threadCount)
      scheduler.initialize(backend)
      (backend, scheduler)

    case SPARK_REGEX(sparkUrl) =>
      val scheduler = new TaskSchedulerImpl(sc)
      val masterUrls = sparkUrl.split(",").map("spark://" + _)
      val backend = new StandaloneSchedulerBackend(scheduler, sc, masterUrls)
      scheduler.initialize(backend)
      (backend, scheduler)

    case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
      // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang.
      val memoryPerSlaveInt = memoryPerSlave.toInt
      if (sc.executorMemory > memoryPerSlaveInt) {
        throw new SparkException(
          "Asked to launch cluster with %d MB RAM per worker but requested %d MB/worker".format(
            memoryPerSlaveInt, sc.executorMemory))
      }
      val scheduler = new TaskSchedulerImpl(sc)
      val localCluster = new LocalSparkCluster(
        numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt, sc.conf)
      val masterUrls = localCluster.start()
      val backend = new StandaloneSchedulerBackend(scheduler, sc, masterUrls)
      scheduler.initialize(backend)
      backend.shutdownCallback = (backend: StandaloneSchedulerBackend) => {
        localCluster.stop()
      }
      (backend, scheduler)

    case masterUrl =>
      val cm = getClusterManager(masterUrl) match {
        case Some(clusterMgr) => clusterMgr
        case None => throw new SparkException("Could not parse Master URL: '" + master + "'")
      }
      try {
        val scheduler = cm.createTaskScheduler(sc, masterUrl)
        val backend = cm.createSchedulerBackend(sc, masterUrl, scheduler)
        cm.initialize(scheduler, backend)
        (backend, scheduler)
      } catch {
        case se: SparkException => throw se
        case NonFatal(e) =>
          throw new SparkException("External scheduler cannot be instantiated", e)
      }
  }
}
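To see which branch a given master string selects, here is a small self-contained sketch. The regexes copy Spark's SparkMasterRegex patterns (the real object is private to Spark, so they are re-declared here for illustration), and branch merely returns a label describing what createTaskScheduler would build:

object MasterRegexDemo extends App {
  // Same patterns as Spark's private SparkMasterRegex, copied for illustration.
  val LOCAL_N_REGEX = """local\[([0-9]+|\*)\]""".r
  val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+|\*)\s*,\s*([0-9]+)\]""".r
  val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r
  val SPARK_REGEX = """spark://(.*)""".r

  def branch(master: String): String = master match {
    case "local"                      => "LocalSchedulerBackend, 1 thread"
    case LOCAL_N_REGEX(threads)       => s"LocalSchedulerBackend, $threads thread(s)"
    case LOCAL_N_FAILURES_REGEX(t, f) => s"LocalSchedulerBackend, $t thread(s), $f max failures"
    case LOCAL_CLUSTER_REGEX(n, c, m) => s"LocalSparkCluster: $n workers, $c cores, $m MB each"
    case SPARK_REGEX(url)             => s"StandaloneSchedulerBackend against spark://$url"
    case other                        => s"external cluster manager for '$other'"
  }

  Seq("local", "local[4]", "local[*, 2]", "local-cluster[2,1,1024]",
      "spark://host1:7077,host2:7077")
    .foreach(m => println(s"$m -> ${branch(m)}"))
}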
In Standalone mode, the SchedulerBackend implementation is the StandaloneSchedulerBackend created in the case SPARK_REGEX(sparkUrl) branch:
private[spark] class StandaloneSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext,
    masters: Array[String])
  extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv)
  with StandaloneAppClientListener
2.1.1 Instantiating ClientEndpoint and DriverEndpoint
1. In StandaloneSchedulerBackend's start method, StandaloneAppClient's ClientEndpoint is instantiated.
StandaloneSchedulerBackend#start() -> StandaloneAppClient#start():
StandaloneSchedulerBackend #
override def start() {
  super.start()
  ...
  client = new StandaloneAppClient(sc.env.rpcEnv, masters, appDesc, this, conf)
  client.start()
  ...
}
StandaloneAppClient #
def start() {
  // Just launch an rpcEndpoint; it will call back into the listener.
  endpoint.set(rpcEnv.setupEndpoint("AppClient", new ClientEndpoint(rpcEnv)))
}
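The ClientEndpoint registered here is what section 2.1.2 below picks up: once the endpoint starts, its onStart hook sends RegisterApplication to every configured Master. The following is a simplified, self-contained sketch of that pattern, not the real (private) Spark classes; the message and method names mirror Spark's, everything else is illustrative:

import java.util.concurrent.Executors

// Stand-ins for Spark's deploy messages and RpcEndpointRef (illustrative only).
case class RegisterApplication(appDescription: String)
case class RegisteredApplication(appId: String)
trait MasterRef { def send(msg: Any): Unit }

class ClientEndpointSketch(masters: Seq[MasterRef], appDesc: String) {
  private val registerMasterThreadPool = Executors.newFixedThreadPool(masters.size)

  // Mirrors ClientEndpoint#onStart -> registerWithMaster -> tryRegisterAllMasters:
  // registration is attempted against every Master in parallel; the first
  // Master to answer wins (failover for multi-master setups).
  def onStart(): Unit =
    masters.foreach { m =>
      registerMasterThreadPool.submit(new Runnable {
        override def run(): Unit = m.send(RegisterApplication(appDesc))
      })
    }

  // Mirrors ClientEndpoint#receive: handle the Master's acknowledgement.
  def receive(msg: Any): Unit = msg match {
    case RegisteredApplication(appId) => println(s"registered as application $appId")
    case other                        => println(s"unhandled message: $other")
  }
}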
2. In StandaloneSchedulerBackend's parent class, CoarseGrainedSchedulerBackend, the start method creates and registers the DriverEndpoint (via createDriverEndpointRef):
CoarseGrainedSchedulerBackend #
override def start() {
  val properties = new ArrayBuffer[(String, String)]
  for ((key, value) <- scheduler.sc.conf.getAll) {
    if (key.startsWith("spark.")) {
      properties += ((key, value))
    }
  }
  // TODO (prashant) send conf instead of properties
  driverEndpoint = createDriverEndpointRef(properties)
}

protected def createDriverEndpointRef(
    properties: ArrayBuffer[(String, String)]): RpcEndpointRef = {
  rpcEnv.setupEndpoint(ENDPOINT_NAME, createDriverEndpoint(properties))
}

protected def createDriverEndpoint(properties: Seq[(String, String)]): DriverEndpoint = {
  new DriverEndpoint(rpcEnv, properties)
}
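The spark.* filter in start above determines which driver-side settings are handed to the DriverEndpoint (and later shipped to executors). A tiny runnable illustration of the same filter, using hypothetical config entries in place of scheduler.sc.conf.getAll:

import scala.collection.mutable.ArrayBuffer

object SparkPropFilterDemo extends App {
  // Hypothetical conf entries standing in for scheduler.sc.conf.getAll.
  val all = Seq(
    "spark.app.name"        -> "demo",
    "spark.executor.memory" -> "1g",
    "java.io.tmpdir"        -> "/tmp") // not a spark.* key, so it is dropped

  val properties = new ArrayBuffer[(String, String)]
  for ((key, value) <- all if key.startsWith("spark.")) {
    properties += ((key, value))
  }
  properties.foreach { case (k, v) => println(s"$k = $v") } // prints the two spark.* entries
}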
In other words, during SparkContext startup the SchedulerBackend is created, and the SchedulerBackend in turn creates two endpoints: the ClientEndpoint and the DriverEndpoint.
2.1.2 Registering the Application
Application registration mainly involves the ClientEndpoint. In the Cli