Spark Worker源码分析

最新推荐文章于 2023-11-20 14:46:48 发布

画画的老顽童

最新推荐文章于 2023-11-20 14:46:48 发布

阅读量114

点赞数

分类专栏： spark

本文链接：https://blog.csdn.net/m0_46449152/article/details/109546646

版权

spark 专栏收录该内容

15 篇文章 1 订阅

订阅专栏

一、LaunchDriver
创建driverRunner
内部启动一个线程（driverRunner.start）启动DriverRunner

创建driver的工作目录
构建ProcessBuilder，传入了driver的启动命令，需要的内存大小等信息
启动driver

二、LaunchExecutor
1、创建executor的工作目录
2、创建ExecutorRunner
3、内部启动一个线程（ExecutorRunner.start）启动ExecutorRunner
调用fetchAndRunExecutor()
1）封装一个ProcessBuilder
2）调用ProcessBuilder的start()方法启动executor进程，再调用Process的waitFor()方法等待该进程退出
3） executor执行完之后拿到返回状态
4）向ExecutorRunner线程所属的worker actor，发送ExecutorStateChanged消息
4、向master返回一个ExecutorStateChanged消息

LaunchDriver

    // Handles a request to launch a driver on this worker: builds a DriverRunner, registers it
    // in `drivers`, starts it, and then accounts for the cores and memory the driver uses.
    case LaunchDriver(driverId, driverDesc) => {
      logInfo(s"Asked to launch driver $driverId")
      // The runner receives a copy of the description whose command has been passed through
      // Worker.maybeUpdateSSLSettings.
      val sslAwareCommand = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)
      val runner = new DriverRunner(
        conf,
        driverId,
        workDir,
        sparkHome,
        driverDesc.copy(command = sslAwareCommand),
        self,
        akkaUrl)
      // Register the runner before starting it so it can be looked up by driver id.
      drivers(driverId) = runner
      // DriverRunner.start does its work on its own thread, so this call returns right away.
      runner.start()
      coresUsed += driverDesc.cores
      memoryUsed += driverDesc.mem
    }
    
-> DriverRunner.start  创建一个线程
  /**
   * Starts a dedicated thread that prepares, launches and tracks the driver.
   *
   * The thread creates the driver's working directory, calls `downloadUserJar`, builds the
   * launch command and hands it to `launchDriver`. Any `Exception` raised along the way is
   * recorded in `finalException`. The final state is then derived, stored in `finalState`
   * and sent to the owning worker as a `DriverStateChanged` message.
   */
  def start() = {
    val runnerThread = new Thread("DriverRunner for " + driverId) {
      override def run() {
        try {
          // Step 1: create the working directory for this driver.
          val workingDir = createWorkingDirectory()
          // Step 2: download the jar uploaded by the user into that directory.
          val userJarPath = downloadUserJar(workingDir)

          // Placeholder tokens that may appear in the command, with their replacements.
          // If we add ability to submit multiple jars they should also be added here
          val placeholders = Map(
            "{{WORKER_URL}}" -> workerUrl,
            "{{USER_JAR}}" -> userJarPath)

          // Arguments that are not placeholders are passed through unchanged.
          def substituteVariables(argument: String): String =
            placeholders.getOrElse(argument, argument)

          // Step 3: build the ProcessBuilder from the driver's command and memory setting.
          val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem,
            sparkHome.getAbsolutePath, substituteVariables)
          // Step 4: launch the driver.
          launchDriver(builder, workingDir, driverDesc.supervise)
        } catch {
          case e: Exception => finalException = Some(e)
        }

        // A kill takes precedence over an error, and an error over the exit code; only an
        // exit code of 0 counts as FINISHED.
        val state =
          if (killed) DriverState.KILLED
          else if (finalException.isDefined) DriverState.ERROR
          else if (finalExitCode == Some(0)) DriverState.FINISHED
          else DriverState.FAILED

        finalState = Some(state)

        // Notify the worker that owns this runner of the driver's final state.
        worker ! DriverStateChanged(driverId, state, finalException)
      }
    }
    runnerThread.start()
  }
-> launchDriver
  /**
   * Runs the driver command in `baseDir` through `runCommandWithRetry`.
   *
   * @param builder   process builder holding the driver launch command
   * @param baseDir   working directory of the driver; also receives the `stdout` and
   *                  `stderr` files
   * @param supervise forwarded unchanged to `runCommandWithRetry`
   */
  private def launchDriver(builder: ProcessBuilder, baseDir: File, supervise: Boolean) {
    builder.directory(baseDir)

    // Callback handed to runCommandWithRetry: wires the given process's output streams to
    // files under baseDir.
    def initialize(process: Process) = {
      // Redirect stdout to a file
      val outFile = new File(baseDir, "stdout")
      CommandUtils.redirectStream(process.getInputStream, outFile)

      // Append a header with the quoted launch command to the stderr file, then redirect
      // the process's stderr into the same file.
      val errFile = new File(baseDir, "stderr")
      val quotedCommand = builder.command.mkString("\"", "\" \"", "\"")
      val separator = "=" * 40
      val header = "Launch Command: %s\n%s\n\n".format(quotedCommand, separator)
      Files.append(header, errFile, UTF_8)
      CommandUtils.redirectStream(process.getErrorStream, errFile)
    }

    runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise)
  }

LaunchExecutor

    // Handles a request to launch an executor for an application on this worker. A request
    // whose masterUrl differs from activeMasterUrl is only logged as a warning.
    case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
      if (masterUrl == activeMasterUrl) {
        // Key under which the runner is cached in `executors`; the same string is the
        // executor's directory path relative to workDir.
        val executorKey = appId + "/" + execId
        try {
          logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

          // Create the executor's working directory
          val executorDir = new File(workDir, executorKey)
          if (!executorDir.mkdirs()) {
            throw new IOException("Failed to create directory " + executorDir)
          }

          // Create local dirs for the executor. These are passed to the executor via the
          // SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the
          // application finishes. Directories already recorded for this application are reused.
          val appLocalDirs = appDirectories.get(appId).getOrElse {
            Utils.getOrCreateLocalRootDirs(conf).map { rootDir =>
              Utils.createDirectory(rootDir).getAbsolutePath()
            }.toSeq
          }
          appDirectories(appId) = appLocalDirs

          // Build the ExecutorRunner that carries everything needed to launch the executor.
          val runner = new ExecutorRunner(
            appId,
            execId,
            appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
            cores_,
            memory_,
            self,
            workerId,
            host,
            webUi.boundPort,
            publicAddress,
            sparkHome,
            executorDir,
            akkaUrl,
            conf,
            appLocalDirs, ExecutorState.LOADING)
          // Cache the runner locally before starting it.
          executors(executorKey) = runner
          // ExecutorRunner.start launches the executor child process from its own thread.
          runner.start()
          coresUsed += cores_
          memoryUsed += memory_
          // Report the executor's current state back to the master.
          master ! ExecutorStateChanged(appId, execId, runner.state, None, None)
        } catch {
          case failure: Exception => {
            logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", failure)
            // If a runner was already registered, kill it and drop it from the cache.
            executors.get(executorKey).foreach { registered =>
              registered.kill()
              executors -= executorKey
            }
            master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
              Some(failure.toString), None)
          }
        }
      } else {
        logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
      }
 -> manager.start()  -> ExecutorRunner.start
   /**
    * Starts the executor from a separate thread and registers a JVM shutdown hook.
    *
    * `fetchAndRunExecutor` blocks until the executor process exits, so it is run on its own
    * thread; the caller can continue immediately while the executor is being launched.
    */
   def start() {
     workerThread = new Thread("ExecutorRunner for " + fullId) {
       override def run(): Unit = fetchAndRunExecutor()
     }
     workerThread.start()

     // Shutdown hook: when the JVM shuts down, call killProcess with a message saying the
     // worker is shutting down.
     shutdownHook = new Thread() {
       override def run(): Unit = killProcess(Some("Worker shutting down"))
     }
     Runtime.getRuntime.addShutdownHook(shutdownHook)
   }
->fetchAndRunExecutor
  /**
   * Builds the executor launch command, starts it as a child process, and blocks until that
   * process exits.
   *
   * On a normal exit the state becomes `EXITED` and the worker is sent an
   * `ExecutorStateChanged` message carrying the exit code. If the thread is interrupted the
   * state becomes `KILLED`; on any other `Exception` it becomes `FAILED`. In both failure
   * cases `killProcess` is called.
   */
  def fetchAndRunExecutor() {
    try {
      // Wrap the application's command in a ProcessBuilder.
      val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory,
        sparkHome.getAbsolutePath, substituteVariables)
      val command = builder.command()
      // Quoted form of the command, used for both the log line and the stderr header.
      val quotedCommand = command.mkString("\"", "\" \"", "\"")
      logInfo("Launch command: " + quotedCommand)

      builder.directory(executorDir)
      val env = builder.environment
      env.put("SPARK_LOCAL_DIRS", appLocalDirs.mkString(","))
      // In case we are running this from within the Spark Shell, avoid creating a "scala"
      // parent process for the executor command
      env.put("SPARK_LAUNCH_WITH_SCALA", "0")

      // Add webUI log urls
      val baseUrl =
        s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
      env.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
      env.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")

      // This is the call that actually launches the child process.
      process = builder.start()

      val header = "Spark Executor Command: %s\n%s\n\n".format(quotedCommand, "=" * 40)

      // Redirect its stdout and stderr to files
      val outFile = new File(executorDir, "stdout")
      stdoutAppender = FileAppender(process.getInputStream, outFile, conf)

      // The stderr file begins with the header describing the launch command.
      val errFile = new File(executorDir, "stderr")
      Files.write(header, errFile, UTF_8)
      stderrAppender = FileAppender(process.getErrorStream, errFile, conf)

      // Block until the already-running process exits; executor may exit with code 0 (when
      // driver instructs it to shutdown) or with nonzero exit code
      val exitCode = process.waitFor()
      // The executor has finished: record its state and exit code.
      state = ExecutorState.EXITED
      val message = "Command exited with code " + exitCode
      // Tell the worker that owns this runner about the state change.
      worker ! ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))
    } catch {
      case _: InterruptedException => {
        logInfo("Runner thread for executor " + fullId + " interrupted")
        state = ExecutorState.KILLED
        killProcess(None)
      }
      case failure: Exception => {
        logError("Error running executor", failure)
        state = ExecutorState.FAILED
        killProcess(Some(failure.toString))
      }
    }
  }

画画的老顽童

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Spark Worker源码分析

一、LaunchDriver创建driverRunner内部启动一个线程（driverRunner.start）启动DriverRunner创建driver的工作目录构建ProcessBuilder，传入了driver的启动命令，需要的内存大小等信息启动driver二、LaunchExecutor1、创建executor的工作目录2、创建ExecutorRunner3、内部启动一个线程（ExecutorRunner.start)启动ExecutorRunner调用fetchAndR
复制链接

扫一扫