Spark Core Source Code Analysis 2: Master Startup Flow

Blog: http://blog.csdn.net/yueqian_zhu/


Source file: org.apache.spark.deploy.master.Master.scala

1. The main method:

def main(argStrings: Array[String]) {
    SignalLogger.register(log)
    val conf = new SparkConf
    val args = new MasterArguments(argStrings, conf)
    val (actorSystem, _, _, _) = startSystemAndActor(args.host, args.port, args.webUiPort, conf)
    actorSystem.awaitTermination()
  }

This parses the Spark-related environment variables and command-line arguments, then creates the Akka ActorSystem and an ActorRef used to interact with the other nodes; the message-handling class is Master itself.
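For reference, here is a lightly abridged sketch of startSystemAndActor, paraphrased from the Spark 1.x source (helper names such as AkkaUtils.createActorSystem match that era of the code; minor details differ between versions):

def startSystemAndActor(
    host: String,
    port: Int,
    webUiPort: Int,
    conf: SparkConf): (ActorSystem, Int, Int, Option[Int]) = {
  val securityMgr = new SecurityManager(conf)
  // Create a remoting-enabled ActorSystem bound to host:port (port 0 picks a free port)
  val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port,
    conf = conf, securityManager = securityMgr)
  // The Master actor is the single message handler for the whole standalone cluster
  val actor = actorSystem.actorOf(
    Props(classOf[Master], host, boundPort, webUiPort, securityMgr, conf), actorName)
  // Ask the new actor which ports it actually bound (web UI, optional REST server)
  val timeout = AkkaUtils.askTimeout(conf)
  val portsResponse = Await.result(
    actor.ask(BoundPortsRequest)(timeout), timeout).asInstanceOf[BoundPortsResponse]
  (actorSystem, boundPort, portsResponse.webUIPort, portsResponse.restPort)
}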


2. The Actor preStart method

Since the Master is an Akka actor, its preStart method is naturally the first thing to run after creation.
override def preStart() {
    logInfo("Starting Spark master at " + masterUrl)
    logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
    // Listen for remote client disconnection events, since they don't go through Akka's watch()
    // Subscribe to this actor system's remoting lifecycle events, e.g. AssociatedEvent and DisassociatedEvent
    context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
    webUi.bind()
    masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort
    // Start a periodic task that checks for timed-out workers. A timed-out worker is marked
    // DEAD and its in-memory state is cleaned up. For each executor on that worker, an
    // ExecutorUpdated message is sent to its driver to report the executor as no longer
    // available; for each driver on that worker, the driver is relaunched on an available
    // worker if it is configured to be relaunched, otherwise its in-memory information is
    // simply removed. Note that a worker is fully removed only after it has timed out many
    // times; until then it is merely excluded from scheduling.
    context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut)

    masterMetricsSystem.registerSource(masterSource)
    masterMetricsSystem.start()
    applicationMetricsSystem.start()
    // Attach the master and app metrics servlet handler to the web ui after the metrics systems are
    // started.
    masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
    applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)

    // The code below sets up Master HA (recovery mode); it will be covered separately in a later post
    val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match {
      case "ZOOKEEPER" =>
        logInfo("Persisting recovery state to ZooKeeper")
        val zkFactory =
          new ZooKeeperRecoveryModeFactory(conf, SerializationExtension(context.system))
        (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
      case "FILESYSTEM" =>
        val fsFactory =
          new FileSystemRecoveryModeFactory(conf, SerializationExtension(context.system))
        (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
      case "CUSTOM" =>
        val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory"))
        val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serialization])
          .newInstance(conf, SerializationExtension(context.system))
          .asInstanceOf[StandaloneRecoveryModeFactory]
        (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
      case _ =>
        (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
    }
    persistenceEngine = persistenceEngine_
    leaderElectionAgent = leaderElectionAgent_
  }
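The CheckForWorkerTimeOut message scheduled above ends up in timeOutDeadWorkers. A sketch paraphrased from the Spark 1.x source (REAPER_ITERATIONS controls how many extra timeout periods a DEAD worker lingers in the UI before being culled for good):

private def timeOutDeadWorkers() {
  // Copy the matches into an array so we don't mutate the set while iterating over it
  val currentTime = System.currentTimeMillis()
  val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT).toArray
  for (worker <- toRemove) {
    if (worker.state != WorkerState.DEAD) {
      logWarning("Removing %s because we got no heartbeat in %d seconds".format(
        worker.id, WORKER_TIMEOUT / 1000))
      removeWorker(worker) // marks it DEAD, notifies drivers, relaunches supervised drivers
    } else if (worker.lastHeartbeat < currentTime - (REAPER_ITERATIONS + 1) * WORKER_TIMEOUT) {
      workers -= worker // this DEAD worker has been visible in the UI long enough; cull it
    }
  }
}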

At this point the Master's active startup flow is complete; from here on it passively handles requests sent by other nodes.


3. Handling worker node registration

case RegisterWorker(id, workerHost, workerPort, cores, memory, workerUiPort, publicAddress) =>
{
  logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
    workerHost, workerPort, cores, Utils.megabytesToString(memory)))
  if (state == RecoveryState.STANDBY) {
    // ignore, don't send response
  } else if (idToWorker.contains(id)) {
    // If this worker ID was already registered, the registration fails
    sender ! RegisterWorkerFailed("Duplicate worker ID")
  } else {
    val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
      sender, workerUiPort, publicAddress)
    if (registerWorker(worker)) { // record the worker in the Master's in-memory structures
      persistenceEngine.addWorker(worker)
      // reply to the worker with a RegisteredWorker message
      sender ! RegisteredWorker(masterUrl, masterWebUiUrl)
      schedule() // trigger scheduling
    } else {
      val workerAddress = worker.actor.path.address
      logWarning("Worker registration failed. Attempted to re-register worker at same " +
        "address: " + workerAddress)
      sender ! RegisterWorkerFailed("Attempted to re-register worker at same address: "
        + workerAddress)
    }
  }
}
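The registerWorker helper called above is what actually records the worker in the Master's memory. A sketch paraphrased from the Spark 1.x source:

private def registerWorker(worker: WorkerInfo): Boolean = {
  // There may be one or more refs to DEAD workers on this same node (with different IDs);
  // remove them first
  workers.filter { w =>
    (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
  }.foreach { w => workers -= w }

  val workerAddress = worker.actor.path.address
  if (addressToWorker.contains(workerAddress)) {
    val oldWorker = addressToWorker(workerAddress)
    if (oldWorker.state == WorkerState.UNKNOWN) {
      // An UNKNOWN worker re-registering means it was restarted during master recovery,
      // so the old entry must be dead: drop it and accept the new one
      removeWorker(oldWorker)
    } else {
      logInfo("Attempted to re-register worker at same address: " + workerAddress)
      return false
    }
  }

  workers += worker
  idToWorker(worker.id) = worker
  addressToWorker(workerAddress) = worker
  true
}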

private def schedule(): Unit = {
  if (state != RecoveryState.ALIVE) { return }
  // Drivers take strict precedence over executors
  val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
  // Shuffle the available workers, then launch the drivers waiting in waitingDrivers
  for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
    for (driver <- waitingDrivers) {
      if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
        launchDriver(worker, driver) // send a LaunchDriver message to the worker
        waitingDrivers -= driver
      }
    }
  }
  startExecutorsOnWorkers() // analyzed below
}
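launchDriver itself is short; a sketch paraphrased from the Spark 1.x source:

def launchDriver(worker: WorkerInfo, driver: DriverInfo) {
  logInfo("Launching driver " + driver.id + " on worker " + worker.id)
  worker.addDriver(driver)        // account for the driver's cores and memory on the worker
  driver.worker = Some(worker)
  worker.actor ! LaunchDriver(driver.id, driver.desc) // tell the worker to start the process
  driver.state = DriverState.RUNNING
}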

private def startExecutorsOnWorkers(): Unit = {
  // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
  // in the queue, then the second app, etc.
  // The spreadOutApps flag decides whether an app is spread across as many workers as
  // possible, or packed onto as few workers as possible
  if (spreadOutApps) {
    // Try to spread out each app among all the workers, until it has all its cores
    for (app <- waitingApps if app.coresLeft > 0) {
      val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
        .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
          worker.coresFree >= app.desc.coresPerExecutor.getOrElse(1))
        .sortBy(_.coresFree).reverse
      val numUsable = usableWorkers.length
      val assigned = new Array[Int](numUsable) // Number of cores to give on each node
      var toAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
      var pos = 0
      // The while loop spreads the app's cores round-robin across the usable workers,
      // so that one or more executors get started on each of them
      while (toAssign > 0) {
        if (usableWorkers(pos).coresFree - assigned(pos) > 0) {
          toAssign -= 1
          assigned(pos) += 1
        }
        pos = (pos + 1) % numUsable
      }
      // Now that we've decided how many cores to give on each node, let's actually give them
      for (pos <- 0 until numUsable if assigned(pos) > 0) {
        allocateWorkerResourceToExecutors(app, assigned(pos), usableWorkers(pos))
      }
    }
  } else {
    // Pack each app into as few workers as possible until we've assigned all its cores
    for (worker <- workers if worker.coresFree > 0 && worker.state == WorkerState.ALIVE) {
      for (app <- waitingApps if app.coresLeft > 0) {
        allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
      }
    }
  }
}
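To make the spread-out branch concrete, here is a tiny standalone simulation of the round-robin loop above (the worker capacities and the requested core count are made-up example values, not taken from the source):

object SpreadOutExample extends App {
  val coresFree = Array(8, 4, 2)               // free cores on three hypothetical ALIVE workers
  val assigned  = new Array[Int](coresFree.length)
  var toAssign  = math.min(10, coresFree.sum)  // the app still wants 10 cores
  var pos = 0
  while (toAssign > 0) {
    if (coresFree(pos) - assigned(pos) > 0) {  // this worker still has a spare core
      toAssign -= 1
      assigned(pos) += 1
    }
    pos = (pos + 1) % coresFree.length         // move on to the next worker, round-robin
  }
  println(assigned.mkString(", "))             // prints "4, 4, 2": cores spread across workers
}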

private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    coresToAllocate: Int,
    worker: WorkerInfo): Unit = {
  val memoryPerExecutor = app.desc.memoryPerExecutorMB
  val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
  var coresLeft = coresToAllocate
  while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
    val exec = app.addExecutor(worker, coresPerExecutor)
    coresLeft -= coresPerExecutor
    // send a LaunchExecutor message to the worker and an ExecutorAdded message to the driver
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}
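launchExecutor sends the two messages mentioned in the comment above; a sketch paraphrased from the Spark 1.x source:

def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
  logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
  worker.addExecutor(exec) // account for the executor's cores and memory on the worker
  worker.actor ! LaunchExecutor(masterUrl,
    exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory)
  exec.application.driver ! ExecutorAdded(
    exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
}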

Besides the HA-related messages, the Master can also receive the messages below. Their handling is fairly simple; we will analyze them step by step together with the job submission process in later posts.
case RequestSubmitDriver(description) // request to submit a driver: record its info and schedule
case RequestKillDriver(driverId)
case RequestDriverStatus(driverId)
case RegisterApplication(description) // application submission: record its info and schedule
case ExecutorStateChanged(appId, execId, state, message, exitStatus)
case DriverStateChanged(driverId, state, exception)
case Heartbeat(workerId) // heartbeat used to keep a worker node alive (see the sketch below)
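As an example of how simple these handlers are, the Heartbeat case merely refreshes the worker's lastHeartbeat timestamp, which is exactly what the timeout check scheduled in preStart relies on. A sketch paraphrased from the Spark 1.x source:

case Heartbeat(workerId) => {
  idToWorker.get(workerId) match {
    case Some(workerInfo) =>
      workerInfo.lastHeartbeat = System.currentTimeMillis()
    case None =>
      if (workers.map(_.id).contains(workerId)) {
        // Known but no longer registered (e.g. after a master failover): ask it to re-register
        logWarning(s"Got heartbeat from unregistered worker $workerId. Asking it to re-register.")
        sender ! ReconnectWorker(masterUrl)
      } else {
        logWarning(s"Got heartbeat from entirely unknown worker $workerId; ignoring it.")
      }
  }
}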
