Source file: org.apache.spark.deploy.master.Master.scala
一、The main method:
```scala
def main(argStrings: Array[String]) {
  SignalLogger.register(log)
  val conf = new SparkConf
  val args = new MasterArguments(argStrings, conf)
  val (actorSystem, _, _, _) = startSystemAndActor(args.host, args.port, args.webUiPort, conf)
  actorSystem.awaitTermination()
}
```
main parses the Spark-related environment variables and command-line arguments, then creates an Akka ActorSystem plus an ActorRef used to interact with other nodes; the Master class itself is the message handler.
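startSystemAndActor is defined in the same file. Below is a minimal sketch of what it does, based on the Spark 1.x Akka-based deploy code; the real method additionally asks the newly created actor for its bound web UI and REST ports, which is simplified away here:

```scala
// Simplified sketch: create the ActorSystem and the Master actor inside it
def startSystemAndActor(
    host: String,
    port: Int,
    webUiPort: Int,
    conf: SparkConf): (ActorSystem, Int, Int, Option[Int]) = {
  val securityMgr = new SecurityManager(conf)
  // Bind an ActorSystem on host:port for the Master to live in
  val (actorSystem, boundPort) = AkkaUtils.createActorSystem(
    "sparkMaster", host, port, conf = conf, securityManager = securityMgr)
  // Instantiating the actor runs the Master constructor and then preStart()
  actorSystem.actorOf(
    Props(classOf[Master], host, boundPort, webUiPort, securityMgr, conf), "Master")
  // Simplified: return the known ports directly instead of asking the actor for them
  (actorSystem, boundPort, webUiPort, None)
}
```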
二、The preStart method:
Since main creates an Akka actor, the Master's preStart method is naturally the first thing to run.
```scala
override def preStart() {
  logInfo("Starting Spark master at " + masterUrl)
  logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
  // Listen for remote client disconnection events, since they don't go through Akka's watch()
  // Subscribe to this ActorSystem's remoting lifecycle events,
  // such as AssociatedEvent and DisassociatedEvent
  context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
  webUi.bind()
  masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort
  // Schedule a periodic check for timed-out workers (see the sketch after this block).
  // A worker that times out is marked DEAD and its in-memory state is cleaned up. For each
  // Executor on that worker, an ExecutorUpdated message is sent to the driver to signal that
  // the Executor is no longer available. For each Driver on that worker, the driver is
  // relaunched on another available worker if relaunching is configured; otherwise its
  // in-memory state is removed. A worker is only removed for good after it has timed out
  // several times; until then it is merely excluded from scheduling.
  context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut)
  masterMetricsSystem.registerSource(masterSource)
  masterMetricsSystem.start()
  applicationMetricsSystem.start()
  // Attach the master and app metrics servlet handler to the web ui after the metrics systems are
  // started.
  masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
  applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
  // The Master HA (recovery) setup follows; it will be covered separately in a later post
  val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match {
    case "ZOOKEEPER" =>
      logInfo("Persisting recovery state to ZooKeeper")
      val zkFactory =
        new ZooKeeperRecoveryModeFactory(conf, SerializationExtension(context.system))
      (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
    case "FILESYSTEM" =>
      val fsFactory =
        new FileSystemRecoveryModeFactory(conf, SerializationExtension(context.system))
      (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
    case "CUSTOM" =>
      val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory"))
      val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serialization])
        .newInstance(conf, SerializationExtension(context.system))
        .asInstanceOf[StandaloneRecoveryModeFactory]
      (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
    case _ =>
      (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
  }
  persistenceEngine = persistenceEngine_
  leaderElectionAgent = leaderElectionAgent_
}
```
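The CheckForWorkerTimeOut message scheduled above ends up in a timeout handler. A sketch of that logic, following the Spark 1.x source (REAPER_ITERATIONS controls how many timeout periods a DEAD worker is kept around before being dropped entirely):

```scala
def timeOutDeadWorkers() {
  // Copy the candidates into an array so we don't mutate the set while iterating over it
  val currentTime = System.currentTimeMillis()
  val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT).toArray
  for (worker <- toRemove) {
    if (worker.state != WorkerState.DEAD) {
      logWarning("Removing %s because we got no heartbeat in %d seconds".format(
        worker.id, WORKER_TIMEOUT / 1000))
      // Marks the worker DEAD, notifies drivers of lost executors,
      // and relaunches or removes its drivers
      removeWorker(worker)
    } else {
      // Keep a DEAD worker visible for a while before dropping it completely
      if (worker.lastHeartbeat < currentTime - (REAPER_ITERATIONS + 1) * WORKER_TIMEOUT) {
        workers -= worker
      }
    }
  }
}
```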
At this point the Master's active startup flow is complete; from here on it passively handles requests from other components.
三、Handling worker node registration
```scala
case RegisterWorker(id, workerHost, workerPort, cores, memory, workerUiPort, publicAddress) =>
{
  logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
    workerHost, workerPort, cores, Utils.megabytesToString(memory)))
  if (state == RecoveryState.STANDBY) {
    // ignore, don't send response
  } else if (idToWorker.contains(id)) {
    // Registration fails if this worker id has already been registered
    sender ! RegisterWorkerFailed("Duplicate worker ID")
  } else {
    val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
      sender, workerUiPort, publicAddress)
    if (registerWorker(worker)) { // Record the worker in the Master's in-memory state
      persistenceEngine.addWorker(worker)
      // Send a RegisteredWorker message back to the worker
      sender ! RegisteredWorker(masterUrl, masterWebUiUrl)
      schedule() // Trigger scheduling
    } else {
      val workerAddress = worker.actor.path.address
      logWarning("Worker registration failed. Attempted to re-register worker at same " +
        "address: " + workerAddress)
      sender ! RegisterWorkerFailed("Attempted to re-register worker at same address: "
        + workerAddress)
    }
  }
}
```
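registerWorker itself only maintains the Master's bookkeeping. A sketch of its logic, following the Spark 1.x source, where workers, idToWorker and addressToWorker are the Master's in-memory lookup structures:

```scala
private def registerWorker(worker: WorkerInfo): Boolean = {
  // There may be one or more refs to dead workers on this same node (w/ different IDs);
  // remove them first
  workers.filter { w =>
    (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
  }.foreach { w =>
    workers -= w
  }
  val workerAddress = worker.actor.path.address
  if (addressToWorker.contains(workerAddress)) {
    val oldWorker = addressToWorker(workerAddress)
    if (oldWorker.state == WorkerState.UNKNOWN) {
      // An UNKNOWN worker means it was restored during recovery and is now re-registering;
      // replace the old entry with the new one
      removeWorker(oldWorker)
    } else {
      logInfo("Attempted to re-register worker at same address: " + workerAddress)
      return false
    }
  }
  workers += worker
  idToWorker(worker.id) = worker
  addressToWorker(workerAddress) = worker
  true
}
```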
```scala
private def schedule(): Unit = {
  if (state != RecoveryState.ALIVE) { return }
  // Drivers take strict precedence over executors
  val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
  // Walk the shuffled ALIVE workers and launch any driver in waitingDrivers that fits
  for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
    for (driver <- waitingDrivers) {
      if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
        launchDriver(worker, driver) // Sends a LaunchDriver message to the worker
        waitingDrivers -= driver
      }
    }
  }
  startExecutorsOnWorkers() // Analyzed below
}
```
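launchDriver itself is short; a sketch following the Spark 1.x source:

```scala
def launchDriver(worker: WorkerInfo, driver: DriverInfo) {
  logInfo("Launching driver " + driver.id + " on worker " + worker.id)
  // Record the assignment on both sides of the relationship
  worker.addDriver(driver)
  driver.worker = Some(worker)
  // Tell the worker to actually start the driver process
  worker.actor ! LaunchDriver(driver.id, driver.desc)
  driver.state = DriverState.RUNNING
}
```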
```scala
private def startExecutorsOnWorkers(): Unit = {
  // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
  // in the queue, then the second app, etc.
  // spreadOutApps controls whether an app is spread across as many workers as possible
  // or packed onto as few workers as possible
  if (spreadOutApps) {
    // Try to spread out each app among all the workers, until it has all its cores
    for (app <- waitingApps if app.coresLeft > 0) {
      val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
        .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
          worker.coresFree >= app.desc.coresPerExecutor.getOrElse(1))
        .sortBy(_.coresFree).reverse
      val numUsable = usableWorkers.length
      val assigned = new Array[Int](numUsable) // Number of cores to give on each node
      var toAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
      var pos = 0
      // Round-robin over the usable workers so the app is spread as widely as possible,
      // launching one or more Executors on each worker
      while (toAssign > 0) {
        if (usableWorkers(pos).coresFree - assigned(pos) > 0) {
          toAssign -= 1
          assigned(pos) += 1
        }
        pos = (pos + 1) % numUsable
      }
      // Now that we've decided how many cores to give on each node, let's actually give them
      for (pos <- 0 until numUsable if assigned(pos) > 0) {
        allocateWorkerResourceToExecutors(app, assigned(pos), usableWorkers(pos))
      }
    }
  } else {
    // Pack each app into as few workers as possible until we've assigned all its cores
    for (worker <- workers if worker.coresFree > 0 && worker.state == WorkerState.ALIVE) {
      for (app <- waitingApps if app.coresLeft > 0) {
        allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
      }
    }
  }
}
```
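To see what the round-robin loop does, here is a self-contained toy run of the same algorithm with hypothetical numbers: three usable workers with 4, 3 and 1 free cores, and an app that still needs 6 cores.

```scala
object SpreadOutDemo extends App {
  val coresFree = Array(4, 3, 1) // free cores per usable worker (hypothetical)
  val assigned = new Array[Int](coresFree.length)
  var toAssign = math.min(6, coresFree.sum) // the app needs 6 cores; 6 <= 8, so assign 6
  var pos = 0
  while (toAssign > 0) {
    if (coresFree(pos) - assigned(pos) > 0) {
      toAssign -= 1
      assigned(pos) += 1
    }
    pos = (pos + 1) % coresFree.length
  }
  // The first pass gives each worker one core; the remaining cores go around again,
  // skipping the worker that is already full. Prints: 3, 2, 1
  println(assigned.mkString(", "))
}
```

Because the loop hands out one core per worker per pass, the app lands on as many workers as possible rather than filling up the first one.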
```scala
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    coresToAllocate: Int,
    worker: WorkerInfo): Unit = {
  val memoryPerExecutor = app.desc.memoryPerExecutorMB
  // If the app doesn't specify cores per executor, a single executor gets the whole allocation
  val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
  var coresLeft = coresToAllocate
  while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
    val exec = app.addExecutor(worker, coresPerExecutor)
    coresLeft -= coresPerExecutor
    // Sends a LaunchExecutor message to the worker and an ExecutorAdded message to the driver
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}
```
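launchExecutor is where those two messages are actually sent; a sketch following the Spark 1.x source:

```scala
def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
  logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
  worker.addExecutor(exec)
  // Tell the worker to start the executor process...
  worker.actor ! LaunchExecutor(masterUrl,
    exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory)
  // ...and tell the application's driver that a new executor has been added
  exec.application.driver ! ExecutorAdded(
    exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
}
```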
Besides the HA-related messages, the Master can also receive the following messages. Their handling is fairly simple, and they will be analyzed step by step later, together with the job submission process.
```scala
case RequestSubmitDriver(description) // Request to submit a driver: record its info and schedule
case RequestKillDriver(driverId)
case RequestDriverStatus(driverId)
case RegisterApplication(description) // Application submission: record its info and schedule
case ExecutorStateChanged(appId, execId, state, message, exitStatus)
case DriverStateChanged(driverId, state, exception)
case Heartbeat(workerId)              // Heartbeat, used to keep worker nodes alive
```
Reposted from: http://blog.csdn.net/yueqian_zhu/article/details/47907095