spark源码分析之master注册机制篇

这里写图片描述
解释:
worker向master注册

  • worker在启动之后,就会向master进行注册

  • 对于状态为DEAD的worker,直接过滤掉其注册请求。例如:master在规定时间期限内已经完成了恢复,但发现其中某个worker处于UNKNOWN状态,于是对该worker进行remove,将其状态设置为DEAD;如果过了很长时间该worker又向master发起注册,master会直接将其过滤掉

  • 对于状态为UNKNOWN的worker,master会将旧的worker信息清理掉,替换成新的worker信息。例如:master刚启动(恢复)时,会向worker发送新的master地址,并将该worker的状态置为UNKNOWN;当worker向master返回注册信息时,master会将旧的worker信息清理掉,替换成新的worker信息
  • 将worker加入内存缓存中(HashMap),用持久化引擎将worker信息持久化,可能是文件系统,可能是zookeeper
  • 调用schedule()方法进行调度

源码解释:
第一步:处理worker的注册请求
源码位置:org.apache.spark.deploy.master.Master

/**
 * Handles a worker's registration request.
 * Note:
 *  A major benefit of this registration design is that, in production, a new
 *  worker can be added to an already-running Spark cluster — and immediately
 *  used to increase processing capacity — without restarting the cluster.
 */
case RegisterWorker(
    id, workerHost, workerPort, workerRef, cores, memory, workerUiPort, publicAddress) => {
  logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
    workerHost, workerPort, cores, Utils.megabytesToString(memory)))
  // A STANDBY master does not accept registrations; tell the worker so.
  if (state == RecoveryState.STANDBY) {
    context.reply(MasterInStandby)
    // Otherwise (ALIVE): reject duplicate worker ids to avoid double registration.
  } else if (idToWorker.contains(id)) {
    context.reply(RegisterWorkerFailed("Duplicate worker ID"))
  } else {
    // Wrap the worker's id, host, port, cores, memory, etc. into a WorkerInfo.
    val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
      workerRef, workerUiPort, publicAddress)
    // registerWorker returns false when a live worker already occupies this address.
    if (registerWorker(worker)) {
      // Persist the worker (file system or ZooKeeper, depending on recovery mode).
      persistenceEngine.addWorker(worker)
      context.reply(RegisteredWorker(self, masterWebUiUrl))
      // Kick off a resource-scheduling pass now that cluster capacity changed.
      schedule()
      // Registration failed: another live worker holds the same address.
    } else {
      val workerAddress = worker.endpoint.address
      logWarning("Worker registration failed. Attempted to re-register worker at same " +
        "address: " + workerAddress)
      // Reply with RegisterWorkerFailed so the worker knows the attempt was rejected.
      context.reply(RegisterWorkerFailed("Attempted to re-register worker at same address: "
        + workerAddress))
    }
  }
}

第二步:点击第一步的registerWorker
源码位置:org.apache.spark.deploy.master.Master

 /**
  * Records a newly registering worker in the master's in-memory state.
  *
  * Stale DEAD entries from the same host/port are purged first. If a worker
  * is already bound to the same RPC address, it is replaced only when its
  * state is UNKNOWN (i.e. it was restarted during recovery); a live worker
  * at that address causes the registration to be rejected.
  *
  * @return true when the worker was registered, false on an address conflict
  */
 private def registerWorker(worker: WorkerInfo): Boolean = {
   // There may be one or more refs to dead workers on this same node
   // (with different IDs) — drop every DEAD entry for this host/port.
   val staleEntries = workers.filter { w =>
     w.host == worker.host && w.port == worker.port && w.state == WorkerState.DEAD
   }
   staleEntries.foreach(workers -= _)

   val workerAddress = worker.endpoint.address
   addressToWorker.get(workerAddress) match {
     case Some(oldWorker) if oldWorker.state == WorkerState.UNKNOWN =>
       // A worker registering from UNKNOWN implies it was restarted during
       // recovery; the old entry must be dead, so evict it and accept the
       // new registration.
       removeWorker(oldWorker)
     case Some(_) =>
       // A live worker already occupies this address: reject the duplicate.
       logInfo("Attempted to re-register worker at same address: " + workerAddress)
       return false
     case None =>
       // Fresh address — nothing to clean up.
   }

   // Record the worker in the set plus both lookup maps.
   workers += worker
   idToWorker(worker.id) = worker
   addressToWorker(workerAddress) = worker
   true
 }

第三步:点击第二步中的removeWorker
源码位置:org.apache.spark.deploy.master.Master

// Removes a worker from the master's bookkeeping: marks it DEAD, drops it
// from the lookup maps, notifies each affected application of its lost
// executors, relaunches or discards its drivers, and finally erases it
// from the persistence engine.
private def removeWorker(worker: WorkerInfo) {
  logInfo("Removing worker " + worker.id + " on " + worker.host + ":" + worker.port)
  worker.setState(WorkerState.DEAD)
  idToWorker -= worker.id
  addressToWorker -= worker.endpoint.address

  worker.executors.values.foreach { exec =>
    logInfo("Telling app of lost executor: " + exec.id)
    // Tell the owning application's driver this executor is gone.
    exec.application.driver.send(ExecutorUpdated(
      exec.id, ExecutorState.LOST, Some("worker lost"), None))
    // Drop the executor from the application's in-memory state.
    exec.application.removeExecutor(exec)
  }

  worker.drivers.values.foreach { driver =>
    // A driver whose worker dies is relaunched only when it was submitted
    // with supervise enabled; otherwise it is removed in ERROR state.
    if (driver.desc.supervise) {
      logInfo(s"Re-launching ${driver.id}")
      relaunchDriver(driver)
    } else {
      logInfo(s"Not re-launching ${driver.id} because it was not supervised")
      removeDriver(driver.id, DriverState.ERROR, None)
    }
  }

  // Forget the worker in the recovery persistence store.
  persistenceEngine.removeWorker(worker)
}

第四步:点击第三步中的removeExecutor
源码位置:org.apache.spark.deploy.master.ApplicationInfo

/**
 * Drops the given executor from this application's in-memory bookkeeping:
 * archives it in `removedExecutors`, deletes it from the live map, and
 * releases its granted cores. A no-op when the executor is not tracked.
 */
private[master] def removeExecutor(exec: ExecutorDesc) {
  executors.get(exec.id) match {
    case Some(tracked) =>
      removedExecutors += tracked
      executors -= exec.id
      coresGranted -= exec.cores
    case None =>
      // Unknown executor id: nothing to remove.
  }
}

第五步:点击第三步中的relaunchDriver
源码位置:org.apache.spark.deploy.master.Master

/**
 * Queues a driver for another launch attempt: clears its worker binding,
 * flags it as RELAUNCHING, re-enqueues it in the waiting queue, and
 * triggers a scheduling pass.
 */
private def relaunchDriver(driver: DriverInfo) {
  // Detach from the (dead) worker before re-queueing.
  driver.worker = None
  driver.state = DriverState.RELAUNCHING
  waitingDrivers += driver
  schedule()
}

第六步:点击第三步中的removeDriver
源码位置:org.apache.spark.deploy.master.Master

/**
 * Removes a driver from the master's active set, archiving it in the
 * bounded completed-drivers history, recording its final state, and
 * kicking off a new scheduling round.
 *
 * @param driverId   id of the driver to remove
 * @param finalState terminal state to record on the driver
 * @param exception  failure that terminated the driver, if any
 */
private def removeDriver(
    driverId: String,
    finalState: DriverState,
    exception: Option[Exception]) {
  drivers.find(_.id == driverId) match {
    case Some(driver) =>
      logInfo(s"Removing driver: $driverId")
      drivers -= driver
      // Cap the completed-driver history: evict the oldest ~10% when full.
      if (completedDrivers.size >= RETAINED_DRIVERS) {
        completedDrivers.trimStart(math.max(RETAINED_DRIVERS / 10, 1))
      }
      completedDrivers += driver
      // Forget the driver in the recovery store, then record its outcome.
      persistenceEngine.removeDriver(driver)
      driver.state = finalState
      driver.exception = exception
      // Detach the driver from whichever worker was running it.
      driver.worker.foreach(_.removeDriver(driver))
      schedule()
    case None =>
      logWarning(s"Asked to remove unknown driver: $driverId")
  }
}
}

driver向master注册

  • 用spark-submit提交sparkApplication的时候,driver首先就会向master进行注册,将driver信息放入到内存缓存中,也就是HashMap中
  • 加入等待调度队列,也就是ArrayBuffer
  • 用持久化引擎将driver信息持久化,可能是文件系统,可能是zookeeper
  • 调用schedule()方法进行调度

源码解释:
第一步:请求提交Driver,参数 DriverDescription
源码位置:org.apache.spark.deploy.master.Master

/**
 * Handles a driver submission request (payload: DriverDescription).
 */
case RequestSubmitDriver(description) => {
  // Only an ALIVE master accepts submissions; standby/recovering masters
  // reply with a failure telling the client to resubmit to the active master.
  if (state != RecoveryState.ALIVE) {
    val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
      "Can only accept driver submissions in ALIVE state."
    context.reply(SubmitDriverResponse(self, false, None, msg))
  } else {
    logInfo("Driver submitted " + description.command.mainClass)
    // Build a DriverInfo (timestamp + fresh driver id) for this submission.
    val driver = createDriver(description)
    // Persist the driver so it survives a master failover.
    persistenceEngine.addDriver(driver)
    // Queue the driver for scheduling.
    waitingDrivers += driver
    // Track it in the in-memory set of known drivers.
    drivers.add(driver)
    schedule()

    // TODO: It might be good to instead have the submission client poll the master to determine
    //       the current status of the driver. For now it's simply "fire and forget".
    // Acknowledge the submission, returning the assigned driver id.
    context.reply(SubmitDriverResponse(self, true, Some(driver.id),
      s"Driver successfully submitted as ${driver.id}"))
  }
}

第二步:点击第一步中createDriver
源码位置:org.apache.spark.deploy.master.Master

/**
 * Builds a DriverInfo for a new submission, stamping it with the current
 * time and a freshly generated driver id.
 */
private def createDriver(desc: DriverDescription): DriverInfo = {
  val submitTime = System.currentTimeMillis()
  val submitDate = new Date(submitTime)
  // The driver id is derived from the submission date (see newDriverId).
  new DriverInfo(submitTime, newDriverId(submitDate), desc, submitDate)
}

application向master进行注册(registerApplication()方法)

  • Driver启动好之后,会执行我们的application代码,完成SparkContext的初始化,底层的SparkDeploySchedulerBackend会通过AppClient内部的线程ClientActor发送RegisterApplication消息,到master进行Application的注册
  • 将application信息放入到内存缓存中,也就是hashmap中
  • 将application加入等待的调度队列,也就是ArrayBuffer
  • 用持久化引擎将application信息持久化,可能是文件系统,可能是zookeeper

源码解释:
第一步:底层的SparkDeploySchedulerBackend,会通过AppClient内部的线程ClientActor发送RegisterApplication消息,到master进行Application的注册
源码位置:org.apache.spark.deploy.client.AppClient

/**
 * Fires an asynchronous registration attempt at every known master and
 * returns the submitted tasks as `Future`s so callers can cancel them.
 * (Standalone HA may run several masters — e.g. filesystem- or
 * ZooKeeper-based recovery; only the active one will answer.)
 */
private def tryRegisterAllMasters(): Array[JFuture[_]] = {
  masterRpcAddresses.map { masterAddress =>
    registerMasterThreadPool.submit(new Runnable {
      override def run(): Unit = try {
        // Another attempt may already have succeeded; skip the work if so.
        if (!registered.get) {
          logInfo("Connecting to master " + masterAddress.toSparkURL + "...")
          val masterRef =
            rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress, Master.ENDPOINT_NAME)
          // Send RegisterApplication with our app description to the master.
          masterRef.send(RegisterApplication(appDescription, self))
        }
      } catch {
        case ie: InterruptedException => // Cancelled
        case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
      }
    })
  }
}

第二步:处理Application的注册请求
源码位置:org.apache.spark.deploy.master.Master

/**
 * Handles an application's registration request.
 */
case RegisterApplication(description, driver) => {
  // TODO Prevent repeated registrations from some driver
  // A STANDBY (non-active) master ignores registration attempts entirely —
  // no reply is sent, so the client keeps trying other masters.
  if (state == RecoveryState.STANDBY) {
    // ignore, don't send response
  } else {
    logInfo("Registering app " + description.name)
    // Build an ApplicationInfo from the submitted ApplicationDescription.
    val app = createApplication(description, driver)
    // Cache the application and append it to the waiting (scheduling) queue.
    registerApplication(app)
    logInfo("Registered app " + description.name + " with ID " + app.id)
    // Persist the ApplicationInfo (file system or ZooKeeper).
    persistenceEngine.addApplication(app)
    // Reply to the driver (AppClient) with RegisteredApplication —
    // the acknowledgement message, not the RegisterApplication request.
    driver.send(RegisteredApplication(app.id, self))
    schedule()
  }
}

第三步:点击第二步中的createApplication
源码位置:org.apache.spark.deploy.master.Master

/**
 * Wraps an application submission into an ApplicationInfo, assigning a
 * unique application id derived from the submission timestamp.
 */
private def createApplication(desc: ApplicationDescription, driver: RpcEndpointRef):
    ApplicationInfo = {
  val submitTime = System.currentTimeMillis()
  val submitDate = new Date(submitTime)
  // The app id embeds the submission date (see newApplicationId).
  val appId = newApplicationId(submitDate)
  new ApplicationInfo(submitTime, appId, desc, submitDate, driver, defaultCores)
}

第四步:点击第二步中的registerApplication
源码位置:org.apache.spark.deploy.master.Master

/**
 * Registers a freshly created application with this master: records it in
 * the in-memory lookup maps and appends it to the waiting queue (scheduled
 * FIFO). A second registration from an already-known driver address is
 * silently ignored.
 */
private def registerApplication(app: ApplicationInfo): Unit = {
  // The driver's RPC address doubles as the deduplication key.
  val appAddress = app.driver.address
  if (addressToApp.contains(appAddress)) {
    logInfo("Attempted to re-register application at same address: " + appAddress)
  } else {
    applicationMetricsSystem.registerSource(app.appSource)

    // Cache the application under every key used for lookups.
    apps += app
    idToApp(app.id) = app
    endpointToApp(app.driver) = app
    addressToApp(appAddress) = app

    // Enqueue for scheduling (FIFO order).
    waitingApps += app
  }
}

第五步:点击SparkDeploySchedulerBackend的RegisteredApplication
源码位置:org.apache.spark.deploy.client.AppClient

// Master's acknowledgement of our RegisterApplication request: record the
// assigned app id, mark ourselves registered, remember the (now known
// active) master, and notify the listener that we are connected.
case RegisteredApplication(appId_, masterRef) =>
  // FIXME How to handle the following cases?
  // 1. A master receives multiple registrations and sends back multiple
  // RegisteredApplications due to an unstable network.
  // 2. Receive multiple RegisteredApplication from different masters because the master is
  // changing.
  appId.set(appId_)
  registered.set(true)
  master = Some(masterRef)
  listener.connected(appId.get)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值