1. Objects that register with the Master
(1) Driver
(2) Application
(3) Worker
(4) Executor does not register with the Master; it registers with the SchedulerBackend inside the Driver (see the message sketch below).
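To make these targets concrete, here is a simplified sketch of the messages involved. The real case classes live in org.apache.spark.deploy.DeployMessages and org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages and carry more fields (RpcEndpointRefs, resource descriptions), so treat the shapes here as illustrative assumptions only:

// Simplified, illustrative message shapes; the real Spark definitions carry more fields.
case class RegisterWorker(id: String, host: String, port: Int, cores: Int, memory: Int) // Worker -> Master
case class RequestSubmitDriver(driverDescription: String)                               // Client -> Master
case class RegisterApplication(appName: String)                                         // Driver's AppClient -> Master
case class RegisterExecutor(executorId: String, hostname: String, cores: Int)           // Executor -> Driver's SchedulerBackend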
2. The Worker startup process in the source code
A Worker actively registers itself with the Master after it starts. Therefore, in a production environment a new Worker can be added to a running Spark cluster, simply by starting the Worker process against the master's URL, without restarting the cluster, thereby increasing processing capacity.
(1) The onStart method of the Worker class
override def onStart() {
  assert(!registered)
  logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format(
    host, port, cores, Utils.megabytesToString(memory)))
  logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
  logInfo("Spark home: " + sparkHome)
  createWorkDir()
  shuffleService.startIfEnabled()
  webUi = new WorkerWebUI(this, workDir, webUiPort)
  webUi.bind()
  val scheme = if (webUi.sslOptions.enabled) "https" else "http"
  workerWebUiUrl = s"$scheme://$publicAddress:${webUi.boundPort}"
  registerWithMaster() // this call registers the worker with the master
  metricsSystem.registerSource(workerSource)
  metricsSystem.start()
  // Attach the worker metrics servlet handler to the web ui after the metrics system is started.
  metricsSystem.getServletHandlers.foreach(webUi.attachHandler)
}
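A note on why registration can safely be kicked off from onStart: the RpcEnv constructs an endpoint, invokes onStart exactly once, and only then begins dispatching messages to receive. The toy sketch below illustrates that lifecycle; it is not Spark's actual (private) RpcEndpoint API:

// Toy lifecycle sketch, NOT Spark's RpcEndpoint API.
trait ToyEndpoint {
  def onStart(): Unit         // invoked exactly once, before any message arrives
  def receive(msg: Any): Unit // invoked afterwards for each inbound message
}

class ToyWorker extends ToyEndpoint {
  private var registered = false
  override def onStart(): Unit = println("safe to start registering here")
  override def receive(msg: Any): Unit = msg match {
    case "RegisteredWorker" => registered = true
    case other              => println(s"got $other (registered=$registered)")
  }
}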
(2) The registerWithMaster method of the Worker class
private def registerWithMaster() {
  // onDisconnected may be triggered multiple times, so don't attempt registration
  // if there are outstanding registration attempts scheduled.
  registrationRetryTimer match {
    case None =>
      registered = false
      registerMasterFutures = tryRegisterAllMasters() // there may be multiple masters (HA)
      connectionAttemptCount = 0
      registrationRetryTimer = Some(forwordMessageScheduler.scheduleAtFixedRate(
        new Runnable { // registration retries run on a scheduler thread
          override def run(): Unit = Utils.tryLogNonFatalError {
            // send a ReregisterWithMaster message to this endpoint itself,
            // which retries registration with every master
            Option(self).foreach(_.send(ReregisterWithMaster))
          }
        },
        INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
        INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
        TimeUnit.SECONDS))
    case Some(_) =>
      logInfo("Not spawning another attempt to register with the master, since there is an" +
        " attempt scheduled already.")
  }
}
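The retry timer above is a plain ScheduledExecutorService scheduling a fixed-rate task. Below is a self-contained sketch of the same pattern; the names are my own, not Spark's:

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}

object RetryRegistrationSketch {
  private val scheduler = Executors.newSingleThreadScheduledExecutor()
  @volatile private var registered = false
  private var retryTimer: Option[ScheduledFuture[_]] = None

  def startRetrying(): Unit = synchronized {
    retryTimer match {
      case None =>
        // schedule a registration attempt every 5 seconds until registered
        retryTimer = Some(scheduler.scheduleAtFixedRate(new Runnable {
          override def run(): Unit =
            if (!registered) println("re-attempting registration ...")
        }, 5, 5, TimeUnit.SECONDS))
      case Some(_) =>
        println("retry already scheduled, not spawning another")
    }
  }

  def onRegistered(): Unit = synchronized {
    registered = true
    retryTimer.foreach(_.cancel(true)) // stop retrying once registered
    retryTimer = None
  }
}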
(3) The tryRegisterAllMasters method
private def tryRegisterAllMasters(): Array[JFuture[_]] = {
  masterRpcAddresses.map { masterAddress =>
    registerMasterThreadPool.submit(new Runnable { // one thread per master
      override def run(): Unit = {
        try {
          logInfo("Connecting to master " + masterAddress + "...")
          val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
          registerWithMaster(masterEndpoint) // register with this master
        } catch {
          case ie: InterruptedException => // Cancelled
          case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
        }
      }
    })
  }
}
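tryRegisterAllMasters returns the submitted JFutures so that a later success can cancel the outstanding attempts (the Worker does this in its cancelLastRegistrationRetry helper). A minimal sketch of that submit-then-cancel pattern, with hypothetical master addresses:

import java.util.concurrent.{Executors, Future => JFuture}

object ParallelRegisterSketch {
  private val pool = Executors.newFixedThreadPool(2)

  def main(args: Array[String]): Unit = {
    val masters = Seq("master-a:7077", "master-b:7077") // hypothetical addresses
    // submit one attempt per master and keep the futures ...
    val attempts: Seq[JFuture[_]] = masters.map { addr =>
      pool.submit(new Runnable {
        override def run(): Unit = println(s"connecting to $addr ...")
      })
    }
    // ... so they can all be cancelled once one registration succeeds
    attempts.foreach(_.cancel(true))
    pool.shutdown()
  }
}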
(4) The registerWithMaster(masterEndpoint) overload
private def registerWithMaster(masterEndpoint: RpcEndpointRef): Unit = {
  // registration sends a RegisterWorker message to the master
  masterEndpoint.ask[RegisterWorkerResponse](RegisterWorker(
    workerId, host, port, self, cores, memory, workerWebUiUrl))
    .onComplete {
      // This is a very fast action so we can use "ThreadUtils.sameThread"
      case Success(msg) =>
        Utils.tryLogNonFatalError {
          handleRegisterResponse(msg)
        }
      case Failure(e) =>
        logError(s"Cannot register with master: ${masterEndpoint.address}", e)
        System.exit(1)
    }(ThreadUtils.sameThread)
}
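The ask above returns a Scala Future, and its callback runs on ThreadUtils.sameThread, an ExecutionContext that executes callbacks directly on the completing thread. That is only appropriate because handling the response is cheap, as the source comment notes. A hedged sketch of such a context:

import java.util.concurrent.Executor
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

object SameThreadSketch {
  // roughly what ThreadUtils.sameThread provides: run callbacks
  // directly on the thread that completes the future, no thread hop
  val sameThread: ExecutionContext = ExecutionContext.fromExecutor(
    new Executor { override def execute(r: Runnable): Unit = r.run() })

  def main(args: Array[String]): Unit = {
    implicit val ec: ExecutionContext = sameThread
    Future("RegisteredWorker").onComplete {
      case Success(msg) => println(s"handled $msg on the completing thread")
      case Failure(e)   => println(s"registration failed: $e")
    }
  }
}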
(5) The handleRegisterResponse method
private def handleRegisterResponse(msg: RegisterWorkerResponse): Unit = synchronized {
  msg match {
    case RegisteredWorker(masterRef, masterWebUiUrl) =>
      logInfo("Successfully registered with master " + masterRef.address.toSparkURL)
      registered = true
      changeMaster(masterRef, masterWebUiUrl)
      forwordMessageScheduler.scheduleAtFixedRate(new Runnable {
        override def run(): Unit = Utils.tryLogNonFatalError {
          self.send(SendHeartbeat)
        }
      }, 0, HEARTBEAT_MILLIS, TimeUnit.MILLISECONDS)
      if (CLEANUP_ENABLED) {
        logInfo(
          s"Worker cleanup enabled; old application directories will be deleted in: $workDir")
        forwordMessageScheduler.scheduleAtFixedRate(new Runnable {
          override def run(): Unit = Utils.tryLogNonFatalError {
            self.send(WorkDirCleanup)
          }
        }, CLEANUP_INTERVAL_MILLIS, CLEANUP_INTERVAL_MILLIS, TimeUnit.MILLISECONDS)
      }
      val execs = executors.values.map { e =>
        new ExecutorDescription(e.appId, e.execId, e.cores, e.state)
      }
      masterRef.send(WorkerLatestState(workerId, execs.toList, drivers.keys.toSeq))
    case RegisterWorkerFailed(message) =>
      if (!registered) {
        logError("Worker registration failed: " + message)
        System.exit(1)
      }
    case MasterInStandby =>
      // Ignore. Master not yet ready.
  }
}
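One detail about the SendHeartbeat timer scheduled above: in the Worker source this excerpt comes from, HEARTBEAT_MILLIS is derived as one quarter of spark.worker.timeout (default 60 s), so the Master can miss a few heartbeats before declaring the worker lost. A sketch of the derivation (treat the exact formula as version-dependent):

// version-dependent sketch of how the heartbeat period is derived
val workerTimeoutSeconds = 60L                          // default of spark.worker.timeout
val HEARTBEAT_MILLIS = workerTimeoutSeconds * 1000 / 4  // 15000 ms between heartbeats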
(6) The Master receives the RegisterWorker message
When the Master receives a Worker's registration request, it first checks whether it is currently in STANDBY mode; if so, the request is not processed. It then checks whether this Worker already appears in the Master's in-memory data structure idToWorker; if it does, the registration is not repeated.
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  case RegisterWorker(
      id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl) =>
    logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
      workerHost, workerPort, cores, Utils.megabytesToString(memory)))
    if (state == RecoveryState.STANDBY) { // a STANDBY master does not process the request
      context.reply(MasterInStandby)
    } else if (idToWorker.contains(id)) { // an already-registered worker is not registered again
      context.reply(RegisterWorkerFailed("Duplicate worker ID"))
    } else { // begin registration
      // first create a WorkerInfo object
      val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
        workerRef, workerWebUiUrl)
      if (registerWorker(worker)) { // if registering the worker succeeds
        // persist the worker via the configured persistence engine, i.e. write it to
        // the chosen storage (see the earlier article on HA)
        persistenceEngine.addWorker(worker)
        context.reply(RegisteredWorker(self, masterWebUiUrl)) // tell the worker the outcome
        schedule() // trigger scheduling here; this is covered in a later article
      } else {
        val workerAddress = worker.endpoint.address
        logWarning("Worker registration failed. Attempted to re-register worker at same " +
          "address: " + workerAddress)
        context.reply(RegisterWorkerFailed("Attempted to re-register worker at same address: "
          + workerAddress))
      }
    }
(7) PersistenceEngine.addWorker
final def addWorker(worker: WorkerInfo): Unit = {
  persist("worker_" + worker.id, worker)
}
(8) Persistence implemented via ZooKeeper (ZooKeeperPersistenceEngine)
override def persist(name: String, obj: Object): Unit = {
  serializeIntoFile(WORKING_DIR + "/" + name, obj)
}
(9) serializeIntoFile writes the Worker metadata into ZooKeeper.
private def serializeIntoFile(path: String, value: AnyRef) {
  val serialized = serializer.newInstance().serialize(value)
  val bytes = new Array[Byte](serialized.remaining())
  serialized.get(bytes)
  zk.create().withMode(CreateMode.PERSISTENT).forPath(path, bytes)
}
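For recovery, the persistence engine must also read these znodes back. Below is a hedged sketch of the reverse path using the same Curator client; Spark's ZooKeeperPersistenceEngine has an equivalent deserialization helper, but treat the exact signature here as an assumption:

import java.nio.ByteBuffer
import org.apache.curator.framework.CuratorFramework
import scala.reflect.ClassTag

// Hedged sketch: read a znode written by serializeIntoFile back into an object.
// `zk` and `serializer` are assumed to be the same client and serializer used above.
def deserializeFromFile[T: ClassTag](zk: CuratorFramework,
    serializer: org.apache.spark.serializer.Serializer, path: String): T = {
  val bytes = zk.getData().forPath(path)                        // raw znode payload
  serializer.newInstance().deserialize[T](ByteBuffer.wrap(bytes))
}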
(10) The next trace point: registerWorker(worker), where registration actually begins.
private def registerWorker(worker: WorkerInfo): Boolean = {
  // There may be one or more refs to dead workers on this same node (w/ different ID's),
  // remove them.
  // a DEAD worker is never registered again, so such entries are filtered out here
  workers.filter { w =>
    (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
  }.foreach { w =>
    workers -= w
  }
  val workerAddress = worker.endpoint.address
  if (addressToWorker.contains(workerAddress)) {
    val oldWorker = addressToWorker(workerAddress)
    // check for UNKNOWN state; if the old worker is UNKNOWN, remove it,
    // including cleaning up its executors and drivers
    if (oldWorker.state == WorkerState.UNKNOWN) {
      // A worker registering from UNKNOWN implies that the worker was restarted during recovery.
      // The old worker must thus be dead, so we will remove it and accept the new worker.
      removeWorker(oldWorker)
    } else {
      logInfo("Attempted to re-register worker at same address: " + workerAddress)
      return false
    }
  }
  // if everything checks out, this is a legitimate new worker, so add it to the
  // HashMap data structures, i.e. to the master's in-memory state
  workers += worker
  idToWorker(worker.id) = worker
  addressToWorker(workerAddress) = worker
  true
}
(10.1) The two in-memory lookup maps
private val idToWorker = new HashMap[String, WorkerInfo]
private val addressToWorker = new HashMap[RpcAddress, WorkerInfo]
(11) The removeWorker method
private def removeWorker(worker: WorkerInfo) {
  logInfo("Removing worker " + worker.id + " on " + worker.host + ":" + worker.port)
  worker.setState(WorkerState.DEAD)
  idToWorker -= worker.id
  addressToWorker -= worker.endpoint.address
  for (exec <- worker.executors.values) {
    logInfo("Telling app of lost executor: " + exec.id)
    exec.application.driver.send(ExecutorUpdated(
      exec.id, ExecutorState.LOST, Some("worker lost"), None))
    exec.state = ExecutorState.LOST
    exec.application.removeExecutor(exec)
  }
  for (driver <- worker.drivers.values) {
    if (driver.desc.supervise) {
      logInfo(s"Re-launching ${driver.id}")
      relaunchDriver(driver)
    } else {
      logInfo(s"Not re-launching ${driver.id} because it was not supervised")
      removeDriver(driver.id, DriverState.ERROR, None)
    }
  }
  persistenceEngine.removeWorker(worker)
}
3. Driver registration: this was covered in the first article of this Spark source analysis series, as part of the SparkContext registration process.
4. Application information is registered with the Master by the SchedulerBackend. After the Driver starts, it initializes SparkContext, which creates a SparkDeploySchedulerBackend; internally this holds an AppClient, and the AppClient's ClientEndpoint sends a RegisterApplication message to the Master, as sketched below.
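A simplified sketch of that handshake; the real RegisterApplication carries an ApplicationDescription and an RpcEndpointRef, and the Master replies with RegisteredApplication, so the shapes below are illustrative only:

// Illustrative shapes only; see DeployMessages for the real definitions.
case class RegisterApplication(appName: String)   // ClientEndpoint -> Master
case class RegisteredApplication(appId: String)   // Master -> ClientEndpoint

// Roughly what the AppClient's ClientEndpoint does with the reply:
def onMasterReply(msg: Any): Unit = msg match {
  case RegisteredApplication(appId) =>
    println(s"application registered as $appId")  // record appId, mark registered
  case other =>
    println(s"unexpected reply: $other")
}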
5. How the Master handles Driver state changes
(1) Handling the DriverStateChanged message
case DriverStateChanged(driverId, state, exception) =>
  state match { // whatever terminal state the driver reaches, the outcome is removeDriver
    case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>
      removeDriver(driverId, state, exception)
    case _ =>
      throw new Exception(s"Received unexpected state update for driver $driverId: $state")
  }
(2) The removeDriver method
private def removeDriver(
    driverId: String,
    finalState: DriverState,
    exception: Option[Exception]) {
  drivers.find(d => d.id == driverId) match { // look up the driver by id
    case Some(driver) => // the driver exists
      logInfo(s"Removing driver: $driverId")
      drivers -= driver // remove the driver
      if (completedDrivers.size >= RETAINED_DRIVERS) {
        val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
        completedDrivers.trimStart(toRemove)
      }
      completedDrivers += driver
      persistenceEngine.removeDriver(driver)
      driver.state = finalState
      driver.exception = exception
      driver.worker.foreach(w => w.removeDriver(driver)) // detach the driver from its worker too
      schedule() // call the schedule method again here
    case None =>
      logWarning(s"Asked to remove unknown driver: $driverId")
  }
}
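The RETAINED_DRIVERS cap above (spark.deploy.retainedDrivers, default 200) bounds the completedDrivers history: once it is full, the oldest ~10% of entries are dropped before the new one is appended. A tiny self-contained demonstration of that trimming pattern, with the cap shrunk to 10:

import scala.collection.mutable.ArrayBuffer

object TrimRetainedSketch {
  def main(args: Array[String]): Unit = {
    val RETAINED = 10 // stand-in for RETAINED_DRIVERS (default 200)
    val completed = ArrayBuffer.tabulate(10)(i => s"driver-$i")
    if (completed.size >= RETAINED) {
      val toRemove = math.max(RETAINED / 10, 1) // drop the oldest ~10%
      completed.trimStart(toRemove)
    }
    completed += "driver-10"
    println(completed.mkString(", ")) // driver-1 ... driver-10 (driver-0 dropped)
  }
}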
6. How the Master handles Executor state changes
(1) When an Executor fails, the system retries launching it a limited number of times (at most 10).
case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
  val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
  execOption match {
    case Some(exec) =>
      val appInfo = idToApp(appId)
      val oldState = exec.state
      exec.state = state
      if (state == ExecutorState.RUNNING) {
        assert(oldState == ExecutorState.LAUNCHING,
          s"executor $execId state transfer from $oldState to RUNNING is illegal")
        appInfo.resetRetryCount()
      }
      exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus))
      if (ExecutorState.isFinished(state)) {
        // Remove this executor from the worker and app
        logInfo(s"Removing executor ${exec.fullId} because it is $state")
        // If an application has already finished, preserve its
        // state to display its information properly on the UI
        if (!appInfo.isFinished) {
          appInfo.removeExecutor(exec)
        }
        exec.worker.removeExecutor(exec)
        val normalExit = exitStatus == Some(0)
        // Only retry certain number of times so we don't go into an infinite loop.
        if (!normalExit) {
          if (appInfo.incrementRetryCount() < ApplicationState.MAX_NUM_RETRY) {
            schedule()
          } else {
            val execs = appInfo.executors.values
            if (!execs.exists(_.state == ExecutorState.RUNNING)) {
              logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
                s"${appInfo.retryCount} times; removing it")
              removeApplication(appInfo, ApplicationState.FAILED)
            }
          }
        }
      }
    case None =>
      logWarning(s"Got status update for unknown executor $appId/$execId")
  }
(2) The limit of 10 retries is hard-coded here and cannot be changed through configuration (later Spark releases made it configurable via spark.deploy.maxExecutorRetries).
private[master] object ApplicationState extends Enumeration {
  type ApplicationState = Value
  val WAITING, RUNNING, FINISHED, FAILED, KILLED, UNKNOWN = Value
  val MAX_NUM_RETRY = 10
}