Driver信息改变
case DriverStateChanged(driverId, state, exception) => {
state match {
// 如果Driver的状态是错误、完成、杀死、失败,就移除Driver
case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>
removeDriver(driverId, state, exception)
case _ =>
throw new Exception(s"Received unexpected state update for driver $driverId: $state")
}
}
// 删除driver
def removeDriver(driverId: String, finalState: DriverState, exception: Option[Exception]) {
//用Scala高阶函数find()根据driverId,查找到driver
drivers.find(d => d.id == driverId) match {
case Some(driver) =>
logInfo(s"Removing driver: $driverId")
//将driver将内存缓存中删除
drivers -= driver
if (completedDrivers.size >= RETAINED_DRIVERS) {
val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
completedDrivers.trimStart(toRemove)
}
//将driver加入到已经完成的completeDrivers
completedDrivers += driver
//从持久化引擎中删除driver
persistenceEngine.removeDriver(driver)
//设置driver状态设置为完成
driver.state = finalState
driver.exception = exception
//从worker中遍历删除传入的driver
driver.worker.foreach(w => w.removeDriver(driver))
//重新调用schedule
schedule()
case None =>
logWarning(s"Asked to remove unknown driver: $driverId")
}
}
Executor信息改变
case ExecutorStateChanged(appId, execId, state, message, exitStatus) => {
// 找到Executor对应的Application,然后再反过来通过Application内部的Executor缓存获取Executor信息
val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
execOption match {
case Some(exec) => {
// 如果有值
val appInfo = idToApp(appId)
exec.state = state
if (state == ExecutorState.RUNNING) { appInfo.resetRetryCount() }
// 向driver同步发送ExecutorUpdated消息
exec.application.driver ! ExecutorUpdated(execId, state, message, exitStatus)
// 判断,如果Executor完成了
if (ExecutorState.isFinished(state)) {
// Remove this executor from the worker and app
logInfo(s"Removing executor ${exec.fullId} because it is $state")
// 从Application缓存中移除Executor
appInfo.removeExecutor(exec)
// 从运行Executor的Worker的缓存中移除Executor
exec.worker.removeExecutor(exec)
// 判断 如果Executor的退出状态是非正常的
val normalExit = exitStatus == Some(0)
// Only retry certain number of times so we don't go into an infinite loop.
if (!normalExit) {
// 判断Application当前的重试次数,是否达到了最大值,最大值是10
// 也就是说,Executor反复调度都是失败,那么认为Application也失败了
if (appInfo.incrementRetryCount() < ApplicationState.MAX_NUM_RETRY) {
// 重新进行调度
schedule()
} else {
// 否则,进行移除Application操作
val execs = appInfo.executors.values
if (!execs.exists(_.state == ExecutorState.RUNNING)) {
logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
s"${appInfo.retryCount} times; removing it")
removeApplication(appInfo, ApplicationState.FAILED)
}
}
}
}
}
case None =>
logWarning(s"Got status update for unknown executor $appId/$execId")
}
}
接下来看下removeApplication()方法
def removeApplication(app: ApplicationInfo, state: ApplicationState.Value) {
if (apps.contains(app)) {
logInfo("Removing app " + app.id)
//从application队列(hashset)中删除当前application
apps -= app
idToApp -= app.id
actorToApp -= app.driver
addressToApp -= app.driver.path.address
if (completedApps.size >= RETAINED_APPLICATIONS) {
val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
completedApps.take(toRemove).foreach( a => {
appIdToUI.remove(a.id).foreach { ui => webUi.detachSparkUI(ui) }
applicationMetricsSystem.removeSource(a.appSource)
})
completedApps.trimStart(toRemove)
}
//加入已完成的application队列
completedApps += app // Remember it in our history
//从当前等待运行的application队列中删除当前APP
waitingApps -= app
// If application events are logged, use them to rebuild the UI
rebuildSparkUI(app)
for (exec <- app.executors.values) {
//停止executor
exec.worker.removeExecutor(exec)
exec.worker.actor ! KillExecutor(masterUrl, exec.application.id, exec.id)
exec.state = ExecutorState.KILLED
}
app.markFinished(state)
if (state != ApplicationState.FINISHED) {
//从driver中删除application
app.driver ! ApplicationRemoved(state.toString)
}
//从持久化引擎中删除application
persistenceEngine.removeApplication(app)
//从新调度任务
schedule()
// Tell all workers that the application has finished, so they can clean up any app state.
//告诉所有的worker,APP已经启动完成了,所以他们可以清空APP state
workers.foreach { w =>
w.actor ! ApplicationFinished(app.id)
}
}
}