DriverStateChanged
// Handles a DriverStateChanged message: a driver reported in ERROR, FINISHED,
// KILLED or FAILED state is removed; any other reported state is rejected.
case DriverStateChanged(driverId, state, exception) =>
  val removableStates =
    Seq(DriverState.ERROR, DriverState.FINISHED, DriverState.KILLED, DriverState.FAILED)
  if (removableStates.contains(state)) {
    // Hand the reported state and optional exception over to removeDriver.
    removeDriver(driverId, state, exception)
  } else {
    // No other state is accepted by this handler.
    throw new Exception(s"Received unexpected state update for driver $driverId: $state")
  }
removeDriver
/**
 * Removes the driver identified by `driverId` from the active `drivers` collection and
 * records it in `completedDrivers` with the given final state.
 *
 * If no driver with that id is present, only a warning is logged.
 *
 * @param driverId   id of the driver to remove
 * @param finalState state stored on the driver once it has been removed
 * @param exception  optional exception stored on the driver
 */
private def removeDriver(
    driverId: String,
    finalState: DriverState,
    exception: Option[Exception]): Unit = {
  // Look up the driver whose id matches driverId.
  drivers.find(d => d.id == driverId) match {
    case Some(driver) =>
      logInfo(s"Removing driver: $driverId")
      // Drop the driver from the in-memory collection of active drivers.
      drivers -= driver
      // Keep completedDrivers bounded: once it holds RETAINED_DRIVERS entries, trim
      // entries from its start (a tenth of the limit, but at least one) before appending.
      if (completedDrivers.size >= RETAINED_DRIVERS) {
        val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
        completedDrivers.trimStart(toRemove)
      }
      // Record the driver as completed.
      completedDrivers += driver
      // Remove the driver's entry from the persistence engine.
      persistenceEngine.removeDriver(driver)
      // Store the final state and the optional exception on the driver.
      driver.state = finalState
      driver.exception = exception
      // If the driver has a worker assigned, remove the driver from that worker.
      driver.worker.foreach(w => w.removeDriver(driver))
      // NOTE(review): presumably re-runs scheduling so freed resources can be reused — confirm.
      schedule()
    case None =>
      logWarning(s"Asked to remove unknown driver: $driverId")
  }
}
ExecutorStateChanged
// Handles an ExecutorStateChanged message: stores the new state on the executor, forwards
// it to the application's driver, cleans up finished executors, and removes the
// application as FAILED when its retry count reaches MAX_EXECUTOR_RETRIES while none of
// its executors is RUNNING. An unknown app/executor id only produces a warning.
case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
// Find the app the executor belongs to, then look the executor up in that app's executors map.
val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
execOption match {
// A matching executor exists.
case Some(exec) =>
// Remember the previous state, then store the newly reported one on the executor.
val appInfo = idToApp(appId)
val oldState = exec.state
exec.state = state
// When the executor is now RUNNING, it must have come from LAUNCHING; reset the app's retry count.
if (state == ExecutorState.RUNNING) {
assert(oldState == ExecutorState.LAUNCHING,
s"executor $execId state transfer from $oldState to RUNNING is illegal")
appInfo.resetRetryCount()
}
// Send ExecutorUpdated to the application's driver with the executor's new state.
exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, false))
// Clean up when the new state counts as finished according to ExecutorState.isFinished.
if (ExecutorState.isFinished(state)) {
// Remove this executor from the worker and app
logInfo(s"Removing executor ${exec.fullId} because it is $state")
// If an application has already finished, preserve its
// state to display its information properly on the UI
// Only touch the app's executor list while the app has not finished.
if (!appInfo.isFinished) {
// Remove the executor from the app's bookkeeping.
appInfo.removeExecutor(exec)
}
// Remove the executor from the bookkeeping of the worker it ran on.
exec.worker.removeExecutor(exec)
// Only an exit status of exactly Some(0) counts as a normal exit.
val normalExit = exitStatus == Some(0)
if (!normalExit
// Because && short-circuits, the retry count is incremented only for abnormal exits.
// Reaching MAX_EXECUTOR_RETRIES means the executors kept failing, so the application is treated as failed.
&& appInfo.incrementRetryCount() >= MAX_EXECUTOR_RETRIES
&& MAX_EXECUTOR_RETRIES >= 0) { // < 0 disables this application-killing path
val execs = appInfo.executors.values
// Only give up on the app when none of its executors is currently RUNNING.
if (!execs.exists(_.state == ExecutorState.RUNNING)) {
logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
s"${appInfo.retryCount} times; removing it")
// Remove the application, marking it FAILED.
removeApplication(appInfo, ApplicationState.FAILED)
}
}
}
schedule()
case None =>
logWarning(s"Got status update for unknown executor $appId/$execId")
}
removeApplication
removeApplication 的源码已在前面的博客中分析过,此处不再重复。