schedule() 图解
schedule() 代码剖析
schedule()
/**
 * Schedules the currently available resources among the waiting drivers and applications.
 *
 * According to the original notes, this is invoked whenever a new application is submitted
 * or the set of available resources changes.
 *
 * Nothing happens unless the master's recovery state is ALIVE. Otherwise each waiting driver
 * is offered to the ALIVE workers in round-robin order (over a randomly shuffled snapshot),
 * and afterwards executors are placed via startExecutorsOnWorkers().
 */
private def schedule(): Unit = {
  // Only an ALIVE master schedules anything; in any other state this is a no-op.
  if (state == RecoveryState.ALIVE) {
    // Randomize the order in which the ALIVE workers are tried.
    val aliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
    val aliveCount = aliveWorkers.size
    // Round-robin cursor into aliveWorkers; it is NOT reset between drivers.
    var cursor = 0

    // Iterate over a snapshot because launched drivers are removed from waitingDrivers.
    waitingDrivers.toList.foreach { driver =>
      var placed = false
      // Number of workers already tried for this driver.
      var visited = 0
      // Try at most every ALIVE worker once, stopping at the first one that fits.
      while (visited < aliveCount && !placed) {
        val candidate = aliveWorkers(cursor)
        visited += 1
        // The worker must have enough free memory AND enough free cores for the driver.
        val fits =
          candidate.memoryFree >= driver.desc.mem && candidate.coresFree >= driver.desc.cores
        if (fits) {
          launchDriver(candidate, driver)
          waitingDrivers -= driver
          placed = true
        }
        // Advance the cursor whether or not the driver was placed on this worker.
        cursor = (cursor + 1) % aliveCount
      }
    }

    // Drivers are handled first; executors are scheduled afterwards.
    startExecutorsOnWorkers()
  }
}
schedule()
startExecutorsOnWorkers()
/**
 * Schedules and launches executors on workers.
 *
 * Applications are processed in the order they appear in waitingApps (a simple FIFO):
 * the first application is served, then the second, and so on. Applications with no
 * cores left to request are skipped.
 */
private def startExecutorsOnWorkers(): Unit = {
  waitingApps.foreach { app =>
    if (app.coresLeft > 0) {
      // Cores requested per executor, if the application configured it.
      val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor

      // Keep only workers able to host at least one executor of this application:
      //   - the worker is ALIVE,
      //   - its free memory covers one executor's memory,
      //   - its free cores cover one executor's cores (1 when not configured).
      // The survivors are ordered by free cores, largest first. The ascending sort followed
      // by reverse is kept on purpose: it also fixes the relative order of ties.
      val usableWorkers = workers.toArray
        .filter { worker =>
          worker.state == WorkerState.ALIVE &&
            worker.memoryFree >= app.desc.memoryPerExecutorMB &&
            worker.coresFree >= coresPerExecutor.getOrElse(1)
        }
        .sortBy(_.coresFree)
        .reverse

      // Number of cores to hand out on each usable worker, indexed like usableWorkers.
      val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)

      // Launch executors on every worker that received a non-zero share of cores.
      usableWorkers.indices.foreach { pos =>
        if (assignedCores(pos) > 0) {
          allocateWorkerResourceToExecutors(
            app, assignedCores(pos), coresPerExecutor, usableWorkers(pos))
        }
      }
    }
  }
}
schedule() —> startExecutorsOnWorkers()
scheduleExecutorsOnWorkers()
/**
 * Decides how many cores of each usable worker are given to the application's executors.
 * Returns an array, indexed like usableWorkers, with the number of cores assigned on each
 * worker. Nothing is launched here; only the assignment is computed.
 *
 * There are two placement modes:
 *  - spreadOutApps = true: after every single assignment the loop moves on to the next
 *    worker, so the application's executors are spread over as many workers as possible.
 *  - spreadOutApps = false: the loop keeps assigning on the same worker until it can take
 *    no more, so the executors are packed onto as few workers as possible.
 * The original notes state that the former is usually better for data locality and is the
 * default (the default value is not visible in this excerpt).
 *
 * The number of cores per executor is configurable. When it is set, several executors of
 * the same application may be accounted for on one worker, provided the worker has enough
 * cores and memory. When it is not set, at most one executor is counted per worker and
 * that executor keeps receiving cores one at a time.
 *
 * Cores are assigned in units of a whole executor (minCoresPerExecutor) rather than one
 * core at a time. Example from the original notes: 4 workers with 16 cores each, and a
 * request for 3 executors with 16 cores each. Handing out one core at a time would put
 * 12 cores on each worker, and since 12 < 16 no executor could be launched at all.
 *
 * @param app           the application whose executors are being scheduled
 * @param usableWorkers the candidate workers; the result is indexed in the same order
 * @param spreadOutApps true to spread across workers, false to pack onto few workers
 * @return the number of cores assigned on each worker
 */
private def scheduleExecutorsOnWorkers(
app: ApplicationInfo,
usableWorkers: Array[WorkerInfo],
spreadOutApps: Boolean): Array[Int] = {
// Configured cores per executor, if any.
val coresPerExecutor = app.desc.coresPerExecutor
// Allocation unit: the configured cores per executor, or 1 when not configured.
val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
// True when cores per executor is NOT configured. In that mode at most one executor is
// counted per worker, and extra iterations add cores to that executor.
val oneExecutorPerWorker = coresPerExecutor.isEmpty
// Memory required by each executor.
val memoryPerExecutor = app.desc.memoryPerExecutorMB
// Number of candidate workers.
val numUsable = usableWorkers.length
// Cores assigned so far on each worker (the return value).
val assignedCores = new Array[Int](numUsable)
// Executors accounted for so far on each worker.
val assignedExecutors = new Array[Int](numUsable)
// Cores still to hand out: the smaller of what the application still wants and the total
// free cores across all usable workers.
var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
/**
 * Returns whether the worker at index pos can take one more allocation unit for this
 * application, given what has already been assigned in this call.
 */
def canLaunchExecutor(pos: Int): Boolean = {
// There must still be at least one allocation unit left to hand out.
val keepScheduling = coresToAssign >= minCoresPerExecutor
// The worker's free cores, minus what was already assigned on it here, must cover one unit.
val enoughCores = usableWorkers(pos).coresFree - assignedCores(pos) >= minCoresPerExecutor
// A new executor is being accounted for when multiple executors per worker are allowed,
// or when this worker has no executor assigned yet. Otherwise the cores are simply added
// to the executor already assigned on this worker.
val launchingNewExecutor = !oneExecutorPerWorker || assignedExecutors(pos) == 0
if (launchingNewExecutor) {
// Memory already committed on this worker by executors assigned in this call.
val assignedMemory = assignedExecutors(pos) * memoryPerExecutor
// The worker's free memory, minus that committed memory, must cover one more executor.
val enoughMemory = usableWorkers(pos).memoryFree - assignedMemory >= memoryPerExecutor
// Executors assigned in this call plus the application's existing executors must stay
// below the application's executor limit.
val underLimit = assignedExecutors.sum + app.executors.size < app.executorLimit
keepScheduling && enoughCores && enoughMemory && underLimit
} else {
// Cores are being added to an existing executor, so memory and the executor limit
// do not need to be checked again.
keepScheduling && enoughCores
}
}
// Indices of the workers that can take at least one allocation unit.
var freeWorkers = (0 until numUsable).filter(canLaunchExecutor)
// Keep assigning until no worker can take another unit; canLaunchExecutor also turns
// false once the cores to assign run out or the executor limit is reached.
while (freeWorkers.nonEmpty) {
freeWorkers.foreach { pos =>
var keepScheduling = true
while (keepScheduling && canLaunchExecutor(pos)) {
coresToAssign -= minCoresPerExecutor
assignedCores(pos) += minCoresPerExecutor
// With one executor per worker, every iteration gives more cores to that single
// executor; otherwise every iteration accounts for one additional executor.
if (oneExecutorPerWorker) {
assignedExecutors(pos) = 1
} else {
assignedExecutors(pos) += 1
}
// spreadOutApps: stop after one assignment and move on to the next worker.
// Otherwise: stay on this worker until canLaunchExecutor(pos) becomes false.
if (spreadOutApps) {
keepScheduling = false
}
}
}
// After each pass, drop the workers that can no longer take an allocation unit.
freeWorkers = freeWorkers.filter(canLaunchExecutor)
}
// Cores assigned on each worker, indexed like usableWorkers.
assignedCores
}
schedule() —> startExecutorsOnWorkers()
allocateWorkerResourceToExecutors()
/**
 * Allocates a worker's assigned cores to one or more executors of an application and
 * launches them.
 *
 * When the cores per executor is configured, the assigned cores are divided evenly into
 * executors of that size (integer division). Otherwise a single executor is launched that
 * takes all of the cores assigned on this worker.
 *
 * @param app              the application the executors belong to
 * @param assignedCores    number of cores on this worker assigned to this application
 * @param coresPerExecutor number of cores each executor requires, if configured
 * @param worker           the worker that hosts the executors
 */
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // Derive both the executor count and the size of each executor in one place.
  val (executorCount, coresEach) = coresPerExecutor match {
    case Some(perExecutor) => (assignedCores / perExecutor, perExecutor)
    case None => (1, assignedCores)
  }

  (1 to executorCount).foreach { _ =>
    // Record the executor on the application, then launch it on the worker.
    val exec = app.addExecutor(worker, coresEach)
    launchExecutor(worker, exec)
    // The state is only switched when at least one executor is actually launched.
    app.state = ApplicationState.RUNNING
  }
}
schedule() —> startExecutorsOnWorkers() —> allocateWorkerResourceToExecutors()
addExecutor()
/**
 * ApplicationInfo.addExecutor(): records a new executor for this application and returns it.
 *
 * @param worker the worker the executor description is created for
 * @param cores  cores given to the executor; also added to this application's coresGranted
 * @param useID  optional id hint, forwarded as-is to newExecutorId
 * @return the newly created executor description, already registered in executors
 */
private[master] def addExecutor(
    worker: WorkerInfo,
    cores: Int,
    useID: Option[Int] = None): ExecutorDesc = {
  val executorId = newExecutorId(useID)
  val descriptor = new ExecutorDesc(executorId, this, worker, cores, desc.memoryPerExecutorMB)
  // Index the executor by its id and account for the cores it was granted.
  executors.update(descriptor.id, descriptor)
  coresGranted += cores
  descriptor
}
schedule() —> startExecutorsOnWorkers() —> allocateWorkerResourceToExecutors()
launchExecutor()
/**
 * Launches an executor on a worker.
 *
 * Registers the executor with the worker's bookkeeping, sends a LaunchExecutor message to
 * the worker's endpoint, and sends an ExecutorAdded message to the application's driver.
 *
 * @param worker the worker that hosts the executor
 * @param exec   the executor to launch
 */
private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
  logInfo(s"Launching executor ${exec.fullId} on worker ${worker.id}")
  worker.addExecutor(exec)

  // Tell the worker to start the executor process.
  val launchMessage = LaunchExecutor(
    masterUrl, exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory)
  worker.endpoint.send(launchMessage)

  // Tell the application's driver that an executor has been added for it.
  val addedMessage =
    ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
  exec.application.driver.send(addedMessage)
}