The core method of Master.scala: schedule()
private def schedule(): Unit = {
  // Only an ALIVE Master performs resource scheduling; a STANDBY Master returns immediately.
  if (state != RecoveryState.ALIVE) {
    return
  }
  // Drivers take strict precedence over executors
  // Take the previously registered workers, keep only those whose state is ALIVE,
  // and randomize their order with Random.shuffle so drivers are not always placed
  // on the same workers.
  val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
  val numWorkersAlive = shuffledAliveWorkers.size
  var curPos = 0
  /**
   * Driver scheduling here only matters for applications submitted in standalone cluster
   * deploy mode: only then is the driver registered with the Master and queued in
   * waitingDrivers. In client mode the driver is launched locally on the submitting
   * machine and never goes through this loop.
   */
  for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
    // We assign workers to each waiting driver in a round-robin fashion. For each driver, we
    // start from the last worker that was assigned a driver, and continue onwards until we have
    // explored all alive workers.
    var launched = false
    var numWorkersVisited = 0
    // Keep iterating as long as there are alive workers we have not visited yet
    // and the driver has not been launched.
    while (numWorkersVisited < numWorkersAlive && !launched) {
      val worker = shuffledAliveWorkers(curPos)
      numWorkersVisited += 1
      // If the worker's free memory is at least the memory the driver needs, and its free
      // CPU cores are at least the cores the driver needs, launch the driver on this worker
      // and remove the driver from the waitingDrivers queue.
      if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
        launchDriver(worker, driver)
        waitingDrivers -= driver
        launched = true
      }
      // Advance to the next worker to consider, in round-robin order.
      curPos = (curPos + 1) % numWorkersAlive
    }
  }
  startExecutorsOnWorkers()
}
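To see the round-robin driver placement in isolation, here is a minimal, self-contained sketch of the same idea. The Worker and DriverRequest case classes and the placeDrivers helper are hypothetical names invented for this example (they are not Spark's own types); the loop structure mirrors schedule() above.

import scala.util.Random
import scala.collection.mutable

// Hypothetical stand-ins for WorkerInfo / DriverDescription, used only in this sketch.
case class Worker(id: String, var memoryFree: Int, var coresFree: Int)
case class DriverRequest(id: String, mem: Int, cores: Int)

/** Round-robin over shuffled workers, mirroring the driver loop in Master.schedule(). */
def placeDrivers(workers: Seq[Worker], drivers: Seq[DriverRequest]): Map[String, String] = {
  val shuffled = Random.shuffle(workers)
  val placements = mutable.Map.empty[String, String]
  var curPos = 0
  for (driver <- drivers) {
    var launched = false
    var visited = 0
    while (visited < shuffled.size && !launched) {
      val w = shuffled(curPos)
      visited += 1
      if (w.memoryFree >= driver.mem && w.coresFree >= driver.cores) {
        // "Launch": reserve the resources and record which worker got this driver.
        w.memoryFree -= driver.mem
        w.coresFree -= driver.cores
        placements(driver.id) = w.id
        launched = true
      }
      curPos = (curPos + 1) % shuffled.size
    }
  }
  placements.toMap
}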
① Driver allocation
// Launch a driver on a specific worker.
private def launchDriver(worker: WorkerInfo, driver: DriverInfo) {
  logInfo("Launching driver " + driver.id + " on worker " + worker.id)
  // Record the driver in the worker's in-memory state: addDriver also adds the driver's
  // memory and core requirements to the worker's used totals.
  worker.addDriver(driver)
  // Also record the worker on the driver side, so each one can find the other.
  driver.worker = Some(worker)
  // Send a LaunchDriver message telling the worker to start the driver.
  worker.endpoint.send(LaunchDriver(driver.id, driver.desc))
  // Mark the driver as RUNNING.
  driver.state = DriverState.RUNNING
}
private def removeDriver(
    driverId: String,
    finalState: DriverState,
    exception: Option[Exception]) {
  // Look up the driver with the given id.
  drivers.find(d => d.id == driverId) match {
    case Some(driver) =>
      logInfo(s"Removing driver: $driverId")
      drivers -= driver // remove the driver from the in-memory set
      if (completedDrivers.size >= RETAINED_DRIVERS) {
        val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
        completedDrivers.trimStart(toRemove)
      }
      completedDrivers += driver // add it to the completed drivers
      persistenceEngine.removeDriver(driver) // remove it from the persistence engine; it no longer needs to be recovered
      driver.state = finalState
      driver.exception = exception
      driver.worker.foreach(w => w.removeDriver(driver)) // release its resources on the worker
      schedule() // resources were freed, so run scheduling again
    case None =>
      logWarning(s"Asked to remove unknown driver: $driverId")
  }
}
② Application allocation
/**
 * Schedule and launch executors on workers
 *
 * There are two scheduling modes for an application:
 * ① spreadOut: spread the cores the app needs as evenly as possible across the workers
 * ② non-spreadOut: pack the app onto as few workers as possible, leaving the remaining
 *   workers free for other applications
 */
private def startExecutorsOnWorkers(): Unit = {
  // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
  // in the queue, then the second app, etc.
  // Iterate over the waiting apps that still have cores left to allocate.
  for (app <- waitingApps if app.coresLeft > 0) {
    val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor
    // Filter out workers that don't have enough resources to launch an executor
    // Keep only ALIVE workers with enough free memory and free cores to launch
    // at least one executor for this app, sorted by free cores in descending order.
    val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
      .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
        worker.coresFree >= coresPerExecutor.getOrElse(1))
      .sortBy(_.coresFree).reverse
    val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)
    // Now that we've decided how many cores to allocate on each worker, let's allocate them
    for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
      allocateWorkerResourceToExecutors(
        app, assignedCores(pos), coresPerExecutor, usableWorkers(pos))
    }
  }
}
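The spreadOutApps flag passed into scheduleExecutorsOnWorkers is read from the Master's configuration: in standalone mode it corresponds to spark.deploy.spreadOut, which defaults to true. To get the consolidating behavior described above, it can be turned off on the Master side, for example:

# in the Master's spark-defaults.conf (or via SPARK_MASTER_OPTS="-Dspark.deploy.spreadOut=false")
spark.deploy.spreadOut  false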
private def scheduleExecutorsOnWorkers(
    app: ApplicationInfo,
    usableWorkers: Array[WorkerInfo],
    spreadOutApps: Boolean): Array[Int] = {
  val coresPerExecutor = app.desc.coresPerExecutor
  val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
  val oneExecutorPerWorker = coresPerExecutor.isEmpty
  val memoryPerExecutor = app.desc.memoryPerExecutorMB
  val numUsable = usableWorkers.length
  val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
  val assignedExecutors = new Array[Int](numUsable) // Number of new executors on each worker
  // coresToAssign is the number of cores to hand out to this app: the minimum of the cores
  // the app still needs and the total free cores across all usable workers.
  var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
...
  while (freeWorkers.nonEmpty) {
    freeWorkers.foreach { pos =>
      var keepScheduling = true
      while (keepScheduling && canLaunchExecutor(pos)) {
        coresToAssign -= minCoresPerExecutor
        assignedCores(pos) += minCoresPerExecutor
        // If we are launching one executor per worker, then every iteration assigns 1 core
        // to the executor. Otherwise, every iteration assigns cores to a new executor.
        // In spread-out mode the cores an app needs are distributed evenly across the
        // workers. For example, with 20 CPU cores to assign and 10 usable workers, the
        // outer loop makes two passes: the first pass gives each worker one core, and after
        // the second pass each worker has 2 cores; any further cores keep accumulating in
        // the same round-robin way.
        if (oneExecutorPerWorker) {
          assignedExecutors(pos) = 1
        } else {
          assignedExecutors(pos) += 1
        }
        // Spreading out an application means spreading out its executors across as
        // many workers as possible. If we are not spreading out, then we should keep
        // scheduling executors on this worker until we use all of its resources.
        // Otherwise, just move on to the next worker.
        if (spreadOutApps) {
          keepScheduling = false
        }
      }
    }
    freeWorkers = freeWorkers.filter(canLaunchExecutor)
  }
  assignedCores
}
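The effect of the spreadOut flag is easiest to see with a stripped-down version of the assignment loop. The sketch below ignores memory and executor-count limits and only tracks cores; the assignCores helper and its parameter names are hypothetical, written only to illustrate the two modes.

/**
 * Simplified core assignment: coresFree holds each worker's free cores, coresNeeded is
 * what the app still wants, and minCores is the step size (coresPerExecutor, or 1 if unset).
 */
def assignCores(coresFree: Array[Int], coresNeeded: Int, minCores: Int,
                spreadOut: Boolean): Array[Int] = {
  val assigned = new Array[Int](coresFree.length)
  var toAssign = math.min(coresNeeded, coresFree.sum)
  def canLaunch(pos: Int): Boolean =
    toAssign >= minCores && coresFree(pos) - assigned(pos) >= minCores
  var free = (0 until coresFree.length).filter(canLaunch)
  while (free.nonEmpty) {
    free.foreach { pos =>
      var keepScheduling = true
      while (keepScheduling && canLaunch(pos)) {
        toAssign -= minCores
        assigned(pos) += minCores
        if (spreadOut) keepScheduling = false // spread-out: move to the next worker after one step
      }
    }
    free = free.filter(canLaunch)
  }
  assigned
}

// With 3 workers that each have 8 free cores and an app asking for 12 cores:
// assignCores(Array(8, 8, 8), 12, 1, spreadOut = true)  -> Array(4, 4, 4)
// assignCores(Array(8, 8, 8), 12, 1, spreadOut = false) -> Array(8, 4, 0)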
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // If the number of cores per executor is specified, we divide the cores assigned
  // to this worker evenly among the executors with no remainder.
  // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
  val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
  val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
  for (i <- 1 to numExecutors) {
    val exec = app.addExecutor(worker, coresToAssign)
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}
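Concretely, the executor split on one worker comes down to a single division. The executorSplit helper below is a hypothetical stand-in that reproduces just that arithmetic, with illustrative values.

// Hypothetical helper reproducing the arithmetic of allocateWorkerResourceToExecutors.
def executorSplit(assignedCores: Int, coresPerExecutor: Option[Int]): (Int, Int) = {
  val numExecutors = coresPerExecutor.map(assignedCores / _).getOrElse(1)
  val coresEach = coresPerExecutor.getOrElse(assignedCores)
  (numExecutors, coresEach)
}

executorSplit(8, Some(2)) // (4, 2): four executors with 2 cores each
executorSplit(8, None)    // (1, 8): one executor grabbing all 8 assigned cores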
Different Spark versions differ in small details here, but the main structure and ideas are the same.