近来学习 Spark 执行流程,追踪并解读 Spark 启动 Executor 的源码。Scala 版本为 2.12.10,Spark 版本为 spark-3.0.0-preview2-bin-hadoop2.7。
SparkContext.scala
// start TaskScheduler after taskScheduler sets DAGScheduler reference in DAGScheduler's
// constructor
558 row
_taskScheduler.start()
TaskScheduler 的实现类为 TaskSchedulerImpl,跳转到 TaskSchedulerImpl.scala
196 row
backend.start()
backend对象的实现类为 StandaloneSchedulerBackend
跳转到StandaloneSchedulerBackend.scala
63 row
super.start()
//启动 SchedulerBackend 中的 DriverEndpoint(旧版本中称为 DriverActor)
CoarseGrainedSchedulerBackend.scala 423 row
/**
 * Creates the driver endpoint for this scheduler backend.
 *
 * Declared `protected`, so subclasses may override it.
 *
 * @return a newly constructed DriverEndpoint
 */
protected def createDriverEndpoint(): DriverEndpoint = {
  new DriverEndpoint()
}
//接下来创建并启动 StandaloneAppClient(旧版本中称为 ClientActor)
118 row -125 row
val appDesc = ApplicationDescription(sc.appName, maxCores, sc.executorMemory, command,
webUrl, sc.eventLogDir, sc.eventLogCodec, coresPerExecutor, initialExecutorLimit,
resourceReqsPerExecutor = executorResourceReqs)
转到 121 row
client = new StandaloneAppClient(sc.env.rpcEnv, masters, appDesc, this, conf)
StandaloneAppClient.scala
转到 84 row
/**
 * Endpoint start-up callback: begins registration with the master.
 *
 * If the registration call throws, the failure is logged, the client is marked as
 * disconnected, and the endpoint is stopped.
 */
override def onStart(): Unit =
  try {
    // Begin registering with the master (1 is presumably the attempt number -- confirm
    // against registerWithMaster).
    registerWithMaster(1)
  } catch {
    case cause: Exception =>
      logWarning("Failed to connect to master", cause)
      markDisconnected()
      stop()
  }
client.start()
转到 141 row
registerMasterFutures.set(tryRegisterAllMasters())
RpcEndpoint.scala 执行流程
* {@code constructor -> onStart -> receive* -> onStop}
转到 107 row,开始真正注册。
其中发送的 appDescription 对象,就是 Master 端接收到的 description 对象
masterRef.send(RegisterApplication(appDescription, self))
转到Master.scala 272
// Master-side handler for RegisterApplication. `description` is the appDescription that
// StandaloneAppClient sent inside its RegisterApplication message; `driver` is the reference
// used to reply to the sender.
case RegisterApplication(description, driver) =>
  // TODO Prevent repeated registrations from some driver
  if (state == RecoveryState.STANDBY) {
    // ignore, don't send response
  } else {
    logInfo("Registering app " + description.name)
    val app = createApplication(description, driver)
    registerApplication(app)
    logInfo("Registered app " + description.name + " with ID " + app.id)
    persistenceEngine.addApplication(app)
    // Reply to the client that registration has completed.
    driver.send(RegisteredApplication(app.id, self))
    // Schedule resources; this is what leads to executors being started on workers
    // (schedule() is at line 777 of Master.scala in this walkthrough).
    schedule()
  }
跳转到 907 row
/**
 * Records a newly created application in the master's bookkeeping structures and appends
 * it to the queue of applications waiting to be scheduled.
 *
 * If an application is already registered for the same driver address, the call only logs
 * a message and changes nothing.
 *
 * @param app the application to register
 */
private def registerApplication(app: ApplicationInfo): Unit = {
  val driverAddress = app.driver.address
  if (addressToApp.contains(driverAddress)) {
    logInfo("Attempted to re-register application at same address: " + driverAddress)
  } else {
    applicationMetricsSystem.registerSource(app.appSource)
    apps += app
    idToApp(app.id) = app
    endpointToApp(app.driver) = app
    addressToApp(driverAddress) = app
    // Append to the waiting queue; startExecutorsOnWorkers walks this buffer in order
    // (simple FIFO scheduling).
    waitingApps += app
  }
}
// Applications waiting to be scheduled: registerApplication appends to this buffer and
// startExecutorsOnWorkers iterates over it in order.
private val waitingApps = new ArrayBuffer[ApplicationInfo]
跳转 777行
/**
* Schedule the currently available resources among waiting apps. This method will be called
* every time a new app joins or resource availability changes.
*
* Returns immediately unless this master's state is ALIVE. Waiting drivers are placed on
* workers first; afterwards startExecutorsOnWorkers() is called to place executors.
*/
private def schedule(): Unit = {
if (state != RecoveryState.ALIVE) {
return
}
// Drivers take strict precedence over executors
// Only workers in the ALIVE state are candidates, and they are visited in shuffled order.
val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
val numWorkersAlive = shuffledAliveWorkers.size
// Index into shuffledAliveWorkers; it is shared across drivers, so each driver continues
// from where the previous one stopped.
var curPos = 0
for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
// We assign workers to each waiting driver in a round-robin fashion. For each driver, we
// start from the last worker that was assigned a driver, and continue onwards until we have
// explored all alive workers.
var launched = false
var isClusterIdle = true
var numWorkersVisited = 0
while (numWorkersVisited < numWorkersAlive && !launched) {
val worker = shuffledAliveWorkers(curPos)
// NOTE(review): this is overwritten on every iteration, so after the loop it reflects only
// the most recently visited worker.
isClusterIdle = worker.drivers.isEmpty && worker.executors.isEmpty
numWorkersVisited += 1
if (canLaunchDriver(worker, driver.desc)) {
val allocated = worker.acquireResources(driver.desc.resourceReqs)
driver.withResources(allocated)
// Launch the driver on this worker (this step starts a driver, not an executor).
launchDriver(worker, driver)
waitingDrivers -= driver
launched = true
}
curPos = (curPos + 1) % numWorkersAlive
}
if (!launched && isClusterIdle) {
logWarning(s"Driver ${driver.id} requires more resource than any of Workers could have.")
}
}
// With drivers handled, try to place executors for the waiting applications.
startExecutorsOnWorkers()
}
696行
/**
 * Schedule and launch executors on workers.
 *
 * Walks the waiting applications in queue order. For every application that still needs at
 * least `coresPerExecutor` cores (1 when unset), it picks the ALIVE workers able to host an
 * executor, decides how many cores each worker contributes, and launches executors on them.
 */
private def startExecutorsOnWorkers(): Unit = {
  // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
  // in the queue, then the second app, etc.
  for (app <- waitingApps) {
    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
    // If the cores left is less than the coresPerExecutor,the cores left will not be allocated
    if (app.coresLeft >= coresPerExecutor) {
      // Filter out workers that don't have enough resources to launch an executor;
      // the remaining workers are ordered by free cores, most first.
      val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
        .filter(canLaunchExecutor(_, app.desc))
        .sortBy(_.coresFree).reverse
      if (waitingApps.length == 1 && usableWorkers.isEmpty) {
        logWarning(s"App ${app.id} requires more resource than any of Workers could have.")
      }
      val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)
      // Now that we've decided how many cores to allocate on each worker, let's allocate them
      for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
        // Allocate this worker's share of resources to executors and launch them.
        allocateWorkerResourceToExecutors(
          app, assignedCores(pos), app.desc.coresPerExecutor, usableWorkers(pos))
      }
    }
  }
}
721 行
/**
 * Allocate a worker's resources to one or more executors.
 *
 * Each executor created here is registered with the application, launched on the worker,
 * and the application is marked RUNNING.
 *
 * @param app the info of the application which the executors belong to
 * @param assignedCores number of cores on this worker for this application
 * @param coresPerExecutor number of cores per executor
 * @param worker the worker info
 */
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // If the number of cores per executor is specified, we divide the cores assigned
  // to this worker evenly among the executors with no remainder.
  // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
  val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
  val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
  for (i <- 1 to numExecutors) {
    val allocated = worker.acquireResources(app.desc.resourceReqsPerExecutor)
    val exec = app.addExecutor(worker, coresToAssign, allocated)
    // Launch the executor on the worker.
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}
816行
/**
 * Launches one executor: records it on the worker, asks the worker to start it, and tells
 * the application's driver that an executor was added.
 *
 * @param worker the worker that will host the executor
 * @param exec description of the executor to launch
 */
private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
  logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
  worker.addExecutor(exec)

  val owningApp = exec.application

  // Send LaunchExecutor to the worker endpoint; the Worker handles this message to start
  // the executor.
  val launchRequest = LaunchExecutor(
    masterUrl, owningApp.id, exec.id, owningApp.desc, exec.cores, exec.memory, exec.resources)
  worker.endpoint.send(launchRequest)

  // Notify the driver of the new executor.
  val addedNotice =
    ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
  owningApp.driver.send(addedNotice)
}
Worker.scala 549 行
// Worker-side handling of the LaunchExecutor message: this is where the executor is
// actually launched. Requests whose masterUrl differs from activeMasterUrl are only logged.
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_, resources_) =>
if (masterUrl != activeMasterUrl) {
logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
} else {
try {
logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
// Create the executor's working directory
val executorDir = new File(workDir, appId + "/" + execId)
if (!executorDir.mkdirs()) {
throw new IOException("Failed to create directory " + executorDir)
}
// Create local dirs for the executor. These are passed to the executor via the
// SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
// application finishes.
// Directories already recorded for this appId are reused; otherwise one is created under
// each local root dir, skipping roots where creation fails with an IOException.
val appLocalDirs = appDirectories.getOrElse(appId, {
val localRootDirs = Utils.getOrCreateLocalRootDirs(conf)
val dirs = localRootDirs.flatMap { dir =>
try {
val appDir = Utils.createDirectory(dir, namePrefix = "executor")
Utils.chmod700(appDir)
Some(appDir.getAbsolutePath())
} catch {
case e: IOException =>
logWarning(s"${e.getMessage}. Ignoring this directory.")
None
}
}.toSeq
if (dirs.isEmpty) {
throw new IOException("No subfolder can be created in " +
s"${localRootDirs.mkString(",")}.")
}
dirs
})
appDirectories(appId) = appLocalDirs
// The executor is started through an ExecutorRunner built from the fields of the
// LaunchExecutor message sent by the Master.
val manager = new ExecutorRunner(
appId,
execId,
appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
cores_,
memory_,
self,
workerId,
webUi.scheme,
host,
webUi.boundPort,
publicAddress,
sparkHome,
executorDir,
workerUri,
conf,
appLocalDirs,
ExecutorState.LAUNCHING,
resources_)
// Registered before start() so the catch block below can find and kill it on failure.
executors(appId + "/" + execId) = manager
// manager.start() is the call that actually starts the executor.
manager.start()
// Account for the cores, memory and resources now used by this executor.
coresUsed += cores_
memoryUsed += memory_
addResourcesUsed(resources_)
} catch {
case e: Exception =>
logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
if (executors.contains(appId + "/" + execId)) {
executors(appId + "/" + execId).kill()
executors -= appId + "/" + execId
}
// Failure path: report the executor to the Master as FAILED, with the exception text as
// the message.
sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
Some(e.toString), None))
}
}
ExecutorRunner.scala
/**
 * Starts this runner: spawns a thread that executes fetchAndRunExecutor() and registers a
 * shutdown hook that kills the executor process.
 */
private[worker] def start(): Unit = {
  // Run fetchAndRunExecutor() on its own thread.
  val runnerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run(): Unit = fetchAndRunExecutor()
  }
  workerThread = runnerThread
  runnerThread.start()

  // Callback for the shutdown hook.
  def onWorkerShutdown(): Unit = {
    // The hook may fire before `fetchAndRunExecutor` has been entered, in which case `state`
    // is still `ExecutorState.LAUNCHING`; treat that as a failed launch.
    if (state == ExecutorState.LAUNCHING) {
      state = ExecutorState.FAILED
    }
    killProcess(Some("Worker shutting down"))
  }

  // Shutdown hook that kills actors on shutdown.
  shutdownHook = ShutdownHookManager.addShutdownHook(() => onWorkerShutdown())
}
148行
/**
* Download and run the executor described in our ApplicationDescription
*
* Builds the launch command from appDesc.command, starts the process in executorDir,
* redirects its stdout/stderr to files there, reports RUNNING to the worker, then blocks
* until the process exits and reports EXITED with the exit code. An InterruptedException
* sets the state to KILLED and any other Exception sets it to FAILED; both kill the process.
*/
private def fetchAndRunExecutor(): Unit = {
try {
val resourceFileOpt = prepareResourcesFile(SPARK_EXECUTOR_PREFIX, resources, executorDir)
// Launch the process
// When a resources file was prepared, its path is appended as a --resourcesFile argument.
val arguments = appDesc.command.arguments ++ resourceFileOpt.map(f =>
Seq("--resourcesFile", f.getAbsolutePath)).getOrElse(Seq.empty)
val subsOpts = appDesc.command.javaOpts.map {
Utils.substituteAppNExecIds(_, appId, execId.toString)
}
val subsCommand = appDesc.command.copy(arguments = arguments, javaOpts = subsOpts)
val builder = CommandUtils.buildProcessBuilder(subsCommand, new SecurityManager(conf),
memory, sparkHome.getAbsolutePath, substituteVariables)
val command = builder.command()
// Only the redacted form of the command line is logged and written to the stderr header.
val redactedCommand = Utils.redactCommandLineArgs(conf, command.asScala)
.mkString("\"", "\" \"", "\"")
logInfo(s"Launch command: $redactedCommand")
builder.directory(executorDir)
builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
// In case we are running this from within the Spark Shell, avoid creating a "scala"
// parent process for the executor command
builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")
// Add webUI log urls
val baseUrl =
if (conf.get(UI_REVERSE_PROXY)) {
s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
} else {
s"$webUiScheme$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
}
builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")
// Start the executor process.
process = builder.start()
val header = "Spark Executor Command: %s\n%s\n\n".format(
redactedCommand, "=" * 40)
// Redirect its stdout and stderr to files
val stdout = new File(executorDir, "stdout")
stdoutAppender = FileAppender(process.getInputStream, stdout, conf)
val stderr = new File(executorDir, "stderr")
// The header is written to the stderr file before the stderr appender is attached.
Files.write(header, stderr, StandardCharsets.UTF_8)
stderrAppender = FileAppender(process.getErrorStream, stderr, conf)
state = ExecutorState.RUNNING
worker.send(ExecutorStateChanged(appId, execId, state, None, None))
// Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
// or with nonzero exit code
val exitCode = process.waitFor()
state = ExecutorState.EXITED
val message = "Command exited with code " + exitCode
worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode)))
} catch {
case interrupted: InterruptedException =>
logInfo("Runner thread for executor " + fullId + " interrupted")
state = ExecutorState.KILLED
killProcess(None)
case e: Exception =>
logError("Error running executor", e)
state = ExecutorState.FAILED
killProcess(Some(e.toString))
}
}
CommandUtils.scala
/**
 * Creates a ProcessBuilder for the given command.
 *
 * The command is first resolved via buildLocalCommand, then turned into an argument list
 * via buildCommandSeq; the resolved command's environment entries are copied into the
 * builder's environment. The `env` argument is exposed for testing.
 *
 * @return a ProcessBuilder configured with the command line and environment
 */
def buildProcessBuilder(
    command: Command,
    securityMgr: SecurityManager,
    memory: Int,
    sparkHome: String,
    substituteArguments: String => String,
    classPaths: Seq[String] = Seq.empty,
    env: Map[String, String] = sys.env): ProcessBuilder = {
  val resolved = buildLocalCommand(command, securityMgr, substituteArguments, classPaths, env)
  val processBuilder = new ProcessBuilder(buildCommandSeq(resolved, memory, sparkHome): _*)

  // Copy the resolved command's environment entries into the child process environment.
  val childEnvironment = processBuilder.environment()
  resolved.environment.foreach { case (name, setting) =>
    childEnvironment.put(name, setting)
  }

  processBuilder
}
60行
/**
 * Assembles the full argument list for the process: the command produced by
 * WorkerCommandBuilder, followed by the main class, followed by the command's arguments.
 */
private def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = {
  // SPARK-698: do not call the run.cmd script, as process.destroy()
  // fails to kill a process tree on Windows
  // The launcher prefix comes from WorkerCommandBuilder.buildCommand().
  val launcherPrefix = new WorkerCommandBuilder(sparkHome, memory, command).buildCommand().asScala
  val mainClassArg = Seq(command.mainClass)
  launcherPrefix ++ mainClassArg ++ command.arguments
}
/**
 * Command builder used by CommandUtils. It relies on package-private APIs of SparkLauncher;
 * because Java has no equivalent of `private[spark]` and that class should not be made
 * public, this class has to live in the same package as the rest of the launcher library.
 *
 * On construction, the command's environment and the Spark home are placed into `childEnv`.
 */
private[spark] class WorkerCommandBuilder(sparkHome: String, memoryMb: Int, command: Command)
    extends AbstractCommandBuilder {

  childEnv.putAll(command.environment.asJava)
  childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, sparkHome)

  /**
   * Builds the java command: the base java command for the command's classpath, then an
   * `-Xmx` option derived from `memoryMb`, then the command's java options. The `env`
   * argument is not used by this implementation.
   */
  override def buildCommand(env: JMap[String, String]): JList[String] = {
    val classPath = command.classPathEntries.mkString(File.pathSeparator)
    val javaCmd = buildJavaCommand(classPath)
    javaCmd.add(s"-Xmx${memoryMb}M")
    command.javaOpts.foreach(opt => javaCmd.add(opt))
    javaCmd
  }

  /** Convenience overload that calls `buildCommand` with a fresh, empty map. */
  def buildCommand(): JList[String] = {
    val emptyEnv = new JHashMap[String, String]()
    buildCommand(emptyEnv)
  }
}
至此,Executor 启动完成。