一、LaunchDriver
创建driverRunner
内部启动一个线程(DriverRunner.start)启动DriverRunner
- 创建driver的工作目录
- 构建ProcessBuilder,传入了driver的启动命令,需要的内存大小等信息
- 启动driver
二、LaunchExecutor
1、创建executor的工作目录
2、创建ExecutorRunner
3、内部启动一个线程(ExecutorRunner.start)启动ExecutorRunner
调用fetchAndRunExecutor()
1) 封装一个ProcessBuilder
2) 调用ProcessBuilder的start()方法启动executor进程,再调用Process的waitFor()方法阻塞等待该进程退出
3) executor执行完之后拿到返回状态
4) 向ExecutorRunner线程所属的worker actor,发送ExecutorStateChanged消息
4、向master返回一个ExecutorStateChanged消息
LaunchDriver
// Handles a LaunchDriver request: builds a DriverRunner for the requested driver, caches it,
// starts it, and adds the driver's cores and memory to this worker's usage counters.
case LaunchDriver(driverId, driverDesc) => {
  logInfo(s"Asked to launch driver $driverId")
  // The driver command is passed through Worker.maybeUpdateSSLSettings before it is used.
  val sslAwareCommand = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)
  val launchDesc = driverDesc.copy(command = sslAwareCommand)
  val runner = new DriverRunner(conf, driverId, workDir, sparkHome, launchDesc, self, akkaUrl)
  // Cache the runner under its driver id before starting it.
  drivers(driverId) = runner
  runner.start()
  // Account for the resources now taken by this driver.
  coresUsed += driverDesc.cores
  memoryUsed += driverDesc.mem
}
-> DriverRunner.start 创建一个线程
/**
 * Starts a dedicated thread that prepares and launches the driver, then records the driver's
 * final state and reports it to the worker with a DriverStateChanged message.
 */
def start() = {
  val runnerThread = new Thread("DriverRunner for " + driverId) {
    override def run() {
      try {
        // Step 1: create the driver's working directory.
        val workingDir = createWorkingDirectory()
        // Step 2: download the user-submitted jar.
        val userJarPath = downloadUserJar(workingDir)

        // Swaps the known placeholders in a command argument for their actual values;
        // any other argument is passed through untouched.
        def resolvePlaceholder(argument: String): String = argument match {
          case "{{WORKER_URL}}" => workerUrl
          case "{{USER_JAR}}" => userJarPath
          case other => other
        }

        // If we add ability to submit multiple jars they should also be added here
        // Step 3: build the ProcessBuilder from the driver command and its memory setting.
        val processBuilder = CommandUtils.buildProcessBuilder(
          driverDesc.command, driverDesc.mem, sparkHome.getAbsolutePath, resolvePlaceholder)
        // Step 4: launch the driver.
        launchDriver(processBuilder, workingDir, driverDesc.supervise)
      } catch {
        case e: Exception => finalException = Some(e)
      }

      // Decide the final state: a kill wins over an error, which wins over the exit code.
      val state = finalExitCode match {
        case _ if killed => DriverState.KILLED
        case _ if finalException.isDefined => DriverState.ERROR
        case Some(0) => DriverState.FINISHED
        case _ => DriverState.FAILED
      }
      finalState = Some(state)

      // Tell the worker that owns this runner how the driver ended.
      worker ! DriverStateChanged(driverId, state, finalException)
    }
  }
  runnerThread.start()
}
-> launchDriver
/**
 * Runs the driver command with `baseDir` as its working directory and sends the process's
 * stdout and stderr to files named `stdout` and `stderr` inside that directory.
 *
 * @param builder   process builder holding the driver launch command
 * @param baseDir   working directory of the process; also where the output files are written
 * @param supervise forwarded unchanged to runCommandWithRetry
 */
private def launchDriver(builder: ProcessBuilder, baseDir: File, supervise: Boolean) {
  builder.directory(baseDir)

  // Callback given to runCommandWithRetry: wires a process's output streams to files.
  def redirectOutput(process: Process) = {
    val outFile = new File(baseDir, "stdout")
    val errFile = new File(baseDir, "stderr")
    CommandUtils.redirectStream(process.getInputStream, outFile)
    // Append the launch command to the stderr file before attaching the error stream to it.
    val quotedCommand = builder.command.mkString("\"", "\" \"", "\"")
    val banner = "Launch Command: %s\n%s\n\n".format(quotedCommand, "=" * 40)
    Files.append(banner, errFile, UTF_8)
    CommandUtils.redirectStream(process.getErrorStream, errFile)
  }

  runCommandWithRetry(ProcessBuilderLike(builder), redirectOutput, supervise)
}
LaunchExecutor
// Handles a LaunchExecutor request. Requests whose master URL differs from the active master
// are only logged. Otherwise the executor's directories are prepared, an ExecutorRunner is
// created, cached and started, and the resulting state is reported to the master.
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
  if (masterUrl == activeMasterUrl) {
    // Used both as the cache key of the runner and as the executor directory's relative path.
    val executorKey = appId + "/" + execId
    try {
      logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

      // Create the executor's working directory
      val executorDir = new File(workDir, executorKey)
      if (!executorDir.mkdirs()) {
        throw new IOException("Failed to create directory " + executorDir)
      }

      // Create local dirs for the executor. These are passed to the executor via the
      // SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the
      // application finishes. Directories already recorded for this application are reused.
      val appLocalDirs = appDirectories.getOrElse(appId, {
        Utils.getOrCreateLocalRootDirs(conf).map { dir =>
          Utils.createDirectory(dir).getAbsolutePath()
        }.toSeq
      })
      appDirectories(appId) = appLocalDirs

      // Build the ExecutorRunner with everything it needs to launch the executor.
      val sslAwareCommand = Worker.maybeUpdateSSLSettings(appDesc.command, conf)
      val manager = new ExecutorRunner(
        appId, execId, appDesc.copy(command = sslAwareCommand), cores_, memory_,
        self, workerId, host, webUi.boundPort, publicAddress,
        sparkHome, executorDir, akkaUrl, conf,
        appLocalDirs, ExecutorState.LOADING)
      // Cache the runner locally before starting it so the failure path can find it.
      executors(executorKey) = manager
      manager.start()

      // Account for the resources now taken by this executor.
      coresUsed += cores_
      memoryUsed += memory_

      // Report the runner's current state back to the master.
      master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
    } catch {
      case e: Exception => {
        logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
        // If a runner was already cached, kill it and drop it from the cache.
        executors.get(executorKey).foreach { failedRunner =>
          failedRunner.kill()
          executors -= executorKey
        }
        master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
          Some(e.toString), None)
      }
    }
  } else {
    logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
  }
-> manager.start() -> ExecutorRunner.start
/**
 * Launches the executor on a separate thread and registers a JVM shutdown hook.
 *
 * fetchAndRunExecutor() runs on its own thread, so the caller of start() continues
 * immediately instead of blocking while the executor is launched.
 */
def start() {
  val launchTask = new Runnable {
    override def run() {
      fetchAndRunExecutor()
    }
  }
  workerThread = new Thread(launchTask, "ExecutorRunner for " + fullId)
  workerThread.start()

  // Shutdown hook that calls killProcess when the JVM shuts down.
  val cleanupTask = new Runnable {
    override def run() {
      killProcess(Some("Worker shutting down"))
    }
  }
  shutdownHook = new Thread(cleanupTask)
  Runtime.getRuntime.addShutdownHook(shutdownHook)
}
->fetchAndRunExecutor
/**
 * Builds the executor launch command, starts it as a child process, redirects the process's
 * stdout and stderr to files in the executor directory, blocks until the process exits and
 * then reports the exit to the worker.
 *
 * On interruption the state becomes KILLED, on any other exception FAILED; in both cases
 * killProcess is called.
 */
def fetchAndRunExecutor() {
  try {
    // Assemble the ProcessBuilder for the executor command.
    val builder = CommandUtils.buildProcessBuilder(
      appDesc.command, memory, sparkHome.getAbsolutePath, substituteVariables)
    val command = builder.command()
    val quotedCommand = command.mkString("\"", "\" \"", "\"")
    logInfo("Launch command: " + quotedCommand)

    builder.directory(executorDir)
    val env = builder.environment
    env.put("SPARK_LOCAL_DIRS", appLocalDirs.mkString(","))
    // In case we are running this from within the Spark Shell, avoid creating a "scala"
    // parent process for the executor command
    env.put("SPARK_LAUNCH_WITH_SCALA", "0")

    // Add webUI log urls
    val logUrlPrefix =
      s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
    env.put("SPARK_LOG_URL_STDERR", s"${logUrlPrefix}stderr")
    env.put("SPARK_LOG_URL_STDOUT", s"${logUrlPrefix}stdout")

    // This call is what actually starts the child process.
    process = builder.start()

    // Redirect its stdout and stderr to files; stderr first receives a header with the command.
    val outFile = new File(executorDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, outFile, conf)
    val errFile = new File(executorDir, "stderr")
    val banner = "Spark Executor Command: %s\n%s\n\n".format(quotedCommand, "=" * 40)
    Files.write(banner, errFile, UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, errFile, conf)

    // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
    // or with nonzero exit code. waitFor() only blocks until the already-started process ends.
    val exitCode = process.waitFor()
    state = ExecutorState.EXITED
    val message = "Command exited with code " + exitCode
    // Notify the worker that owns this runner that the executor has exited.
    worker ! ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))
  } catch {
    case interrupted: InterruptedException =>
      logInfo("Runner thread for executor " + fullId + " interrupted")
      state = ExecutorState.KILLED
      killProcess(None)
    case e: Exception =>
      logError("Error running executor", e)
      state = ExecutorState.FAILED
      killProcess(Some(e.toString))
  }
}