本文解读SparkSubmit以Yarn Cluster模式提交应用时的启动流程。
SparkSubmit类的runMain()中执行到start()时,本地模式会进入本地提交的--class类的main中开始执行。
// 启动实例
app.start(childArgs.toArray, sparkConf)
而在Yarn Cluster模式下,prepareSubmitEnvironment()准备运行环境时已经把childMainClass设置为YARN_CLUSTER_SUBMIT_CLASS,所以这里的start()实际调用的是org.apache.spark.deploy.yarn.YarnClusterApplication类的start()。
// In yarn-cluster mode, use yarn.Client as a wrapper around the user class
// yarn-cluster模式下,使用yarn.Client作为用户提交类的包装执行器
if (isYarnCluster) {
// object SparkSubmit中有定义为"org.apache.spark.deploy.yarn.YarnClusterApplication"
childMainClass = YARN_CLUSTER_SUBMIT_CLASS
...
}
// 遍历用户传入的应用参数,逐个以 "--arg <值>" 的形式追加到childArgs(子进程参数)中
if (args.childArgs != null) {
args.childArgs.foreach { arg => childArgs += ("--arg", arg) }
}
}
YarnClusterApplication
YarnClusterApplication类位于org.apache.spark.deploy.yarn包中(与Client类定义在同一个源文件Client.scala里)。它的start()先从SparkConf中移除spark.jars和spark.files(yarn模式下这些资源改由yarn缓存分发),然后构建Client实例并通过Client类的run()运行。
/**
 * [[SparkApplication]] implementation whose `start()` hands the submission over to [[Client]].
 */
private[spark] class YarnClusterApplication extends SparkApplication {

  /**
   * Removes the jar/file entries from the configuration, then builds a [[Client]] from the
   * parsed arguments and runs it.
   *
   * @param args raw arguments; they are parsed by [[ClientArguments]]
   * @param conf Spark configuration; "spark.jars" and "spark.files" are removed from it
   */
  override def start(args: Array[String], conf: SparkConf): Unit = {
    // SparkSubmit distributes files and jars through the YARN cache in yarn mode, so these
    // two entries must not stay in the configuration.
    Seq("spark.jars", "spark.files").foreach(key => conf.remove(key))

    // The arguments are parsed first; the resulting ClientArguments feeds the Client.
    val clientArgs = new ClientArguments(args)
    val client = new Client(clientArgs, conf)
    client.run()
  }
}
ClientArguments
ClientArguments负责解析传入的参数:用户jar(--jar)、主类(--class)、Python/R主文件(--primary-py-file、--primary-r-file)以及传给应用的参数(--arg)。
// TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware !
/**
 * Parses the flag/value arguments handed to the YARN client.
 *
 * Parsing happens during construction: each recognised flag consumes the value that follows
 * it and stores that value in the matching field. An unrecognised token, or a flag that has
 * no value after it, raises an IllegalArgumentException carrying the usage text. Supplying
 * both a primary Python file and a primary R file raises one as well.
 *
 * @param args flag/value pairs such as `--jar <path>`, `--class <name>`, `--arg <value>`
 */
private[spark] class ClientArguments(args: Array[String]) {
  // Each of these stays null until its flag is seen.
  var userJar: String = null
  var userClass: String = null
  var primaryPyFile: String = null
  var primaryRFile: String = null
  // Values of every `--arg`, in the order they were given.
  var userArgs: ArrayBuffer[String] = new ArrayBuffer[String]()

  // Must come after the field initialisers above, because parsing assigns to those fields.
  parseArgs(args.toList)

  /**
   * Walks the argument list, filling in the fields, then validates the combination.
   *
   * @param inputArgs the full argument list
   * @throws IllegalArgumentException on an unsupported token or conflicting primary files
   */
  private def parseArgs(inputArgs: List[String]): Unit = {
    @scala.annotation.tailrec
    def consume(remaining: List[String]): Unit = remaining match {
      case Nil =>
        ()
      case "--jar" :: value :: rest =>
        userJar = value
        consume(rest)
      case "--class" :: value :: rest =>
        userClass = value
        consume(rest)
      case "--primary-py-file" :: value :: rest =>
        primaryPyFile = value
        consume(rest)
      case "--primary-r-file" :: value :: rest =>
        primaryRFile = value
        consume(rest)
      case "--arg" :: value :: rest =>
        userArgs += value
        consume(rest)
      case unsupported =>
        // The message reports everything that was still unparsed at this point.
        throw new IllegalArgumentException(getUsageMessage(unsupported))
    }

    consume(inputArgs)

    // A primary Python file and a primary R file are mutually exclusive.
    val hasPyFile = primaryPyFile != null
    val hasRFile = primaryRFile != null
    if (hasPyFile && hasRFile) {
      throw new IllegalArgumentException("Cannot have primary-py-file and primary-r-file" +
        " at the same time")
    }
  }

  /**
   * Builds the usage text, optionally prefixed with the offending arguments.
   *
   * @param unknownParam the arguments that could not be parsed, or null for plain usage text
   * @return the usage message
   */
  private def getUsageMessage(unknownParam: List[String] = null): String = {
    val prefix = Option(unknownParam).fold("")(p => s"Unknown/unsupported param $p\n")
    // The literal is kept flush-left: stripMargin only removes whitespace in front of a '|',
    // so indenting the closing quotes would add trailing blanks to the message.
    prefix +
      s"""
|Usage: org.apache.spark.deploy.yarn.Client [options]
|Options:
| --jar JAR_PATH Path to your application's JAR file (required in yarn-cluster
| mode)
| --class CLASS_NAME Name of your application's main class (required)
| --primary-py-file A main Python file
| --primary-r-file A main R file
| --arg ARG Argument to be passed to your application's main class.
| Multiple invocations are possible, each will be passed in order.
""".stripMargin
  }
}
Client
直接进入Client的run()。
private[spark] class Client(
val args: ClientArguments,
val sparkConf: SparkConf)
extends Logging {
...
/**
 * Submits the application to the ResourceManager and then checks its outcome.
 *
 * When no launcher is connected and `fireAndForget` is set, the application report is
 * fetched and logged once and the method returns right after that single check. Otherwise
 * the application is monitored through `monitorApplication` and its outcome is inspected.
 * NOTE(review): `fireAndForget` is defined outside this excerpt; it presumably reflects
 * spark.yarn.submit.waitAppCompletion being false - confirm against its definition.
 *
 * @throws SparkException if the application is reported as failed or killed, or (when
 *                        monitored) if its final status is undefined
 */
def run(): Unit = {
  this.appId = submitApplication()

  val detached = !launcherBackend.isConnected() && fireAndForget
  if (detached) {
    // Single status check right after submission.
    val report = getApplicationReport(appId)
    val state = report.getYarnApplicationState
    logInfo(s"Application report for $appId (state: $state)")
    logInfo(formatReportDetails(report))
    state match {
      case YarnApplicationState.FAILED | YarnApplicationState.KILLED =>
        throw new SparkException(s"Application $appId finished with status: $state")
      case _ => // every other state is accepted here
    }
  } else {
    monitorApplication(appId) match {
      case YarnAppReport(appState, finalState, diags) =>
        val failed =
          appState == YarnApplicationState.FAILED || finalState == FinalApplicationStatus.FAILED
        val killed =
          appState == YarnApplicationState.KILLED || finalState == FinalApplicationStatus.KILLED
        val undefinedStatus = finalState == FinalApplicationStatus.UNDEFINED

        // The checks run in this order: failed, then killed, then undefined.
        if (failed) {
          for (err <- diags) {
            logError(s"Application diagnostics message: $err")
          }
          throw new SparkException(s"Application $appId finished with failed status")
        } else if (killed) {
          throw new SparkException(s"Application $appId is killed")
        } else if (undefinedStatus) {
          throw new SparkException(s"The final status of application $appId is undefined")
        }
    }
  }
}
}
submitApplication()
看看提交app获取id的过程。
def submitApplication(): ApplicationId = {
var appId: ApplicationId = null
try {
// 初始化launcherBackend,与launcherServer建立连接
launcherBackend.connect()
// 初始化yarnClient
yarnClient.init(hadoopConf)
// 启动yarnClient,连接到集群,获取节点信息
yarnClient.start()
// 输出节点个数
logInfo("Requesting a new application from cluster with %d NodeManagers"
.format(yarnClient.getYarnClusterMetrics.getNumNodeManagers))
// Get a new application from our RM
// 调用接口向RM创建一个app
val newApp = yarnClient.createApplication()
// 获取app请求的响应
val newAppResponse = newApp.getNewApplicationResponse()
// 获取app的id
appId = newAppResponse.getApplicationId()
// 构建CallerContext(来源标记为"CLIENT",并携带appId),并将其设为当前上下文
new CallerContext("CLIENT", sparkConf.get(APP_CALLER_CONTEXT),
Option(appId.toString)).setCurrentContext()
// Verify whether the cluster has enough resources for our AM
// 验证集群是否有足够资源运行AM
verifyClusterResources(newAppResponse)
// Set up the appropriate contexts to launch our AM
// 启动Container用于启动AM,并设置环境变量
val containerContext = createContainerLaunchContext(newAppResponse)
val appContext = createApplicationSubmissionContext(newApp, containerContext)
// Finally, submit and monitor the application
logInfo(s"Submitting application $appId to ResourceManager")
// 提交app,通过appContext获取资源情况
yarnClient.submitApplication(appContext)
// 将appId告知launcherBackend,并上报状态为SUBMITTED
launcherBackend.setAppId(appId.toString)
reportLauncherState(SparkAppHandle.State.SUBMITTED)
// 返回appId
appId
} catch {
case e: Throwable =>