Spark source version: 3.1.2
The spark-submit command:
bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master yarn \
  ./examples/jars/spark-examples_2.12-3.1.2.jar 10
Opening bin/spark-submit, you can see the program entry class, org.apache.spark.deploy.SparkSubmit. (Note: the default deploy mode is client; the yarn-cluster path analyzed below assumes --deploy-mode cluster was also passed.)
if [ -z "${SPARK_HOME}" ]; then
source "$(dirname "$0")"/find-spark-home
fi
# disable randomized hash for string in Python 3.3+
export PYTHONHASHSEED=0
exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
The SparkSubmit class
object SparkSubmit {
  def main(args: Array[String]): Unit = {
    val submit = new SparkSubmit() { ... }
    ...
    submit.doSubmit(args)
  }
}
private[spark] class SparkSubmit {

  def doSubmit(args: Array[String]): Unit = {
    val uninitLog = initializeLogIfNecessary(true, silent = true)
    val appArgs = parseArguments(args)
    ...
    appArgs.action match {
      case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
    }
  }

  private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
    ...
    runMain(args, uninitLog)
  }
  private def runMain(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
    val (childArgs, childClasspath, sparkConf, childMainClass) =
      prepareSubmitEnvironment(args)
    ...
    // Check whether mainClass implements the SparkApplication trait
    val app: SparkApplication =
      if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
        // deploy-mode = cluster: YarnClusterApplication
        mainClass.getConstructor().newInstance().asInstanceOf[SparkApplication]
      } else {
        // deploy-mode = client: the main method inside the user jar
        new JavaMainApplication(mainClass)
      }
    ...
    app.start(childArgs.toArray, sparkConf)
  }
Dive into prepareSubmitEnvironment(args), which produces (childArgs, childClasspath, sparkConf, childMainClass):
- Assigning clusterManager
// Set the cluster manager
val clusterManager: Int = args.master match {
case "yarn" => YARN
case m if m.startsWith("spark") => STANDALONE
case m if m.startsWith("mesos") => MESOS
case m if m.startsWith("k8s") => KUBERNETES
case m if m.startsWith("local") => LOCAL
case _ =>
error("Master must either be yarn or start with spark, mesos, k8s, or local")
-1
}
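For reference, YARN, STANDALONE, etc. are bit-flag constants defined at the top of object SparkSubmit; in the 3.1.2 source they look roughly like this:

// Cluster manager flags in object SparkSubmit
private val YARN = 1
private val STANDALONE = 2
private val MESOS = 4
private val LOCAL = 8
private val KUBERNETES = 16
private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL | KUBERNETES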
- When deployMode == CLIENT, childMainClass is the main class of the user-supplied application jar
if (deployMode == CLIENT) {
// childMainClass is the main class of the user jar
childMainClass = args.mainClass
if (localPrimaryResource != null && isUserJar(localPrimaryResource)) {
childClasspath += localPrimaryResource
}
if (localJars != null) { childClasspath ++= localJars.split(",") }
}
- In yarn-cluster mode, childMainClass is set to org.apache.spark.deploy.yarn.YarnClusterApplication
// In yarn-cluster mode, use yarn.Client as a wrapper around the user class
if (isYarnCluster) {
childMainClass = YARN_CLUSTER_SUBMIT_CLASS
if (args.isPython) {
childArgs += ("--primary-py-file", args.primaryResource)
childArgs += ("--class", "org.apache.spark.deploy.PythonRunner")
} else if (args.isR) {
val mainFile = new Path(args.primaryResource).getName
childArgs += ("--primary-r-file", mainFile)
childArgs += ("--class", "org.apache.spark.deploy.RRunner")
} else {
if (args.primaryResource != SparkLauncher.NO_RESOURCE) {
childArgs += ("--jar", args.primaryResource)
}
childArgs += ("--class", args.mainClass)
}
if (args.childArgs != null) {
args.childArgs.foreach { arg => childArgs += ("--arg", arg) }
}
}
private[deploy] val YARN_CLUSTER_SUBMIT_CLASS =
"org.apache.spark.deploy.yarn.YarnClusterApplication"
Once the submit environment is prepared, runMain checks whether mainClass is a SparkApplication or something else. YarnClusterApplication is defined in Client.scala (alongside the Client class) and extends SparkApplication.
Finally, app.start is called.
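For context, the SparkApplication trait and its reflection-based fallback look roughly like this in the 3.1.2 source (abridged):

private[spark] trait SparkApplication {
  def start(args: Array[String], conf: SparkConf): Unit
}

// Wraps a plain main(Array[String]) class so it can be run as a SparkApplication
private[deploy] class JavaMainApplication(klass: Class[_]) extends SparkApplication {
  override def start(args: Array[String], conf: SparkConf): Unit = {
    val mainMethod = klass.getMethod("main", classOf[Array[String]])
    if (!Modifier.isStatic(mainMethod.getModifiers)) {
      throw new IllegalStateException("The main method in the given main class must be static")
    }
    ...
    mainMethod.invoke(null, args)
  }
}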
The Client class
YarnClusterApplication.start() creates a new Client and calls run().
In new Client(), yarnClient is assigned.
YarnClient.createYarnClient jumps into the Hadoop source,
where the ResourceManager (RM) client is initialized.
Once the Client is constructed, run() is invoked,
which enters submitApplication().
It then builds the container launch context and the application submission context,
and submits the appContext to the RM.
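Abridged from Client.scala in 3.1.2, the flow looks roughly like this:

private[spark] class YarnClusterApplication extends SparkApplication {
  override def start(args: Array[String], conf: SparkConf): Unit = {
    // SparkSubmit uses the YARN cache to distribute files/jars in yarn mode,
    // so remove them from sparkConf here
    conf.remove(JARS)
    conf.remove(FILES)
    new Client(new ClientArguments(args), conf, null).run()
  }
}

def submitApplication(): ApplicationId = {
  ...
  yarnClient.init(hadoopConf)
  yarnClient.start()
  // Ask the RM for a new application id
  val newApp = yarnClient.createApplication()
  val newAppResponse = newApp.getNewApplicationResponse()
  appId = newAppResponse.getApplicationId()
  ...
  // Set up the contexts to launch the ApplicationMaster container
  val containerContext = createContainerLaunchContext(newAppResponse)
  val appContext = createApplicationSubmissionContext(newApp, containerContext)
  yarnClient.submitApplication(appContext)
  ...
  appId
}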
val containerContext = createContainerLaunchContext(newAppResponse)
In this method, the command bin/java org.apache.spark.deploy.yarn.ApplicationMaster is wrapped into amContainer and sent to the RM along with the environment.
Note: the ApplicationMaster therefore runs as a separate process.
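The relevant command assembly inside createContainerLaunchContext is roughly the following (abridged from the 3.1.2 source); note that client mode launches ExecutorLauncher instead:

val amClass =
  if (isClusterMode) {
    Utils.classForName("org.apache.spark.deploy.yarn.ApplicationMaster").getName
  } else {
    Utils.classForName("org.apache.spark.deploy.yarn.ExecutorLauncher").getName
  }
...
val commands = prefixEnv ++
  Seq(Environment.JAVA_HOME.$$() + "/bin/java", "-server") ++
  javaOpts ++ amArgs ++
  Seq(
    "1>", ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout",
    "2>", ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")
val printableCommands = commands.map(s => if (s == null) "null" else s).toList
amContainer.setCommands(printableCommands.asJava)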
The ApplicationMaster class
In ApplicationMaster's main method, master.run() is called.
Inside master.run(), it checks whether this is cluster mode; if so, it calls runDriver().
Inside runDriver():
- Start the user application
1. Enter startUserApplication():
- Using a class loader, load userClass and obtain its main method
- Start a thread named "Driver" that invokes it (a sketch follows this list)
2. Wait asynchronously until the SparkContext is signaled (i.e., Spark initialization succeeded)
3. Once initialization finishes, check whether the SparkContext is null; if it initialized successfully, the ApplicationMaster registers itself with the RM:
registerAM(host, port, userConf, sc.ui.map(_.webUrl), appAttemptId)
4. Create the allocator:
createAllocator(driverRef, userConf, rpcEnv, appAttemptId, distCacheConf)
- Inside createAllocator there is a further call that allocates the resources, allocator.allocateResources():
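As referenced in step 1, startUserApplication looks roughly like this in 3.1.2 (abridged); note the "Driver" here is just a thread inside the AM process, not a separate process:

private def startUserApplication(): Thread = {
  ...
  // Load the user class and look up its main method via reflection
  val mainMethod = userClassLoader.loadClass(args.userClass)
    .getMethod("main", classOf[Array[String]])
  val userThread = new Thread {
    override def run(): Unit = {
      ...
      mainMethod.invoke(null, userArgs.toArray)
      ...
    }
  }
  userThread.setContextClassLoader(userClassLoader)
  userThread.setName("Driver")
  userThread.start()
  userThread
}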
YarnAllocator.scala
Iterate over the containers that were allocated, launching each one in a loop and starting a YarnCoarseGrainedExecutorBackend process inside it.
allocator.allocateResources()
-- if (allocatedContainers.size > 0) {
     handleAllocatedContainers(allocatedContainers.asScala.toSeq)
   }
-- runAllocatedContainers(containersToUse)
-- for (container <- containersToUse) {
-- if (launchContainers) {
     launcherPool.execute(() => {
       try {
         new ExecutorRunnable(
           Some(container),
           conf,
           sparkConf,
           driverUrl,
           executorId,
           executorHostname,
           containerMem,
           containerCores,
           appAttemptId.getApplicationId.toString,
           securityMgr,
           localResources,
           rp.id
         ).run()
       }
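launcherPool here is a daemon thread pool dedicated to launching containers; in the 3.1.2 YarnAllocator it is declared roughly as:

// Each container launch runs on its own thread from this pool
private val launcherPool = ThreadUtils.newDaemonCachedThreadPool(
  "ContainerLauncher", sparkConf.get(CONTAINER_LAUNCH_MAX_THREADS))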
ExecutorRunnable.scala
- Initialize the NodeManager client
- Start the NodeManager client
- Start the container
def run(): Unit = {
logDebug("Starting Executor Container")
nmClient = NMClient.createNMClient()
nmClient.init(conf)
nmClient.start()
startContainer()
}
- Inside startContainer(), the command is assembled and added to the container launch context:
val commands = prepareCommand()
ctx.setCommands(commands.asJava)
...
nmClient.startContainer(container.get, ctx)
- prepareCommand builds the command that launches a new process: bin/java org.apache.spark.executor.YarnCoarseGrainedExecutorBackend
val commands = prefixEnv ++
Seq(Environment.JAVA_HOME.$$() + "/bin/java", "-server") ++
javaOpts ++
Seq("org.apache.spark.executor.YarnCoarseGrainedExecutorBackend",
"--driver-url", masterAddress,
"--executor-id", executorId,
"--hostname", hostname,
"--cores", executorCores.toString,
"--app-id", appId,
"--resourceProfileId", resourceProfileId.toString) ++
userClassPath ++
Seq(
s"1>${ApplicationConstants.LOG_DIR_EXPANSION_VAR}/stdout",
s"2>${ApplicationConstants.LOG_DIR_EXPANSION_VAR}/stderr")
YarnCoarseGrainedExecutorBackend
def main(args: Array[String]): Unit = {
val createFn: (RpcEnv, CoarseGrainedExecutorBackend.Arguments, SparkEnv, ResourceProfile) =>
CoarseGrainedExecutorBackend = { case (rpcEnv, arguments, env, resourceProfile) =>
new YarnCoarseGrainedExecutorBackend(rpcEnv, arguments.driverUrl, arguments.executorId,
arguments.bindAddress, arguments.hostname, arguments.cores, arguments.userClassPath.toSeq,
env, arguments.resourcesFileOpt, resourceProfile)
}
val backendArgs = CoarseGrainedExecutorBackend.parseArguments(args,
this.getClass.getCanonicalName.stripSuffix("$"))
CoarseGrainedExecutorBackend.run(backendArgs, createFn)
System.exit(0)
}
Inside CoarseGrainedExecutorBackend.run(backendArgs, createFn), there is an RPC setup; this involves the communication layer, which will be covered later:
env.rpcEnv.setupEndpoint("Executor",
backendCreateFn(env.rpcEnv, arguments, env, cfg.resourceProfile))
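Registering the endpoint triggers its onStart, which asks the driver to register this executor; on success the backend sends itself a RegisteredExecutor message. Abridged from CoarseGrainedExecutorBackend in 3.1.2, roughly:

override def onStart(): Unit = {
  ...
  rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref =>
    driver = Some(ref)
    // Ask the driver to register this executor
    ref.ask[Boolean](RegisterExecutor(executorId, self, hostname, cores, extractLogUrls,
      extractAttributes, _resources, resourceProfile.id))
  }(ThreadUtils.sameThread).onComplete {
    // On success, deliver RegisteredExecutor to our own receive method
    case Success(_) => self.send(RegisteredExecutor)
    case Failure(e) =>
      exitExecutor(1, s"Cannot register with driver: $driverUrl", e, notifyDriver = false)
  }(ThreadUtils.sameThread)
}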
Finally, receive handles the RegisteredExecutor reply and creates the Executor object:
override def receive: PartialFunction[Any, Unit] = {
  case RegisteredExecutor =>
    logInfo("Successfully registered with driver")
    try {
      executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false,
        resources = _resources)
      driver.get.send(LaunchedExecutor(executorId))
    } catch {
      case NonFatal(e) =>
        exitExecutor(1, "Unable to create executor due to " + e.getMessage, e)
    }
  ...