In production, Spark jobs are usually submitted to YARN. The overall flow consists of the following steps:
1. The client submits the application to the RM (ResourceManager).
2. The RM starts the AM (ApplicationMaster).
3. The AM starts the Driver thread and requests resources from the RM.
4. The RM returns a list of available resources (containers).
5. The AM starts the containers through nmClient and launches an ExecutorBackend process in each of them.
6. Each Executor registers itself back with the Driver.
7. The Executors run tasks.
Below we walk through these seven steps with excerpts from the Spark source code.
1. The client submits the application to the RM.
Everything starts from spark-submit.sh (e.g. spark-submit --master yarn --deploy-mode cluster --class com.example.Main app.jar). The first class it launches is org.apache.spark.deploy.SparkSubmit, so we start from its main method.
①main
// Defined in org.apache.spark.deploy.SparkSubmit
override def main(args: Array[String]): Unit = {
  val appArgs = new SparkSubmitArguments(args)
  // appArgs.action is initialized as:
  // action = Option(action).getOrElse(SUBMIT)
  appArgs.action match {
    case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
  }
}
②submit(appArgs, uninitLog)
private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
  val (childArgs, childClasspath, sparkConf, childMainClass) = prepareSubmitEnvironment(args)
  def doRunMain(): Unit = {
    if (args.proxyUser != null) {
      // proxy-user handling elided
    } else {
      runMain(childArgs, childClasspath, sparkConf, childMainClass, args.verbose)
    }
  }
  if (args.isStandaloneCluster && args.useRest) {
    // standalone-cluster REST submission elided
  } else {
    doRunMain()
  }
}
③runMain(childArgs, childClasspath, sparkConf, childMainClass, args.verbose)
In YARN cluster mode, childMainClass is the class "org.apache.spark.deploy.yarn.YarnClusterApplication".
private def runMain(
    childArgs: Seq[String],
    childClasspath: Seq[String],
    sparkConf: SparkConf,
    childMainClass: String,
    verbose: Boolean): Unit = {
  var mainClass: Class[_] = null
  try {
    mainClass = Utils.classForName(childMainClass)
  } // catch clauses elided
  val app: SparkApplication = if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
    mainClass.newInstance().asInstanceOf[SparkApplication]
  } // else branch (wrapping a plain main class) elided
  try {
    app.start(childArgs.toArray, sparkConf)
  } // catch clauses elided
}
④app.start(childArgs.toArray, sparkConf)
app is created via reflection; the class loaded is "org.apache.spark.deploy.yarn.YarnClusterApplication".
private[spark] class YarnClusterApplication extends SparkApplication {
  override def start(args: Array[String], conf: SparkConf): Unit = {
    new Client(new ClientArguments(args), conf).run()
  }
}
⑤new Client(new ClientArguments(args), conf).run()
One thing worth noting is the initialization of Client: constructing it also creates the underlying YarnClient. run() then simply delegates to submitApplication():
def run(): Unit = {
  this.appId = submitApplication()
}
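As an aside, this Client path is also what runs when an application is submitted programmatically with the public org.apache.spark.launcher.SparkLauncher API instead of spark-submit.sh; the launcherBackend calls you will see in submitApplication() below are what report state back to the resulting SparkAppHandle. A minimal usage sketch (the jar path and main class are hypothetical, and SPARK_HOME is assumed to be set):
import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

object LauncherExample {
  def main(args: Array[String]): Unit = {
    // Submit a YARN cluster-mode application; the child process goes through
    // SparkSubmit -> YarnClusterApplication -> Client, the same path traced above.
    val handle: SparkAppHandle = new SparkLauncher()
      .setMaster("yarn")
      .setDeployMode("cluster")
      .setAppResource("/path/to/app.jar")   // hypothetical jar
      .setMainClass("com.example.Main")     // hypothetical user class
      .startApplication()
    // The handle's state moves through SUBMITTED, RUNNING, ... as the Client
    // calls reportLauncherState().
    while (!handle.getState.isFinal) Thread.sleep(1000)
  }
}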
⑥this.appId = submitApplication()
yarnClient submits the application to YARN. Note that what actually gets submitted is a launch command: the RM picks an NM, which parses the command and starts the ApplicationMaster. That is step 2 of our flow (it happens inside YARN itself, so there is no Spark code to show for it); a sketch of what that command roughly looks like follows the code below.
def submitApplication(): ApplicationId = {
  var appId: ApplicationId = null
  try {
    launcherBackend.connect()
    // yarnClient was created in the Client constructor via YarnClient.createYarnClient (a YarnClientImpl)
    yarnClient.init(hadoopConf)
    yarnClient.start()
    // Get a new application from our RM
    val newApp = yarnClient.createApplication()
    val newAppResponse = newApp.getNewApplicationResponse()
    appId = newAppResponse.getApplicationId()
    // Set up the appropriate contexts to launch our AM.
    // createContainerLaunchContext() wraps up a launch command; in cluster mode the command
    // starts the class "org.apache.spark.deploy.yarn.ApplicationMaster".
    val containerContext = createContainerLaunchContext(newAppResponse)
    val appContext = createApplicationSubmissionContext(newApp, containerContext)
    // Submit the application to YARN: the RM will pick an NM to parse the command and start the AM.
    yarnClient.submitApplication(appContext)
    launcherBackend.setAppId(appId.toString)
    reportLauncherState(SparkAppHandle.State.SUBMITTED)
    appId
  } // catch clauses elided
}
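For intuition, the command that createContainerLaunchContext() places into the container launch context has roughly the following shape. This is a simplified, hypothetical helper, not the real method: the actual code also assembles the classpath, JVM options, local resources and extra arguments, and in client mode it launches ExecutorLauncher instead of ApplicationMaster.
import org.apache.hadoop.yarn.api.ApplicationConstants
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment

// Hypothetical sketch of the shape of the AM launch command (cluster mode).
def sketchAmCommand(userClass: String, userJar: String, amMemoryMb: Int): Seq[String] = Seq(
  Environment.JAVA_HOME.$$() + "/bin/java",
  "-server",
  s"-Xmx${amMemoryMb}m",
  "org.apache.spark.deploy.yarn.ApplicationMaster",
  "--class", userClass,
  "--jar", userJar,
  "1>", ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout",
  "2>", ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr")
The NM expands this string and executes it, which starts the JVM running ApplicationMaster, i.e. step 2 of the flow.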
3. The AM starts the Driver thread and requests resources from the RM.
①main
启动"org.apache.spark.deploy.yarn.ApplicationMaster"类, 找到main方法
def main(args: Array[String]): Unit = {
  master = new ApplicationMaster(amArgs)
  System.exit(master.run())
}

final def run(): Int = {
  doAsUser {
    runImpl()
  }
}
②runImpl()
In cluster mode, runDriver() is called.
private def runImpl(): Unit = {
  try {
    if (isClusterMode) {
      runDriver()
    } else {
      runExecutorLauncher()
    }
  } // catch/finally elided
}
③runDriver()
This method does two things: first it starts the user class (the Spark application you wrote, i.e. the so-called Driver thread), then it registers the AM with the RM and requests resources.
private def runDriver(): Unit = {
  userClassThread = startUserApplication()
  val totalWaitTime = sparkConf.get(AM_MAX_WAIT_TIME)
  try {
    // Wait until the user class has created its SparkContext (which completes sparkContextPromise).
    val sc = ThreadUtils.awaitResult(sparkContextPromise.future,
      Duration(totalWaitTime, TimeUnit.MILLISECONDS))
    if (sc != null) {
      rpcEnv = sc.env.rpcEnv
      val driverRef = createSchedulerRef(
        sc.getConf.get("spark.driver.host"),
        sc.getConf.get("spark.driver.port"))
      registerAM(sc.getConf, rpcEnv, driverRef, sc.ui.map(_.webUrl))
      registered = true
    }
    resumeDriver()
    userClassThread.join()
  } // catch/finally elided
}
④startUserApplication()
The code below shows that the Driver is really just a thread named "Driver": the user class's main method is loaded via reflection and invoked in that thread, which runs our Spark application (see the sketch after the code for what such an application looks like).
private def startUserApplication(): Thread = {
  val mainMethod = userClassLoader.loadClass(args.userClass)
    .getMethod("main", classOf[Array[String]])
  val userThread = new Thread {
    override def run() {
      try {
        mainMethod.invoke(null, userArgs.toArray)
      } // catch/finally elided
    }
  }
  userThread.setContextClassLoader(userClassLoader)
  userThread.setName("Driver")
  userThread.start()
  userThread
}
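For reference, the "user class" launched here is just an ordinary Spark application. A minimal, hypothetical example (class name and logic are made up for illustration):
import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical user application; its main method is exactly what the "Driver" thread invokes.
object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WordCount")
    // In cluster mode, creating the SparkContext notifies the ApplicationMaster
    // (completing sparkContextPromise), which lets runDriver() proceed to registerAM().
    val sc = new SparkContext(conf)
    sc.textFile(args(0))
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .saveAsTextFile(args(1))
    sc.stop()
  }
}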
⑤registerAM(sc.getConf, rpcEnv, driverRef, sc.ui.map(_.webUrl))
This method registers the AM with the RM and then requests resources.
private def registerAM(
    _sparkConf: SparkConf,
    _rpcEnv: RpcEnv,
    driverRef: RpcEndpointRef,
    uiAddress: Option[String]) = {
  val appId = client.getAttemptId().getApplicationId().toString()
  val attemptId = client.getAttemptId().getAttemptId().toString()
  val historyAddress = ApplicationMaster
    .getHistoryServerAddress(_sparkConf, yarnConf, appId, attemptId)
  val driverUrl = RpcEndpointAddress(
    _sparkConf.get("spark.driver.host"),
    _sparkConf.get("spark.driver.port").toInt,
    CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString
  allocator = client.register(driverUrl,
    driverRef,
    yarnConf,
    _sparkConf,
    uiAddress,
    historyAddress,
    securityMgr,
    localResources)
  // Initialize the AM endpoint *after* the allocator has been initialized. This ensures
  // that when the driver sends an initial executor request (e.g. after an AM restart),
  // the allocator is ready to service requests.
  rpcEnv.setupEndpoint("YarnAM", new AMEndpoint(rpcEnv, driverRef))
  allocator.allocateResources()
  reporterThread = launchReporterThread()
}
⑥client.register(...)
Registers the AM with the RM via an AMRMClient and returns a YarnAllocator.
def register(
    driverUrl: String,
    driverRef: RpcEndpointRef,
    conf: YarnConfiguration,
    sparkConf: SparkConf,
    uiAddress: Option[String],
    uiHistoryAddress: String,
    securityMgr: SecurityManager,
    localResources: Map[String, LocalResource]): YarnAllocator = {
  // Register with the RM through an AMRMClient
  amClient = AMRMClient.createAMRMClient()
  amClient.init(conf)
  amClient.start()
  this.uiHistoryAddress = uiHistoryAddress
  logInfo("Registering the ApplicationMaster")
  synchronized {
    amClient.registerApplicationMaster(Utils.localHostName(), 0, trackingUrl)
    registered = true
  }
  new YarnAllocator(driverUrl, driverRef, conf, sparkConf, amClient, getAttemptId(), securityMgr,
    localResources, new SparkRackResolver())
}
4. The RM returns the list of available resources.
⑦allocator.allocateResources()
Polls the RM and handles the containers that have been allocated.
def allocateResources(): Unit = synchronized {
  updateResourceRequests()
  val progressIndicator = 0.1f
  // Poll the ResourceManager. This doubles as a heartbeat if there are no pending container
  // requests.
  val allocateResponse = amClient.allocate(progressIndicator)
  val allocatedContainers = allocateResponse.getAllocatedContainers()
  if (allocatedContainers.size > 0) {
    handleAllocatedContainers(allocatedContainers.asScala)
  }
  val completedContainers = allocateResponse.getCompletedContainersStatuses()
  if (completedContainers.size > 0) {
    processCompletedContainers(completedContainers.asScala)
  }
}
⑧handleAllocatedContainers(allocatedContainers.asScala)
This code implements the locality strategy: allocated containers are first matched to pending requests by host (node locality), then by rack (rack locality), and whatever remains is matched against ANY_HOST; containers that still cannot be matched are released.
def handleAllocatedContainers(allocatedContainers: Seq[Container]): Unit = {
  val containersToUse = new ArrayBuffer[Container](allocatedContainers.size)
  // Match incoming requests by host
  val remainingAfterHostMatches = new ArrayBuffer[Container]
  for (allocatedContainer <- allocatedContainers) {
    matchContainerToRequest(allocatedContainer, allocatedContainer.getNodeId.getHost,
      containersToUse, remainingAfterHostMatches)
  }
  // Match remaining by rack
  val remainingAfterRackMatches = new ArrayBuffer[Container]
  for (allocatedContainer <- remainingAfterHostMatches) {
    val rack = resolver.resolve(conf, allocatedContainer.getNodeId.getHost)
    matchContainerToRequest(allocatedContainer, rack, containersToUse,
      remainingAfterRackMatches)
  }
  // Assign remaining that are neither node-local nor rack-local
  val remainingAfterOffRackMatches = new ArrayBuffer[Container]
  for (allocatedContainer <- remainingAfterRackMatches) {
    matchContainerToRequest(allocatedContainer, ANY_HOST, containersToUse,
      remainingAfterOffRackMatches)
  }
  if (!remainingAfterOffRackMatches.isEmpty) {
    for (container <- remainingAfterOffRackMatches) {
      internalReleaseContainer(container)
    }
  }
  runAllocatedContainers(containersToUse)
}
5. The AM starts containers through nmClient and launches the ExecutorBackend processes.
⑨runAllocatedContainers(containersToUse)
For each container to use, this method submits a new ExecutorRunnable(...).run() to a thread pool.
private def runAllocatedContainers(containersToUse: ArrayBuffer[Container]): Unit = {
  for (container <- containersToUse) {
    executorIdCounter += 1
    val executorHostname = container.getNodeId.getHost
    val containerId = container.getId
    val executorId = executorIdCounter.toString
    assert(container.getResource.getMemory >= resource.getMemory)
    def updateInternalState(): Unit = synchronized {
      runningExecutors.add(executorId)
      numExecutorsStarting.decrementAndGet()
      executorIdToContainer(executorId) = container
      containerIdToExecutorId(container.getId) = executorId
      val containerSet = allocatedHostToContainersMap.getOrElseUpdate(executorHostname,
        new HashSet[ContainerId])
      containerSet += containerId
      allocatedContainerToHostMap.put(containerId, executorHostname)
    }
    if (runningExecutors.size() < targetNumExecutors) {
      numExecutorsStarting.incrementAndGet()
      if (launchContainers) {
        launcherPool.execute(new Runnable {
          override def run(): Unit = {
            try {
              new ExecutorRunnable(
                Some(container),
                conf,
                sparkConf,
                driverUrl,
                executorId,
                executorHostname,
                executorMemory,
                executorCores,
                appAttemptId.getApplicationId.toString,
                securityMgr,
                localResources
              ).run()
              updateInternalState()
            } // catch elided
          }
        })
      }
    }
  }
}
⑩new ExecutorRunnable(...).run()
def run(): Unit = {
  logDebug("Starting Executor Container")
  nmClient = NMClient.createNMClient()
  nmClient.init(conf)
  nmClient.start()
  startContainer()
}
⑪startContainer()
The same familiar pattern again: wrap up a launch command and start a process inside the container; a sketch of the command follows the code below.
def startContainer(): java.util.Map[String, ByteBuffer] = {
  val ctx = Records.newRecord(classOf[ContainerLaunchContext])
    .asInstanceOf[ContainerLaunchContext]
  val env = prepareEnvironment().asJava
  ctx.setLocalResources(localResources.asJava)
  ctx.setEnvironment(env)
  val credentials = UserGroupInformation.getCurrentUser().getCredentials()
  val dob = new DataOutputBuffer()
  credentials.writeTokenStorageToStream(dob)
  ctx.setTokens(ByteBuffer.wrap(dob.getData()))
  // prepareCommand() builds the command that launches "org.apache.spark.executor.CoarseGrainedExecutorBackend"
  val commands = prepareCommand()
  // Put the command into the launch context
  ctx.setCommands(commands.asJava)
  ctx.setApplicationACLs(YarnSparkHadoopUtil.getApplicationAclsForYarn(securityMgr).asJava)
  // Send the start request to the ContainerManager
  try {
    nmClient.startContainer(container.get, ctx)
  } // catch elided
}
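The command assembled by prepareCommand() has roughly the following shape. This is a simplified, hypothetical helper, not the real method: the actual code also adds the classpath, extra JVM options and log redirection, and the exact flags can differ between Spark versions.
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment

// Hypothetical sketch of the executor launch command built by prepareCommand().
def sketchExecutorCommand(
    driverUrl: String,
    executorId: String,
    hostname: String,
    executorMemoryMb: Int,
    executorCores: Int,
    appId: String): Seq[String] = Seq(
  Environment.JAVA_HOME.$$() + "/bin/java",
  "-server",
  s"-Xmx${executorMemoryMb}m",
  "org.apache.spark.executor.CoarseGrainedExecutorBackend",
  "--driver-url", driverUrl,
  "--executor-id", executorId,
  "--hostname", hostname,
  "--cores", executorCores.toString,
  "--app-id", appId)
The NM executes this command, which brings us to the CoarseGrainedExecutorBackend process of the next step.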
6. The Executor registers itself back with the Driver.
The container now runs "org.apache.spark.executor.CoarseGrainedExecutorBackend"; once more we start from its main method.
①main
def main(args: Array[String]) {
  run(driverUrl, executorId, hostname, cores, appId, workerUrl, userClassPath)
}
②run(): this function sets up the executor-side environment and the backend's RPC endpoint.
private def run(
    driverUrl: String,
    executorId: String,
    hostname: String,
    cores: Int,
    appId: String,
    workerUrl: Option[String],
    userClassPath: Seq[URL]) {
  // Set up the executor-side SparkEnv
  val env = SparkEnv.createExecutorEnv(
    driverConf, executorId, hostname, cores, cfg.ioEncryptionKey, isLocal = false)
  // Register an RPC endpoint named "Executor" for communication with the driver
  env.rpcEnv.setupEndpoint("Executor", new CoarseGrainedExecutorBackend(
    env.rpcEnv, driverUrl, executorId, hostname, cores, userClassPath, env))
  workerUrl.foreach { url =>
    env.rpcEnv.setupEndpoint("WorkerWatcher", new WorkerWatcher(env.rpcEnv, url))
  }
  env.rpcEnv.awaitTermination()
}
类"org.apache.spark.executor.CoarseGrainedExecutorBackend"是一个backend, 是有生命周期的.
constructor -> onStart -> receive* -> onStop
constructor: instantiated in the run() function above
onStart: registers the executor with the Driver
override def onStart() {
  rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref =>
    // This is a very fast action so we can use "ThreadUtils.sameThread"
    driver = Some(ref)
    ref.ask[Boolean](RegisterExecutor(executorId, self, hostname, cores, extractLogUrls))
  }
}
receive: handles the messages sent to this backend.
On the driver side, the RegisterExecutor request is handled by CoarseGrainedSchedulerBackend's DriverEndpoint, which records the executor and sends back RegisteredExecutor; on receiving that, the backend instantiates an Executor.
When a LaunchTask message arrives, the task description is deserialized and the task is launched, which is step 7 of the flow.
override def receive: PartialFunction[Any, Unit] = {
  // Registration succeeded: create the actual Executor
  case RegisteredExecutor =>
    try {
      executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false)
    } // catch elided
  // Launch a task
  case LaunchTask(data) =>
    if (executor == null) {
      exitExecutor(1, "Received LaunchTask command but executor was null")
    } else {
      val taskDesc = TaskDescription.decode(data.value)
      logInfo("Got assigned task " + taskDesc.taskId)
      executor.launchTask(this, taskDesc)
    }
}
The flow above gives an overview of how a Spark application is submitted and started.
To recap, a job submitted with spark-submit in YARN cluster mode goes through the following steps:
1. The client submits the application to the RM.
2. The RM starts the AM.
3. The AM starts the Driver thread and requests resources from the RM.
4. The RM returns a list of available resources.
5. The AM starts containers through nmClient and launches the CoarseGrainedExecutorBackend processes.
6. Each Executor registers itself back with the Driver.
7. The Executors run tasks.