1. After SparkContext initialization has finished createTaskScheduler(), it calls TaskSchedulerImpl.start().
// start the TaskScheduler after the DAGScheduler constructor has set the DAGScheduler reference on TaskSchedulerImpl
_taskScheduler.start()
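To see where this is triggered from the user's side: constructing a SparkContext in a driver program is what kicks off createTaskScheduler() and then _taskScheduler.start(). A minimal sketch of such a driver, where the app name and standalone master URL are placeholders:

import org.apache.spark.{SparkConf, SparkContext}

object StartSchedulerDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("scheduler-start-demo")   // placeholder app name
      .setMaster("spark://master:7077")     // placeholder standalone master URL
    // TaskSchedulerImpl.start() runs inside this constructor,
    // after the DAGScheduler has taken its reference to the TaskScheduler
    val sc = new SparkContext(conf)
    // ... job code (runJob and friends) would go here ...
    sc.stop()
  }
}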
2. Enter TaskSchedulerImpl.start():
override def start() {
//backend was injected into the backend member via initialize(); in standalone mode it is a SparkDeploySchedulerBackend
backend.start()
//spark.speculation: if set to "true", tasks are executed speculatively. This means that if one or more tasks in a stage run slowly, they will be relaunched.
if (!isLocal && conf.getBoolean("spark.speculation", false)) {
logInfo("Starting speculative execution thread")
speculationScheduler.scheduleAtFixedRate(new Runnable {
override def run(): Unit = Utils.tryOrStopSparkContext(sc) {
checkSpeculatableTasks()
}
}, SPECULATION_INTERVAL_MS, SPECULATION_INTERVAL_MS, TimeUnit.MILLISECONDS)
}
}
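Speculative execution is off by default; a minimal sketch of turning it on from the application side (spark.speculation is the key used in the snippet above; the interval/quantile/multiplier keys are the standard tuning knobs):

import org.apache.spark.SparkConf

// Enable speculation so that checkSpeculatableTasks() above actually has work to do.
val conf = new SparkConf()
  .setAppName("speculation-demo")                  // placeholder app name
  .set("spark.speculation", "true")
  .set("spark.speculation.interval", "100ms")      // how often slow tasks are checked for
  .set("spark.speculation.quantile", "0.75")       // fraction of tasks that must finish first
  .set("spark.speculation.multiplier", "1.5")      // how much slower than the median counts as slow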
3. Enter SparkDeploySchedulerBackend.start(): an important part of what this method does is arrange for a CoarseGrainedExecutorBackend to be started on each worker node.
override def start() {
super.start()
...
}
4. First look at super.start(), i.e. CoarseGrainedSchedulerBackend.start():
//called from SparkDeploySchedulerBackend.start()
override def start() {
val properties = new ArrayBuffer[(String, String)]
//scheduler is the TaskSchedulerImpl
//every property whose key starts with "spark." is copied into this ArrayBuffer
for ((key, value) <- scheduler.sc.conf.getAll) {
if (key.startsWith("spark.")) {
properties += ((key, value))
}
}
// TODO(prashant) send conf instead of properties
//this rpcEnv was created in SparkEnv. ENDPOINT_NAME = "CoarseGrainedScheduler"
driverEndpoint = rpcEnv.setupEndpoint(ENDPOINT_NAME, createDriverEndpoint(properties))
}
5. A DriverEndpoint is registered in the RpcEnv, producing its RpcEndpointRef:
protected def createDriverEndpoint(properties: Seq[(String, String)]): DriverEndpoint = {
new DriverEndpoint(rpcEnv, properties)
}
6. Look at the DriverEndpoint initialization:
//it is created inside CoarseGrainedSchedulerBackend.start();
// this rpcEnv was created in SparkEnv. ENDPOINT_NAME = "CoarseGrainedScheduler"; sparkProperties holds every SparkConf entry whose key starts with "spark."
class DriverEndpoint(override val rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)])
extends ThreadSafeRpcEndpoint with Logging {
// If this DriverEndpoint is changed to support multiple threads,
// then this may need to be changed so that we don't share the serializer
// instance across threads
//the closure serializer (Java serialization by default) is a possible tuning point
private val ser = SparkEnv.get.closureSerializer.newInstance()
override protected def log = CoarseGrainedSchedulerBackend.this.log
protected val addressToExecutorId = new HashMap[RpcAddress, String]
private val reviveThread =
ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-revive-thread")
override def onStart() {
// Periodically revive offers to allow delay scheduling to work
//getTimeAsMs reads a time parameter as milliseconds, falling back to the default if unset; without a suffix the value is assumed to be milliseconds, e.g. "1s" becomes 1000
//the interval at which the scheduler revives worker resource offers in order to run tasks
val reviveIntervalMs = conf.getTimeAsMs("spark.scheduler.revive.interval", "1s")
//every second, send a ReviveOffers message to itself
reviveThread.scheduleAtFixedRate(new Runnable {
override def run(): Unit = Utils.tryLogNonFatalError {
Option(self).foreach(_.send(ReviveOffers))
}
}, 0, reviveIntervalMs, TimeUnit.MILLISECONDS)
}
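The mechanism here, a single daemon scheduler thread that periodically posts a ReviveOffers message back to the endpoint, can be mirrored with plain java.util.concurrent. A simplified stand-alone sketch, where the blocking queue stands in for the RPC inbox and the case object stands in for Spark's ReviveOffers message:

import java.util.concurrent.{Executors, LinkedBlockingQueue, ThreadFactory, TimeUnit}

object ReviveLoopSketch {
  case object ReviveOffers   // stand-in for Spark's ReviveOffers message

  private val inbox = new LinkedBlockingQueue[Any]()   // stand-in for the endpoint's RPC inbox

  // same behaviour as ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-revive-thread")
  private val reviveThread = Executors.newSingleThreadScheduledExecutor(new ThreadFactory {
    override def newThread(r: Runnable): Thread = {
      val t = new Thread(r, "driver-revive-thread")
      t.setDaemon(true)
      t
    }
  })

  def main(args: Array[String]): Unit = {
    val reviveIntervalMs = 1000L   // "1s", the default of spark.scheduler.revive.interval
    reviveThread.scheduleAtFixedRate(new Runnable {
      override def run(): Unit = inbox.put(ReviveOffers)
    }, 0, reviveIntervalMs, TimeUnit.MILLISECONDS)

    // the "receive" side: each ReviveOffers would trigger makeOffers() in the real code
    for (_ <- 1 to 3) {
      inbox.take() match {
        case ReviveOffers => println("makeOffers() would run here")
        case _            =>
      }
    }
  }
}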
7. The ReviveOffers message is dispatched to the receive partial function:
override def receive: PartialFunction[Any, Unit] = {
...
case ReviveOffers =>
makeOffers()
case KillTask(taskId, executorId, interruptThread) =>
8. makeOffers() is the key method for submitting TaskSets for execution. DriverEndpoint calls it once per second, so whenever an action such as runJob() produces tasks, they get shipped to the nodes for execution. ==> Since we are still in initialization, just remember that this method keeps firing; we will step into it again when tracing runJob().
// Make fake resource offers on all executors
// logically, every Executor becomes a provider of compute resources
private def makeOffers() {
// Filter out executors under killing
// executorDataMap: HashMap[String, ExecutorData] mapping executorId to ExecutorData;
// its entries are added when the CoarseGrainedExecutorBackend RpcEndpoint, in its onStart method, sends RegisterExecutor to the DriverEndpoint
//CoarseGrainedExecutorBackend startup is triggered by SparkDeploySchedulerBackend.start(); the filter below makes sure no task is handed to an executor that is being killed
val activeExecutors = executorDataMap.filterKeys(executorIsAlive)
//build a sequence of metadata describing all alive Executors
val workOffers = activeExecutors.map { case (id, executorData) =>
new WorkerOffer(id, executorData.executorHost, executorData.freeCores)
}.toSeq
//TaskSchedulerImpl.resourceOffers builds the two-dimensional allocation structure Seq[ArrayBuffer[TaskDescription]] (one buffer per offer, sized by o.cores); resourceOfferSingleTaskSet
// fills in the elements, and the result is handed to launchTasks, which loads and executes the tasks on that basis
// if any TaskDescription is present there are tasks to run, so launchTasks() is invoked
launchTasks(scheduler.resourceOffers(workOffers))
}
===> Since we got here via SparkDeploySchedulerBackend.start() calling super.start(), no CoarseGrainedExecutorBackend has been created yet, so executorDataMap has no entries at this point.
9. Back to SparkDeploySchedulerBackend.start():
override def start() {
super.start()
launcherBackend.connect()
// The endpoint for executors to talk to us: build the URL corresponding to the Spark driver
//spark.driver.host and spark.driver.port are set when SparkEnv is created
//e.g. spark://CoarseGrainedScheduler@192.168.1.152:49972, which in effect addresses the DriverEndpoint's RpcEndpointRef
val driverUrl = rpcEnv.uriOf(SparkEnv.driverActorSystemName,
RpcAddress(sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port").toInt),
CoarseGrainedSchedulerBackend.ENDPOINT_NAME)
//these arguments are consumed by CoarseGrainedExecutorBackend
val args = Seq(
"--driver-url", driverUrl,
"--executor-id", "{{EXECUTOR_ID}}",
"--hostname", "{{HOSTNAME}}",
"--cores", "{{CORES}}",
"--app-id", "{{APP_ID}}",
"--worker-url", "{{WORKER_URL}}")
//by default these come from %SPARK_HOME%/conf/spark-defaults.conf; to debug the CoarseGrainedExecutorBackend process, JVM debugger options can be added here
val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions")
.map(Utils.splitCommandString).getOrElse(Seq.empty)
val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath")
.map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil)
val libraryPathEntries = sc.conf.getOption("spark.executor.extraLibraryPath")
.map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil)
// When testing, expose the parent class path to the child. This is processed by
// compute-classpath.{cmd,sh} and makes all needed jars available to child processes
// when the assembly is built with the "*-provided" profiles enabled.
val testingClassPath =
if (sys.props.contains("spark.testing")) {
sys.props("java.class.path").split(java.io.File.pathSeparator).toSeq
} else {
Nil
}
// Start executors with a few necessary configs for registering with the scheduler
//start the executors with the configs needed to register with the scheduler; SparkConf.isExecutorStartupConf decides whether a config entry must be handed to the Executor node at startup
//keys matching `spark.*.port` or `spark.port.*`, or starting with spark.akka, spark.auth, spark.ssl, spark.rpc, are turned into
//s"-D$k=$v", e.g. Seq[String]("-Dspark.driver.port=49972") (see the sketch after this method)
val sparkJavaOpts = Utils.sparkJavaOpts(conf, SparkConf.isExecutorStartupConf)
val javaOpts = sparkJavaOpts ++ extraJavaOpts
//Command(org.apache.spark.executor.CoarseGrainedExecutorBackend,
// List(--driver-url, spark://CoarseGrainedScheduler@192.168.1.152:49972,
// --executor-id, {{EXECUTOR_ID}},
// --hostname, {{HOSTNAME}},
// --cores, {{CORES}}, --app-id, {{APP_ID}}, --worker-url, {{WORKER_URL}}),
// Map(SPARK_USER -> root, SPARK_EXECUTOR_MEMORY -> 1024m),
// List(),List(),ArraySeq(-Dspark.driver.port=49972, -XX:+PrintGCDetails,
// -Dkey=value, -Dnumbers=one two three))
val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend",
args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries, javaOpts)
//sc.ui is the SparkUI; this gets its HTTP address, e.g. http://192.168.1.152:4040, where port 4040 serves job-related information for the running application
val appUIAddress = sc.ui.map(_.appUIAddress).getOrElse("")
//the number of cores per CoarseGrainedExecutorBackend; --executor-cores or spark.executor.cores is typically best set to 2-4
// on YARN you can also configure --num-executors (the total number of CoarseGrainedExecutorBackends in the cluster) * --executor-cores (cores per Executor)
val coresPerExecutor = conf.getOption("spark.executor.cores").map(_.toInt)
//ApplicationDescription is a case class (application name, executor memory, and so on); sc.eventLogDir is e.g. Some(hdfs://ns1/historyserverforspark)
val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory,
command, appUIAddress, sc.eventLogDir, sc.eventLogCodec, coresPerExecutor)
//create an AppClient with the corresponding launch parameters; as can be seen, this is what eventually gets org.apache.spark.executor.CoarseGrainedExecutorBackend started
//a message is sent to the Worker, which uses the JDK's ProcessBuilder.start() to launch CoarseGrainedExecutorBackend
//https://blog.csdn.net/a11a2233445566/article/details/54694584
client = new AppClient(sc.env.rpcEnv, masters, appDesc, this, conf)
client.start()
...
}
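The transformation described above, startup-relevant spark.* entries becoming -D options for the executor JVM, can be sketched in a few lines. SparkConf.isExecutorStartupConf's real predicate is more involved, so the filter below is only an approximation of it:

// Approximate sketch of how Utils.sparkJavaOpts turns startup-relevant config entries
// into JVM system properties; looksLikeStartupConf only mimics SparkConf.isExecutorStartupConf.
val confEntries: Seq[(String, String)] = Seq(
  "spark.driver.port" -> "49972",
  "spark.akka.frameSize" -> "128",
  "spark.app.name" -> "demo")        // not a startup conf, so it should be dropped

def looksLikeStartupConf(key: String): Boolean =
  key.matches("""spark\..*\.port""") || key.startsWith("spark.port.") ||
  key.startsWith("spark.akka") || key.startsWith("spark.auth") ||
  key.startsWith("spark.ssl") || key.startsWith("spark.rpc")

val sparkJavaOpts = confEntries.collect {
  case (k, v) if looksLikeStartupConf(k) => s"-D$k=$v"
}
// => Seq("-Dspark.driver.port=49972", "-Dspark.akka.frameSize=128")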
10. Enter AppClient.start(): its job is to register a ClientEndpoint in the RpcEnv and store the resulting RpcEndpointRef in the endpoint atomic reference.
def start() {
// Just launch an rpcEndpoint; it will call back into the listener.
//launch an rpcEndpoint and store the reference into private val endpoint = new AtomicReference[RpcEndpointRef]
endpoint.set(rpcEnv.setupEndpoint("AppClient", new ClientEndpoint(rpcEnv)))
}
11. Go straight into ClientEndpoint's onStart() method; this is dictated by the RpcEndpoint lifecycle:
private class ClientEndpoint(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint
with Logging {
... (to keep the flow easy to follow, the initialized fields are omitted here)
override def onStart(): Unit = {
try {
//send a message to the master to register the app (in fact the message goes to all masters; once one master connects successfully, the attempts to the others are cancelled): masterRef.send(RegisterApplication(appDescription, self))
registerWithMaster(1)
} catch {
case e: Exception =>
logWarning("Failed to connect to master", e)
markDisconnected()
stop()
}
}
/**
* Register with all masters asynchronously. It will call `registerWithMaster` every
* REGISTRATION_TIMEOUT_SECONDS (20) seconds until exceeding REGISTRATION_RETRIES (3) times.
* Once we connect to a master successfully, all scheduling work and Futures will be cancelled.
*
* nthRetry means this is the nth attempt to register with master.
*/
private def registerWithMaster(nthRetry: Int) {
//tryRegisterAllMasters() sends the registration message to the masters (in fact to all of them; once one master connects successfully, the attempts to the others are cancelled)
//registerMasterFutures: new AtomicReference[Array[JFuture[_]]]; the Futures are kept so the threads can be cancelled
registerMasterFutures.set(tryRegisterAllMasters())
//registrationRetryTimer: AtomicReference[JScheduledFuture[_]]; once a master has been reached, the remaining work is cancelled via ScheduledFuture.cancel()
//first delayed by 20 seconds, then runs every 20 seconds; the delay gives the tryRegisterAllMasters() thread pool a chance to register directly, after which registered becomes true
registrationRetryTimer.set(registrationRetryThread.scheduleAtFixedRate(new Runnable {
override def run(): Unit = {
Utils.tryOrExit {
//registered: AtomicBoolean(false), false by default
//once the tryRegisterAllMasters() thread pool has registered successfully it sets registered to true; its futures are then cancelled and the register-master thread pool is shut down
if (registered.get) {
registerMasterFutures.get.foreach(_.cancel(true))
registerMasterThreadPool.shutdownNow()
} else if (nthRetry >= REGISTRATION_RETRIES) {
//after 3 failed registration attempts the master is considered dead, which ends up stopping the SparkContext
markDead("All masters are unresponsive! Giving up.")
} else {
//if nthRetry is still below the limit, cancel the threads in the tryRegisterAllMasters pool, then call itself again with nthRetry increased by 1
registerMasterFutures.get.foreach(_.cancel(true))
registerWithMaster(nthRetry + 1)
}
}
}
}, REGISTRATION_TIMEOUT_SECONDS, REGISTRATION_TIMEOUT_SECONDS, TimeUnit.SECONDS))
}
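The retry scheme above boils down to: fire off the registration attempts, then check on a schedule whether one succeeded, giving up after a fixed number of rounds. A stand-alone sketch of that pattern with plain java.util.concurrent (the tryRegister body is a placeholder, and a one-shot schedule replaces the fixed-rate timer for simplicity):

import java.util.concurrent.{Executors, TimeUnit}
import java.util.concurrent.atomic.AtomicBoolean

object RegisterRetrySketch {
  private val REGISTRATION_TIMEOUT_SECONDS = 20
  private val REGISTRATION_RETRIES = 3
  private val registered = new AtomicBoolean(false)
  private val retryThread = Executors.newSingleThreadScheduledExecutor()

  private def tryRegister(): Unit = {
    // placeholder for masterRef.send(RegisterApplication(...));
    // the real response handler flips `registered` to true when a master answers
  }

  def registerWithMaster(nthRetry: Int): Unit = {
    tryRegister()
    retryThread.schedule(new Runnable {
      override def run(): Unit = {
        if (registered.get) {
          // connected: nothing left to do (the real code cancels outstanding futures here)
        } else if (nthRetry >= REGISTRATION_RETRIES) {
          println("All masters are unresponsive! Giving up.")   // markDead() in the real code
        } else {
          registerWithMaster(nthRetry + 1)   // try the next round
        }
      }
    }, REGISTRATION_TIMEOUT_SECONDS, TimeUnit.SECONDS)
  }
}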
==> tryRegisterAllMasters() submits to a cached thread pool (Spark builds its own ThreadPoolExecutor); it is a daemon thread pool, so it stops when the main thread stops.
/**
* Register with all masters asynchronously and return an array of `Future`s for cancellation.
*/
private def tryRegisterAllMasters(): Array[JFuture[_]] = {
//masterRpcAddresses: the collection of RpcAddress entries, one per master
for (masterAddress <- masterRpcAddresses) yield {
//newDaemonCachedThreadPool
// Spark provides its own cached thread pool; it is a daemon pool, so it goes away when the main thread exits (the JDK's default cached thread pool is likewise just a ThreadPoolExecutor constructed with particular parameters)
registerMasterThreadPool.submit(new Runnable {
override def run(): Unit = try {
//defaults to new AtomicBoolean(false)
if (registered.get) {
return
}
/**
* 18/04/04 14:17:57 INFO client.AppClient$ClientEndpoint: Connecting to master spark://luyl153:7077...
* 18/04/04 14:17:57 INFO client.AppClient$ClientEndpoint: Connecting to master spark://luyl152:7077...
* 18/04/04 14:17:57 INFO client.AppClient$ClientEndpoint: Connecting to master spark://luyl154:7077...
*/
logInfo("Connecting to master " + masterAddress.toSparkURL + "...")
//obtain the master's RpcEndpointRef, then send the registration message
// this RpcEnv was created in SparkEnv, while the Master endpoint was created in the Master's own RpcEnv, yet its reference can still be obtained here, which is somewhat surprising;
// in client mode the driver and master sit on the same node; whether cluster mode can do the same lookup is left open here
val masterRef =
rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress, Master.ENDPOINT_NAME)
//note that appDescription here carries the application details, including the Command whose main class is CoarseGrainedExecutorBackend; self is the ClientEndpoint itself
masterRef.send(RegisterApplication(appDescription, self))
} catch {
case ie: InterruptedException => // Cancelled
case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
}
})
}
}
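The newDaemonCachedThreadPool mentioned above is essentially a cached thread pool whose ThreadFactory marks every thread as a daemon, so the pool cannot keep the JVM alive on its own. A minimal equivalent with plain java.util.concurrent (the helper name mirrors ThreadUtils but is written here from scratch):

import java.util.concurrent.{Executors, ExecutorService, ThreadFactory}
import java.util.concurrent.atomic.AtomicInteger

// Minimal stand-in for ThreadUtils.newDaemonCachedThreadPool: a cached pool whose threads
// are daemons, so they disappear when the non-daemon (main) threads exit.
def newDaemonCachedThreadPool(prefix: String): ExecutorService = {
  val counter = new AtomicInteger(0)
  Executors.newCachedThreadPool(new ThreadFactory {
    override def newThread(r: Runnable): Thread = {
      val t = new Thread(r, s"$prefix-${counter.incrementAndGet()}")
      t.setDaemon(true)
      t
    }
  })
}

// usage mirroring registerMasterThreadPool:
// val registerMasterThreadPool = newDaemonCachedThreadPool("appclient-register-master-threadpool")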
//after 3 failed registration attempts the master is considered dead and SparkContext.stop is eventually invoked
def markDead(reason: String) {
//alreadyDead: AtomicBoolean(false), false by default
if (!alreadyDead.get) {
//listener is the SparkDeploySchedulerBackend
//TaskSchedulerImpl.error(reason) ===> if taskSetsByStageIdAndAttempt (key: stageId, value: a HashMap whose key is stageAttemptId and whose value is a TaskSetManager) has entries, TaskSetManager.abort() is called on them
//===> if taskSetsByStageIdAndAttempt has no entries, a SparkException is thrown directly, which makes the SparkContext stop
//in the current situation taskSetsByStageIdAndAttempt certainly has no entries, so it exits directly
listener.dead(reason)
alreadyDead.set(true)
}
}
...
}