1.processEvent方法
// Event hierarchy: the sealed trait below is the type of every event matched by processEvent.
private[scheduler] sealed trait JobSchedulerEvent

// A job is about to run; `startTime` is the clock reading taken when the event was posted.
private[scheduler] case class JobStarted(job: Job, startTime: Long) extends JobSchedulerEvent

// A job has finished running; `completedTime` is the clock reading taken when the event was posted.
private[scheduler] case class JobCompleted(job: Job, completedTime: Long) extends JobSchedulerEvent

// An error was reported, carrying a message and the throwable that caused it.
private[scheduler] case class ErrorReported(msg: String, e: Throwable) extends JobSchedulerEvent
/**
 * Dispatches a scheduler event to the handler that matches its concrete type.
 *
 * Anything thrown while handling the event is caught here and forwarded to `reportError`,
 * so this method itself does not propagate handler failures to its caller.
 *
 * @param event the event to handle
 */
private def processEvent(event: JobSchedulerEvent): Unit = {
  try {
    event match {
      // A job started: hand the job and its start time to handleJobStart.
      case JobStarted(job, startTime) => handleJobStart(job, startTime)
      // A job finished: hand the job and its completion time to handleJobCompletion.
      case JobCompleted(job, completedTime) => handleJobCompletion(job, completedTime)
      // An error was reported: delegate to handleError.
      // NOTE(review): the original note says handleError wakes every waiter on the lock via
      // condition.signalAll; handleError is not visible here -- confirm.
      case ErrorReported(m, e) => handleError(m, e)
    }
  } catch {
    // Deliberately broad: every Throwable raised by a handler is reported, not rethrown.
    case e: Throwable => reportError("Error in job scheduler", e)
  }
}
1.1 handleJobStart方法
/**
 * Records that `job` has started and notifies the streaming listeners.
 *
 * @param job       the job that started
 * @param startTime start timestamp carried by the JobStarted event
 */
private def handleJobStart(job: Job, startTime: Long): Unit = {
  // Look up the job set this job belongs to, keyed by the job's batch time.
  val jobSet = jobSets.get(job.time)
  // Captured BEFORE jobSet.handleJobStart: true when no job of this set had started yet.
  val isFirstJobOfJobSet = !jobSet.hasStarted
  // Let the job set record the start of this job.
  jobSet.handleJobStart(job)
  if (isFirstJobOfJobSet) {
    // First job of the batch: announce the batch start on the listener bus.
    listenerBus.post(StreamingListenerBatchStarted(jobSet.toBatchInfo))
  }
  // Store the start time on the job itself, then announce the output operation start.
  job.setStartTime(startTime)
  listenerBus.post(StreamingListenerOutputOperationStarted(job.toOutputOperationInfo))
  logInfo("Starting job " + job.id + " from job set of time " + jobSet.time)
}

// ---- 1.2 handleJobCompletion method ----

/**
 * Records that `job` has completed, notifies the streaming listeners and, once the whole
 * job set has completed without this job failing, removes the job set and logs its delays.
 *
 * @param job           the job that completed
 * @param completedTime completion timestamp carried by the JobCompleted event
 */
private def handleJobCompletion(job: Job, completedTime: Long): Unit = {
  val jobSet = jobSets.get(job.time)
  jobSet.handleJobCompletion(job)
  job.setEndTime(completedTime)
  listenerBus.post(StreamingListenerOutputOperationCompleted(job.toOutputOperationInfo))
  logInfo("Finished job " + job.id + " from job set of time " + jobSet.time)
  if (jobSet.hasCompleted) {
    // Every job of the batch is done: announce the batch completion.
    listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo))
  }
  job.result match {
    case Failure(e) =>
      // A failed job is only reported; the job set is NOT removed on this path.
      reportError("Error running job " + job, e)
    case _ =>
      if (jobSet.hasCompleted) {
        // The job set has no outstanding jobs: drop it from the map.
        jobSets.remove(jobSet.time)
        // Tell the job generator the batch is complete.
        // NOTE(review): the original note says this schedules metadata clearing -- confirm.
        jobGenerator.onBatchCompletion(jobSet.time)
        // Delays are divided by 1000.0 and printed with an "s" suffix.
        logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format(
          jobSet.totalDelay / 1000.0,
          jobSet.time.toString,
          jobSet.processingDelay / 1000.0
        ))
      }
  }
}
2. start方法
/**
 * Starts the scheduler: creates and starts the event loop, registers the rate controllers of
 * the input streams as streaming listeners, starts the listener bus, creates the receiver and
 * input-info trackers, optionally creates the executor allocation manager, and finally starts
 * the receiver tracker, the job generator and the allocation manager.
 *
 * The whole body runs under `synchronized`; if `eventLoop` is already set the call is a no-op.
 */
def start(): Unit = synchronized {
  if (eventLoop != null) return // scheduler has already been started

  logDebug("Starting JobScheduler")
  // Anonymous EventLoop subclass: received events go to processEvent, errors go to reportError.
  // Author's note (EventLoop internals are not shown here -- confirm): EventLoop is abstract;
  // constructing it initialises a thread that keeps taking events from a blocking queue and
  // calls the subclass's onReceive for each one.
  eventLoop = new EventLoop[JobSchedulerEvent]("JobScheduler") {
    override protected def onReceive(event: JobSchedulerEvent): Unit = processEvent(event)

    override protected def onError(e: Throwable): Unit = reportError("Error in job scheduler", e)
  }
  // Author's note (not shown here -- confirm): start() has an onStart hook that guarantees
  // onReceive only runs after start-up; because the event queue is blocking, onReceive is not
  // invoked while the queue is empty. This call is what actually launches the background thread.
  eventLoop.start()

  // attach rate controllers of input streams to receive batch completion updates
  for {
    // every input stream of the graph
    inputDStream <- ssc.graph.getInputStreams
    // its rate controller, if it has one (tracks the consumption rate, per the author's note)
    rateController <- inputDStream.rateController
  } ssc.addStreamingListener(rateController)
  // Author's note (not shown here -- confirm): listeners are kept in a copy-on-write
  // collection so that readers always see an accurate, up-to-date view.

  listenerBus.start()
  // Initialisation of the trackers.
  receiverTracker = new ReceiverTracker(ssc)
  inputInfoTracker = new InputInfoTracker(ssc)

  // Use the scheduler backend as allocation client when it implements the trait; otherwise
  // pass null on to createIfEnabled. The typed pattern already narrows `b`, so no cast is needed.
  val executorAllocClient: ExecutorAllocationClient = ssc.sparkContext.schedulerBackend match {
    case b: ExecutorAllocationClient => b
    case _ => null
  }

  // Author's reading notes on ExecutorAllocationManager (quoted code lives outside this file):
  // it manages the executors allocated to the StreamingContext, requesting and killing
  // executors dynamically, so the application can scale up and down.
  //   val averageBatchProcTime = batchProcTimeSum / batchProcTimeCount
  //   val ratio = averageBatchProcTime.toDouble / batchDurationMs
  //   if (ratio >= scalingUpRatio) {
  //     logDebug("Requesting executors")
  //     val numNewExecutors = math.max(math.round(ratio).toInt, 1)
  //   /** in this case executors get killed */
  //   if (ratio <= scalingDownRatio) {
  //     logDebug("Killing executors")
  //     killExecutor()
  //   }
  // How to tune it:
  //   private val scalingUpRatio = conf.getDouble(SCALING_UP_RATIO_KEY, SCALING_UP_RATIO_DEFAULT)
  //     SCALING_UP_RATIO_KEY = "spark.streaming.dynamicAllocation.scalingUpRatio"
  //     SCALING_UP_RATIO_DEFAULT = 0.9
  //   private val scalingDownRatio =
  //     conf.getDouble(SCALING_DOWN_RATIO_KEY, SCALING_DOWN_RATIO_DEFAULT)
  //     SCALING_DOWN_RATIO_KEY = "spark.streaming.dynamicAllocation.scalingDownRatio"
  //     SCALING_DOWN_RATIO_DEFAULT = 0.3
  //   private val minNumExecutors =
  //     conf.getInt(MIN_EXECUTORS_KEY, math.max(1, receiverTracker.numReceivers))
  //     (numReceivers is the size of the receiverInputStreams array)
  //     MIN_EXECUTORS_KEY = "spark.streaming.dynamicAllocation.minExecutors"
  //   private val maxNumExecutors = conf.getInt(MAX_EXECUTORS_KEY, Integer.MAX_VALUE)
  //     MAX_EXECUTORS_KEY = "spark.streaming.dynamicAllocation.maxExecutors"
  //   private val timer = new RecurringTimer(clock, scalingIntervalSecs * 1000,
  //     _ => manageAllocation(), "streaming-executor-allocation-manager")
  //     (the period at which manageAllocation, which adjusts the executors, is invoked)
  //     SCALING_INTERVAL_KEY = "spark.streaming.dynamicAllocation.scalingInterval"
  //     SCALING_INTERVAL_DEFAULT_SECS = 60
  // To use dynamic allocation, set ENABLED_KEY = "spark.streaming.dynamicAllocation.enabled"
  // to true and do not set the number of executor instances:
  //   val numExecutor = conf.getInt("spark.executor.instances", 0)
  //   val streamingDynamicAllocationEnabled = conf.getBoolean(ENABLED_KEY, false)
  //   if (numExecutor != 0 && streamingDynamicAllocationEnabled) {
  executorAllocationManager = ExecutorAllocationManager.createIfEnabled(
    executorAllocClient,
    receiverTracker,
    ssc.conf,
    ssc.graph.batchDuration.milliseconds,
    clock)
  executorAllocationManager.foreach(ssc.addStreamingListener)

  // Author's notes on what receiverTracker.start() does (code not shown here -- confirm):
  //   endpoint = ssc.env.rpcEnv.setupEndpoint(
  //     "ReceiverTracker", new ReceiverTrackerEndpoint(ssc.env.rpcEnv))
  //   if (!skipReceiverLaunch) launchReceivers()
  // i.e. the receivers are distributed to the nodes; how that happens is left for a later note.
  receiverTracker.start()
  // Author's note: the job generator is likewise driven by an EventLoop.
  jobGenerator.start()
  // Starts the allocation manager only when one was created, i.e. when dynamic allocation is
  // configured; from then on the number of executors is adjusted dynamically.
  executorAllocationManager.foreach(_.start())
  logInfo("Started JobScheduler")
}
3.submitJobSet方法
// listenerBus.post(StreamingListenerBatchSubmitted(jobSet.toBatchInfo))
// jobSets.put(jobSet.time, jobSet)
// jobSet.jobs.foreach(job => jobExecutor.execute(new JobHandler(job)))
// logInfo("Added jobs for time " + jobSet.time)
jobExecutor其实就是一个线程池而已
//private val jobExecutor =ThreadUtils.newDaemonFixedThreadPool(numConcurrentJobs, "streaming-job-executor")
// private val numConcurrentJobs = ssc.conf.getInt("spark.streaming.concurrentJobs", 1)
// 说明:并发作业数量由 spark.streaming.concurrentJobs 配置,默认值是1
4.JobHandler这个Runnable任务(由jobExecutor线程池中的线程执行)
/**
 * Runnable that executes a single streaming job and reports its start and completion to the
 * scheduler's event loop.
 *
 * @param job the job to run
 */
private class JobHandler(job: Job) extends Runnable with Logging {
  import JobScheduler._

  /**
   * Installs the streaming-specific local properties on the SparkContext, posts `JobStarted`,
   * runs the job, posts `JobCompleted`, and always restores the previous local properties.
   * If the event loop is null when this starts, the job is not run at all.
   */
  def run(): Unit = {
    // Remember the current local properties so that `finally` can put them back.
    val oldProps = ssc.sparkContext.getLocalProperties
    try {
      // Work on a clone of the saved properties rather than on the shared instance.
      ssc.sparkContext.setLocalProperties(SerializationUtils.clone(ssc.savedProperties.get()))
      val formattedTime = UIUtils.formatBatchTime(
        job.time.milliseconds, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false)
      val batchUrl = s"/streaming/batch/?id=${job.time.milliseconds}"
      val batchLinkText = s"[output operation ${job.outputOpId}, batch time ${formattedTime}]"

      // Job description is an HTML link pointing at this batch's page.
      ssc.sc.setJobDescription(
        s"""Streaming job from <a href="$batchUrl">$batchLinkText</a>""")
      val BATCH_TIME_PROPERTY_KEY = "spark.streaming.internal.batchTime"
      val OUTPUT_OP_ID_PROPERTY_KEY = "spark.streaming.internal.outputOpId"
      ssc.sc.setLocalProperty(BATCH_TIME_PROPERTY_KEY, job.time.milliseconds.toString)
      ssc.sc.setLocalProperty(OUTPUT_OP_ID_PROPERTY_KEY, job.outputOpId.toString)
      // NOTE(review): presumably makes Spark checkpoint every marked ancestor RDD so that
      // lineage gets truncated -- confirm against RDD.CHECKPOINT_ALL_MARKED_ANCESTORS.
      ssc.sparkContext.setLocalProperty(RDD.CHECKPOINT_ALL_MARKED_ANCESTORS, "true")

      // `eventLoop` is read into a local each time it is used: the field may be null
      // (scheduler stopped), and it is re-read after the job because it may have changed
      // while the job was running.
      Option(eventLoop) match {
        case Some(loopAtStart) =>
          // Announce that the job is about to start.
          loopAtStart.post(JobStarted(job, clock.getTimeMillis()))
          // Run the job with disableOutputSpecValidation set to true for its duration.
          PairRDDFunctions.disableOutputSpecValidation.withValue(true) {
            job.run()
          }
          // Post the completion only if the event loop is still there.
          Option(eventLoop).foreach(_.post(JobCompleted(job, clock.getTimeMillis())))
        case None =>
          // JobScheduler has been stopped.
      }
    } finally {
      ssc.sparkContext.setLocalProperties(oldProps)
    }
  }
}