运行流程
组件介绍:
Driver
ReceiverTracker:负责接收BlockGenerator发送过来的Block信息。
JobScheduler:根据ReceiverTracker的Block信息,定期生成RDD计算任务,并这些Task提交到Executor上执行。
Executor
Receiver:负责接收数据源的数据
BlockGenerator:定期将Receiver的数据封装成Block再保存到BlockManager,并向ReceiverTracker发送块信息。
streamingcontext启动
streamingcontext是spark streaming的程序入口,先看streamingcontext中的重要变量。
// sparkContext,因为spark streaming最终也是调用spark core
private[streaming] val sc: SparkContext = {
if (_sc != null) {
_sc
} else if (isCheckpointPresent) {
SparkContext.getOrCreate(_cp.createSparkConf())
} else {
throw new SparkException("Cannot create StreamingContext without a SparkContext")
}
}
//保存Dtream的依赖关系
private[streaming] val graph: DStreamGraph = {
if (isCheckpointPresent) {
_cp.graph.setContext(this)
_cp.graph.restoreCheckpointData()
_cp.graph
} else {
require(_batchDur != null, "Batch duration for StreamingContext cannot be null")
val newGraph = new DStreamGraph()
newGraph.setBatchDuration(_batchDur)
newGraph
}
}
//JobScheduler 调度batch间隔生成的job
private[streaming] val scheduler = new JobScheduler(this)
spark streaming的启动工作主要在start方法中,核心是启动了JobScheduler。
def start(): Unit = synchronized {
state match {
case INITIALIZED =>
startSite.set(DStream.getCreationSite())
StreamingContext.ACTIVATION_LOCK.synchronized {
StreamingContext.assertNoOtherContextIsActive()
try {
validate()
ThreadUtils.runInNewThread("streaming-start") {
sparkContext.setCallSite(startSite.get)
sparkContext.clearJobGroup()
sparkContext.setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "false")
savedProperties.set(SerializationUtils.clone(sparkContext.localProperties.get()))
// 调用JobScheduler的start方法
scheduler.start()
}
state = StreamingContextState.ACTIVE
scheduler.listenerBus.post(
StreamingListenerStreamingStarted(System.currentTimeMillis()))
} catch {
case NonFatal(e) =>
logError("Error starting the context, marking it as stopped", e)
scheduler.stop(false)
state = StreamingContextState.STOPPED
throw e
}
StreamingContext.setActiveContext(this)
}
logDebug("Adding shutdown hook") // force eager creation of logger
shutdownHookRef = ShutdownHookManager.addShutdownHook(
StreamingContext.SHUTDOWN_HOOK_PRIORITY)(() => stopOnShutdown())
assert(env.metricsSystem != null)
env.metricsSystem.registerSource(streamingSource)
uiTab.foreach(_.attach())
logInfo("StreamingContext started")
case ACTIVE =>
logWarning("StreamingContext has already been started")
case STOPPED =>
throw new IllegalStateException("StreamingContext has already been stopped")
}
}
JobScheduler start方法启动了receiverTracker和jobGenerator。
def start(): Unit = synchronized {
if (eventLoop != null) return
logDebug("Starting JobScheduler")
eventLoop = new EventLoop[JobSchedulerEvent]("JobScheduler") {
override protected def onReceive(event: JobSchedulerEvent): Unit = processEvent(event)
override protected def onError(e: Throwable): Unit = reportError("Error in job scheduler", e)
}
eventLoop.start()
for {
inputDStream <- ssc.graph.getInputStreams
rateController <- inputDStream.rateController
} ssc.addStreamingListener(rateController)
listenerBus.start()
// 创建ReceiverTracker
receiverTracker = new ReceiverTracker(ssc)
inputInfoTracker = new InputInfoTracker(ssc)
val executorAllocClient: ExecutorAllocationClient = ssc.sparkContext.schedulerBackend match {
case b: ExecutorAllocationClient => b.asInstanceOf[ExecutorAllocationClient]
case _ => null
}
// 分配Executor
executorAllocationManager = ExecutorAllocationManager.createIfEnabled(
executorAllocClient,
receiverTracker,
ssc.conf,
ssc.graph.batchDuration.milliseconds,
clock)
executorAllocationManager.foreach(ssc.addStreamingListener)
// 启动receiverTracker
receiverTracker.start()
// 启动jobGenerator
jobGenerator.start()
executorAllocationManager.foreach(_.start())
logInfo("Started JobScheduler")
}
operator算子注册
spark streaming中Dstream的各种算子最终会形成一个调用链,当outputOperator类算子调用时,这个关系保存在sparkstreaming的graph中。下面以print为例分析operator算子
def print(): Unit = ssc.withScope {
print(10)
}
def print(num: Int): Unit = ssc.withScope {
def foreachFunc: (RDD[T], Time) => Unit = {
(rdd: RDD[T], time: Time) => {
val firstNum = rdd.take(num + 1)
// scalastyle:off println
println("-------------------------------------------")
println(s"Time: $time")
println("-------------------------------------------")
firstNum.take(num).foreach(println)
if (firstNum.length > num) println("...")
println()
// scalastyle:on println
}
}
foreachRDD(context.sparkContext.clean(foreachFunc), displayInnerRDDOps = false)
}
private def foreachRDD(
foreachFunc: (RDD[T], Time) => Unit,
displayInnerRDDOps: Boolean): Unit = {
new ForEachDStream(this,
context.sparkContext.clean(foreachFunc, false), displayInnerRDDOps).register()
}
private[streaming] def register(): DStream[T] = {
ssc.graph.addOutputStream(this)
this
}
receiver启动
receiverTracker的start方法会调用launchReceivers来启动receiver
//ReceiverTracker start方法
def start(): Unit = synchronized {
if (isTrackerStarted) {
throw new SparkException("ReceiverTracker already started")
}
if (!receiverInputStreams.isEmpty) {
// 初始化RPC endpoint
endpoint = ssc.env.rpcEnv.setupEndpoint(
"ReceiverTracker", new ReceiverTrackerEndpoint(ssc.env.rpcEnv))
// 调用launchReceivers来启动Receiver
if (!skipReceiverLaunch) launchReceivers()
logInfo("ReceiverTracker started")
trackerState = Started
}
}
launchReceivers 方法中会发送StartAllReceivers消息
private def launchReceivers(): Unit = {
val receivers = receiverInputStreams.map { nis =>
val rcvr = nis.getReceiver()
rcvr.setReceiverId(nis.id)
rcvr
}
runDummySparkJob()
logInfo("Starting " + receivers.length + " receivers")
// 发送启动所有receiver的消息
endpoint.send(StartAllReceivers(receivers))
}
receive方法中有对StartAllReceivers消息的处理
override def receive: PartialFunction[Any, Unit] = {
// 启动所有receivers
case StartAllReceivers(receivers) =>
val scheduledLocations = schedulingPolicy.scheduleReceivers(receivers, getExecutors)
// 遍历receivers,然后启动每一个receiver
for (receiver <- receivers) {
val executors = scheduledLocations(receiver.streamId)
updateReceiverScheduledExecutors(receiver.streamId, executors)
receiverPreferredLocations(receiver.streamId) = receiver.preferredLocation
startReceiver(receiver, executors)
}
...
}
startReceiver方法中会提交一个job,负责在每个executor上启动receiver
private def startReceiver(
receiver: Receiver[_],
scheduledLocations: Seq[TaskLocation]): Unit = {
def shouldStartReceiver: Boolean = {
!(isTrackerStopping || isTrackerStopped)
}
val receiverId = receiver.streamId
if (!shouldStartReceiver) {
onReceiverJobFinish(receiverId)
return
}
val checkpointDirOption = Option(ssc.checkpointDir)
val serializableHadoopConf =
new SerializableConfiguration(ssc.sparkContext.hadoopConfiguration)
// receiver在worker节点启动的方法(ReceiverSupervisorImpl的start方法)
val startReceiverFunc: Iterator[Receiver[_]] => Unit =
(iterator: Iterator[Receiver[_]]) => {
if (!iterator.hasNext) {
throw new SparkException(
"Could not start receiver as object not found.")
}
if (TaskContext.get().attemptNumber() == 0) {
val receiver = iterator.next()
assert(iterator.hasNext == false)
val supervisor = new ReceiverSupervisorImpl(
receiver, SparkEnv.get, serializableHadoopConf.value, checkpointDirOption)
supervisor.start()
supervisor.awaitTermination()
} else {
// It's restarted by TaskScheduler, but we want to reschedule it again. So exit it.
}
}
val receiverRDD: RDD[Receiver[_]] =
if (scheduledLocations.isEmpty) {
ssc.sc.makeRDD(Seq(receiver), 1)
} else {
val preferredLocations = scheduledLocations.map(_.toString).distinct
ssc.sc.makeRDD(Seq(receiver -> preferredLocations))
}
receiverRDD.setName(s"Receiver $receiverId")
ssc.sparkContext.setJobDescription(s"Streaming job running receiver $receiverId")
ssc.sparkContext.setCallSite(Option(ssc.getStartSite()).getOrElse(Utils.getCallSite()))
// 提交启动receiver的job到spark核心进行启动
val future = ssc.sparkContext.submitJob[Receiver[_], Unit, Unit](
receiverRDD, startReceiverFunc, Seq(0), (_, _) => Unit, ())
future.onComplete {
case Success(_) =>
if (!shouldStartReceiver) {
onReceiverJobFinish(receiverId)
} else {
logInfo(s"Restarting Receiver $receiverId")
self.send(RestartReceiver(receiver))
}
case Failure(e) =>
if (!shouldStartReceiver) {
onReceiverJobFinish(receiverId)
} else {
logError("Receiver has been stopped. Try to restart it.", e)
logInfo(s"Restarting Receiver $receiverId")
self.send(RestartReceiver(receiver))
}
}(ThreadUtils.sameThread)
logInfo(s"Receiver ${receiver.streamId} started")
}
至此receiver启动完毕
数据接收
BlockGenerator是Receiver中比较重要的一个类,主要作用是将收到的单条数据写入buffer,然后再定时将buffer封装为块,并进行存储和向Driver汇报。
private[streaming] class BlockGenerator(
listener: BlockGeneratorListener, // 监听器,用来监听事件
receiverId: Int,
conf: SparkConf,
clock: Clock = new SystemClock()
) extends RateLimiter(conf) with Logging {
//定时器,定时将currentBuffer中的数据封装成Block,然后推送到blocksForPushing里
private val blockIntervalTimer =
new RecurringTimer(clock, blockIntervalMs, updateCurrentBuffer, "BlockGenerator")
// blockQueueSize 队列的大小
private val blockQueueSize = conf.getInt("spark.streaming.blockQueueSize", 10)
// 一个队列,用来存取封装好的Block
private val blocksForPushing = new ArrayBlockingQueue[Block](blockQueueSize)
// 这是一个线程,用来从blocksForPushing中取出Block,然后进行存储,汇报ReceiverTracker
private val blockPushingThread = new Thread() { override def run() { keepPushingBlocks() } }
// 用来暂存接受到的数据
@volatile private var currentBuffer = new ArrayBuffer[Any]
// 按照时间生成块,然后将块推到blocksForPushing中
private def updateCurrentBuffer(time: Long): Unit = {
try {
var newBlock: Block = null
synchronized {
if (currentBuffer.nonEmpty) {
val newBlockBuffer = currentBuffer
currentBuffer = new ArrayBuffer[Any]
val blockId = StreamBlockId(receiverId, time - blockIntervalMs)
listener.onGenerateBlock(blockId)
newBlock = new Block(blockId, newBlockBuffer)
}
}
if (newBlock != null) {
blocksForPushing.put(newBlock) // put is blocking when queue is full
}
} catch {
case ie: InterruptedException =>
logInfo("Block updating timer thread was interrupted")
case e: Exception =>
reportError("Error in block updating thread", e)
}
}
// 推送块
private def keepPushingBlocks() {
logInfo("Started block pushing thread")
def areBlocksBeingGenerated: Boolean = synchronized {
state != StoppedGeneratingBlocks
}
try {
while (areBlocksBeingGenerated) {
Option(blocksForPushing.poll(10, TimeUnit.MILLISECONDS)) match {
case Some(block) => pushBlock(block)
case None =>
}
}
logInfo("Pushing out the last " + blocksForPushing.size() + " blocks")
while (!blocksForPushing.isEmpty) {
val block = blocksForPushing.take()
logDebug(s"Pushing block $block")
pushBlock(block)
logInfo("Blocks left to push " + blocksForPushing.size())
}
logInfo("Stopped block pushing thread")
} catch {
case ie: InterruptedException =>
logInfo("Block pushing thread was interrupted")
case e: Exception =>
reportError("Error in block pushing thread", e)
}
}
// 推送块
private def pushBlock(block: Block) {
listener.onPushBlock(block.id, block.buffer)
logInfo("Pushed block " + block.id)
}
}
BlockGenerator中使用了一个ArrayBuffer来不断的接收存储数据,然后会按时将ArrayBuffer中的数据封装为Block。另有一个队列ArrayBlockingQueue来存取Block,一边存一边取,这样就实现了数据的接收与存储。
pushBlock中调用了listener.onPushBlock(),listener是构造BlockGenerator时传进来的,使用的是ReceiverSupervisorImpl中的defaultBlockGeneratorListener。
private val defaultBlockGeneratorListener = new BlockGeneratorListener {
def onAddData(data: Any, metadata: Any): Unit = { }
def onGenerateBlock(blockId: StreamBlockId): Unit = { }
def onError(message: String, throwable: Throwable) {
reportError(message, throwable)
}
// 推块的时候调用,它又会调用ReceiverSupervisorImpl.pushArrayBuffer()
def onPushBlock(blockId: StreamBlockId, arrayBuffer: ArrayBuffer[_]) {
pushArrayBuffer(arrayBuffer, None, Some(blockId))
}
}
// 将接收到的数据的ArrayBuffer作为数据块存储到Spark的内存中
def pushArrayBuffer(
arrayBuffer: ArrayBuffer[_],
metadataOption: Option[Any],
blockIdOption: Option[StreamBlockId]
) {
// 调用pushAndReportBlock()
pushAndReportBlock(ArrayBufferBlock(arrayBuffer), metadataOption, blockIdOption)
}
// 将块数据进行存储,然后汇报给Driver
def pushAndReportBlock(
receivedBlock: ReceivedBlock,
metadataOption: Option[Any],
blockIdOption: Option[StreamBlockId]
) {
val blockId = blockIdOption.getOrElse(nextBlockId)
val time = System.currentTimeMillis
// 这步会真正的存储数据
val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock)
logDebug(s"Pushed block $blockId in ${(System.currentTimeMillis - time)} ms")
val numRecords = blockStoreResult.numRecords
val blockInfo = ReceivedBlockInfo(streamId, numRecords, metadataOption, blockStoreResult)
// 将存储结果报告Driver
if (!trackerEndpoint.askSync[Boolean](AddBlock(blockInfo))) {
throw new SparkException("Failed to add block to receiver tracker.")
}
logDebug(s"Reported block
receiver的作用主要是负责从各种数据源拉取数据。receiver的启动入口是ReceiverSupervisorImpl的start方法。ReceiverSupervisorImpl的父类是ReceiverSupervisor,因此start的实际执行体在ReceiverSupervisor的start方法中。
// ReceiverSupervisor的start方法
def start() {
onStart()
startReceiver()
}
// ReceiverSupervisorImpl的onStart方法
override protected def onStart() {
registeredBlockGenerators.asScala.foreach { _.start() }
}
// BlockGenerator的start方法
def start(): Unit = synchronized {
if (state == Initialized) {
state = Active
blockIntervalTimer.start()
blockPushingThread.start()
logInfo("Started BlockGenerator")
} else {
throw new SparkException(
s"Cannot start BlockGenerator as its not in the Initialized state [state = $state]")
}
}
// 启动receiver
def startReceiver(): Unit = synchronized {
try {
if (onReceiverStart()) {
logInfo(s"Starting receiver $streamId")
receiverState = Started
receiver.onStart()
logInfo(s"Called receiver $streamId onStart")
} else {
stop("Registered unsuccessfully because Driver refused to start receiver " + streamId, None)
}
} catch {
case NonFatal(t) =>
stop("Error starting receiver " + streamId, Some(t))
}
}
def start(): Unit = synchronized {
if (state == Initialized) {
state = Active
blockIntervalTimer.start()
blockPushingThread.start()
logInfo("Started BlockGenerator")
} else {
throw new SparkException(
s"Cannot start BlockGenerator as its not in the Initialized state [state = $state]")
}
}
BlockGenerator的start方法会启动块生成的定时器和推送块的线程。
startReceiver()方法中,调用receiver.onStart(),开始接收数据。Receiver是一个抽象类,不同的数据源有不同的实现。下面以SocketReceiver为例来剖析其实现。
private[streaming]
class SocketReceiver[T: ClassTag](
host: String,
port: Int,
bytesToObjects: InputStream => Iterator[T],
storageLevel: StorageLevel
) extends Receiver[T](storageLevel) with Logging {
private var socket: Socket = _
def onStart() {
logInfo(s"Connecting to $host:$port")
try {
// 启动socket,开始监听
socket = new Socket(host, port)
} catch {
case e: ConnectException =>
restart(s"Error connecting to $host:$port", e)
return
}
logInfo(s"Connected to $host:$port")
// Start the thread that receives data over a connection
new Thread("Socket Receiver") {
setDaemon(true)
override def run() { receive() }
}.start()
}
def receive() {
try {
// 接收数据
val iterator = bytesToObjects(socket.getInputStream())
while(!isStopped && iterator.hasNext) {
// 将接收到的数据进行存储
store(iterator.next())
}
if (!isStopped()) {
restart("Socket data stream had no more data")
} else {
logInfo("Stopped receiving")
}
} catch {
case NonFatal(e) =>
logWarning("Error receiving data", e)
restart("Error receiving data", e)
} finally {
onStop()
}
}
}
onStart中启动了一个线程用于不断的接收数据并调用store()将接收到的数据进行存储。
def store(dataItem: T) {
supervisor.pushSingle(dataItem)
}
/** Store an ArrayBuffer of received data as a data block into Spark's memory. */
def store(dataBuffer: ArrayBuffer[T]) {
supervisor.pushArrayBuffer(dataBuffer, None, None)
}
/**
* Store an ArrayBuffer of received data as a data block into Spark's memory.
* The metadata will be associated with this block of data
* for being used in the corresponding InputDStream.
*/
def store(dataBuffer: ArrayBuffer[T], metadata: Any) {
supervisor.pushArrayBuffer(dataBuffer, Some(metadata), None)
}
/** Store an iterator of received data as a data block into Spark's memory. */
def store(dataIterator: Iterator[T]) {
supervisor.pushIterator(dataIterator, None, None)
}
运行job
jobGenerator负责生成,运行job。
private[streaming]
class JobGenerator(jobScheduler: JobScheduler) extends Logging {
// 定时器,按照批处理间隔定时向eventLoop发送生成job的消息
private val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds,
longTime => eventLoop.post(GenerateJobs(new Time(longTime))), "JobGenerator")
// job生成消息的事件队列
private var eventLoop: EventLoop[JobGeneratorEvent] = null
}
下面看下JobGenerator的start方法,start方法中会启动eventLoop进行事件的处理。
def start(): Unit = synchronized {
if (eventLoop != null) return // generator has already been started
// Call checkpointWriter here to initialize it before eventLoop uses it to avoid a deadlock.
// See SPARK-10125
checkpointWriter
// eventLoop的回调方法onReceive会调用processEvent(event)进行事件的处理
eventLoop = new EventLoop[JobGeneratorEvent]("JobGenerator") {
override protected def onReceive(event: JobGeneratorEvent): Unit = processEvent(event)
override protected def onError(e: Throwable): Unit = {
jobScheduler.reportError("Error in job generator", e)
}
}
// 启动事件队列
eventLoop.start()
if (ssc.isCheckpointPresent) {
restart()
} else {
startFirstTime()
}
}
startFirstTime中会启动timer并定期向eventLoop中发送GenerateJobs消息。
private def startFirstTime() {
val startTime = new Time(timer.getStartTime())
// 启动DStreamGraph
graph.start(startTime - graph.batchDuration)
// 启动定时器timer
timer.start(startTime.milliseconds)
logInfo("Started JobGenerator at " + startTime)
}
eventLoop启动后会启动一个线程不断的来消费队列中的消息,处理方法为processEvent。
private def processEvent(event: JobGeneratorEvent) {
logDebug("Got event " + event)
event match {
case GenerateJobs(time) => generateJobs(time) //根据时间生成job
case ClearMetadata(time) => clearMetadata(time)
case DoCheckpoint(time, clearCheckpointDataLater) =>
doCheckpoint(time, clearCheckpointDataLater)
case ClearCheckpointData(time) => clearCheckpointData(time)
}
}
对于GenerateJobs类型的消息调用generateJobs进行处理,generateJobs首先调用receiverTracker.allocateBlocksToBatch()给当前批分配需要处理的数据,之后调用DStreamGraph.generateJobs()生成job序列,如果生成成功,调用jobScheduler.submitJobSet提交job。
private def generateJobs(time: Time) {
// Checkpoint all RDDs marked for checkpointing to ensure their lineages are
// truncated periodically. Otherwise, we may run into stack overflows (SPARK-6847).
ssc.sparkContext.setLocalProperty(RDD.CHECKPOINT_ALL_MARKED_ANCESTORS, "true")
Try {
// 调用receiverTracker给批分配数据
jobScheduler.receiverTracker.allocateBlocksToBatch(time) // allocate received blocks to batch
// 在DStreamGraph中根据分配的块生成job
graph.generateJobs(time) // generate jobs using allocated block
} match {
// 如果job生成成功,调用jobScheduler.submitJobSet提交job
case Success(jobs) =>
val streamIdToInputInfos = jobScheduler.inputInfoTracker.getInfo(time)
jobScheduler.submitJobSet(JobSet(time, jobs, streamIdToInputInfos))
// 失败则打报告
case Failure(e) =>
jobScheduler.reportError("Error generating jobs for time " + time, e)
PythonDStream.stopStreamingContextIfPythonProcessIsDead(e)
}
// 完成后进行checkpoint
eventLoop.post(DoCheckpoint(time, clearCheckpointDataLater = false))
}