第11课:Spark Streaming源码解读之Driver中的ReceiverTracker架构设计以及具体实现彻底研究

本期内容:

1.ReceiverTracker的架构设计

2.消息循环系统

3.ReceiverTracker具体实现

 

启动Receiver的方式:

1.把每个Receiver都封装成为task,这个task是这个job中唯一的task,实质上讲ReceiverTracker启动Receiver的方式就是封装成一个一个的job,有多少个job就会启动多少Receiver。每个task就一条数据,就是Receiver的数据。

2.ReceiverTracker在启动Receiver的时候有一个ReceiverSupervisor,ReceiverSupervisorImpl作为ReceiverSupervisor的实现。ReceiverSupervisor在启动的时候会启动Receiver,然后Receiver不断地接收数据,并通过BlockGenerator把自己接收的数据变成一个一个的block;BlockGenerator背后有个定时器,这个定时器会不断地把数据存储起来。存储方式有两种:一种是直接通过BlockManager存储,一种是先写日志(WAL)。ReceiverSupervisorImpl会把存储的元数据汇报给ReceiverTracker(实际上是ReceiverTracker中的RPC通信消息实体),后面再进行下一步的数据管理工作。

 

数据的规模一般从记录条数来考虑,例如10亿条级别。

源码

ReceivedBlockHandler

/**
 * Handler through which every received block is written.
 *
 * If the receiver write-ahead log is enabled in the configuration, a
 * WriteAheadLogBasedBlockHandler is created (this requires a checkpoint directory);
 * otherwise a BlockManagerBasedBlockHandler is created.
 */
private val receivedBlockHandler: ReceivedBlockHandler = {
  val walEnabled = WriteAheadLogUtils.enableReceiverLog(env.conf)
  if (!walEnabled) {
    new BlockManagerBasedBlockHandler(env.blockManager, receiver.storageLevel)
  } else {
    // The WAL-based handler is constructed with the checkpoint directory,
    // so fail fast when none has been configured.
    val checkpointDir = checkpointDirOption.getOrElse {
      throw new SparkException(
        "Cannot enable receiver write-ahead log without checkpoint directory set. " +
          "Please use streamingContext.checkpoint() to set the checkpoint directory. " +
          "See documentation for more details.")
    }
    new WriteAheadLogBasedBlockHandler(env.blockManager, receiver.streamId,
      receiver.storageLevel, env.conf, hadoopConf, checkpointDir)
  }
}

pushAndReportBlock

/**
 * Store a received block and report it to the driver.
 *
 * The block is written through `receivedBlockHandler`, then a `ReceivedBlockInfo`
 * describing it is sent to the tracker endpoint as an `AddBlock` message.
 *
 * @param receivedBlock  the block to store
 * @param metadataOption optional metadata forwarded unchanged inside the block info
 * @param blockIdOption  id to use for the block; when empty, `nextBlockId` is used
 * @throws SparkException if the tracker answers the `AddBlock` request with `false`
 */
def pushAndReportBlock(
    receivedBlock: ReceivedBlock,
    metadataOption: Option[Any],
    blockIdOption: Option[StreamBlockId]
  ): Unit = {
  val blockId = blockIdOption.getOrElse(nextBlockId)
  val time = System.currentTimeMillis
  val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock)
  logDebug(s"Pushed block $blockId in ${(System.currentTimeMillis - time)} ms")
  val numRecords = blockStoreResult.numRecords
  val blockInfo = ReceivedBlockInfo(streamId, numRecords, metadataOption, blockStoreResult)
  // The reply tells us whether the tracker accepted the block. Ignoring it would log the
  // block as "reported" even though the tracker never recorded it.
  if (!trackerEndpoint.askWithRetry[Boolean](AddBlock(blockInfo))) {
    throw new SparkException("Failed to add block to receiver tracker.")
  }
  logDebug(s"Reported block $blockId")
}

private[streaming] case class ReceivedBlockInfo(
   
streamId: Int,
    numRecords: Option[Long],
    metadataOption: Option[Any],
    blockStoreResult: ReceivedBlockStoreResult
  ) {

 

/**
 * Remote RpcEndpointRef for the ReceiverTracker.
 * Built with RpcUtils.makeDriverRef from the endpoint name "ReceiverTracker",
 * the environment's configuration and its RpcEnv.
 */
private val trackerEndpoint = RpcUtils.makeDriverRef("ReceiverTracker", env.conf, env.rpcEnv)

 

 

ReceiverTracker是整个block管理的中心

//RPC消息循环体接收来自receiver的消息
private class ReceiverTrackerEndpoint(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint {

 

// Messages exchanged with the ReceiverTracker endpoint.
// `sealed` means every subtype of this trait must be declared in this same source file,
// so all such messages are listed here.
private[streaming] sealed trait ReceiverTrackerMessage
// Carries a receiver's stream id, type, host, executor id and its own endpoint reference.
private[streaming] case class RegisterReceiver(
   
streamId: Int,
    typ: String,
   
host: String,
   
executorId: String,
   
receiverEndpoint: RpcEndpointRef
  ) extends ReceiverTrackerMessage
// Carries the info of one received block.
private[streaming] case class AddBlock(receivedBlockInfo: ReceivedBlockInfo)
 
extends ReceiverTrackerMessage
// NOTE(review): ReportError does not extend ReceiverTrackerMessage, unlike the other
// messages declared here — confirm whether that is intentional.
private[streaming] case class ReportError(streamId: Int, message: String, error: String)
// Carries the stream id plus a message and an error string.
private[streaming] case class DeregisterReceiver(streamId: Int, msg: String, error: String)
 
extends ReceiverTrackerMessage

receiveAndReply

/**
 * Handle the messages that expect a reply.
 *
 * Every branch answers through `context`: with the result of the requested operation, or,
 * for an `AddBlock` processed after the endpoint became inactive, with a failure.
 *
 * @param context call context used to reply to (or fail) the sender's request
 */
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  // Remote messages
  case RegisterReceiver(streamId, typ, host, executorId, receiverEndpoint) =>
    val successful =
      registerReceiver(streamId, typ, host, executorId, receiverEndpoint, context.senderAddress)
    context.reply(successful)
  case AddBlock(receivedBlockInfo) =>
    if (WriteAheadLogUtils.isBatchingEnabled(ssc.conf, isDriver = true)) {
      // With WAL batching enabled the block is added on a separate thread pool,
      // presumably so a slow WAL write does not stall this message loop — TODO confirm.
      walBatchingThreadPool.execute(new Runnable {
        override def run(): Unit = Utils.tryLogNonFatalError {
          if (active) {
            context.reply(addBlock(receivedBlockInfo))
          } else {
            // Fail the request explicitly. Throwing here would only be logged by
            // tryLogNonFatalError and the sender would never receive an answer.
            context.sendFailure(
              new IllegalStateException("ReceiverTracker RpcEndpoint shut down."))
          }
        }
      })
    } else {
      context.reply(addBlock(receivedBlockInfo))
    }
  case DeregisterReceiver(streamId, message, error) =>
    deregisterReceiver(streamId, message, error)
    context.reply(true)
  // Local messages
  case AllReceiverIds =>
    // Reply with the ids of all receivers whose state is not INACTIVE.
    context.reply(receiverTrackingInfos.filter(_._2.state != ReceiverState.INACTIVE).keys.toSeq)
  case StopAllReceivers =>
    assert(isTrackerStopping || isTrackerStopped)
    stopReceivers()
    context.reply(true)
}

addBlock

/** Hand a newly reported block to the received-block tracker and return the tracker's result. */
private def addBlock(receivedBlockInfo: ReceivedBlockInfo): Boolean =
  receivedBlockTracker.addBlock(receivedBlockInfo)

 

/**
 * Record a received block.
 *
 * The addition is first passed to `writeToLog`; only when that succeeds is the block appended
 * to the queue of its stream. Returns the result of the log write, or `false` when a
 * non-fatal exception is raised along the way.
 */
def addBlock(receivedBlockInfo: ReceivedBlockInfo): Boolean = {
  try {
    val logged = writeToLog(BlockAdditionEvent(receivedBlockInfo))
    if (!logged) {
      logDebug(s"Failed to acknowledge stream ${receivedBlockInfo.streamId} receiving " +
        s"block ${receivedBlockInfo.blockStoreResult.blockId} in the Write Ahead Log.")
    } else {
      // The enqueue is done under this object's lock.
      synchronized {
        getReceivedBlockQueue(receivedBlockInfo.streamId) += receivedBlockInfo
      }
      logDebug(s"Stream ${receivedBlockInfo.streamId} received " +
        s"block ${receivedBlockInfo.blockStoreResult.blockId}")
    }
    logged
  } catch {
    case NonFatal(e) =>
      logError(s"Error adding block $receivedBlockInfo", e)
      false
  }
}

/**
 * Persist a tracker event to the write ahead log.
 *
 * Returns `true` when the log is disabled (nothing to write) or the write succeeded, and
 * `false` when the write raised a non-fatal exception.
 */
private def writeToLog(record: ReceivedBlockTrackerLogEvent): Boolean = {
  if (!isWriteAheadLogEnabled) {
    true
  } else {
    logTrace(s"Writing record: $record")
    try {
      // The serialized record is written together with the current clock time.
      writeAheadLogOption.get.write(
        ByteBuffer.wrap(Utils.serialize(record)),
        clock.getTimeMillis())
      true
    } catch {
      case NonFatal(e) =>
        logWarning(s"Exception thrown while writing record: $record to the WriteAheadLog.", e)
        false
    }
  }
}

/** Look up the block queue of the given stream, creating and registering an empty one if absent. */
private def getReceivedBlockQueue(streamId: Int): ReceivedBlockQueue =
  streamIdToUnallocatedBlockQueues.getOrElseUpdate(streamId, new ReceivedBlockQueue)

// Per-stream queues (keyed by stream id) of blocks that have not been allocated yet; they are only recorded here.
 
private val streamIdToUnallocatedBlockQueues = new mutable.HashMap[Int, ReceivedBlockQueue]
 

/**
 * Allocate all unallocated blocks to the given batch.
 * Does nothing when there are no receiver input streams.
 */
def allocateBlocksToBatch(batchTime: Time): Unit = {
  val hasReceiverStreams = receiverInputStreams.nonEmpty
  if (hasReceiverStreams) receivedBlockTracker.allocateBlocksToBatch(batchTime)
}

/**
 * Allocate every currently unallocated block to the batch at `batchTime`.
 *
 * The allocation is written through `writeToLog` before it takes effect. The per-stream queues
 * are only emptied after that write succeeds, so a failed write leaves the blocks queued
 * instead of dropping them.
 *
 * @param batchTime time of the batch the blocks are allocated to; ignored (with a log message)
 *                  unless it is later than the last allocated batch time
 */
def allocateBlocksToBatch(batchTime: Time): Unit = synchronized {
  if (lastAllocatedBatchTime == null || batchTime > lastAllocatedBatchTime) {
    // Take a snapshot of each queue rather than draining it: if the log write below fails,
    // the blocks must still be available for a later allocation.
    val streamIdToBlocks = streamIds.map { streamId =>
      (streamId, getReceivedBlockQueue(streamId).clone())
    }.toMap
    val allocatedBlocks = AllocatedBlocks(streamIdToBlocks)
    if (writeToLog(BatchAllocationEvent(batchTime, allocatedBlocks))) {
      // The allocation is recorded; only now remove the blocks from the unallocated queues.
      // This method holds the object's lock, so the queues cannot have grown since the snapshot.
      streamIds.foreach(getReceivedBlockQueue(_).clear())
      timeToAllocatedBlocks.put(batchTime, allocatedBlocks)
      lastAllocatedBatchTime = batchTime
    } else {
      logInfo(s"Possibly processed batch $batchTime need to be processed again in WAL recovery")
    }
  } else {
    // This situation occurs when:
    // 1. WAL is ended with BatchAllocationEvent, but without BatchCleanupEvent,
    // possibly processed batch job or half-processed batch job need to be processed again,
    // so the batchTime will be equal to lastAllocatedBatchTime.
    // 2. Slow checkpointing makes recovered batch time older than WAL recovered
    // lastAllocatedBatchTime.
    // This situation will only occurs in recovery time.
    logInfo(s"Possibly processed batch $batchTime need to be processed again in WAL recovery")
  }
}

ReceiverSupervisorImpl.scala

 

private val endpoint = env.rpcEnv.setupEndpoint(
 
"Receiver-" + streamId + "-" + System.currentTimeMillis(), new ThreadSafeRpcEndpoint {
   
override val rpcEnv: RpcEnv = env.rpcEnv

receive

/**
 * Handle one-way messages: stop the receiver, clean up old blocks,
 * or push a new rate limit to every registered block generator.
 */
override def receive: PartialFunction[Any, Unit] = {
  case StopReceiver =>
    logInfo("Received stop signal")
    ReceiverSupervisorImpl.this.stop("Stopped by driver", None)

  case CleanupOldBlocks(threshTime) =>
    logDebug("Received delete old batch signal")
    cleanupOldBlocks(threshTime)

  case UpdateRateLimit(eps) =>
    logInfo(s"Received a new rate limit: $eps.")
    registeredBlockGenerators.foreach(_.updateRate(eps))
}

/**
 * Apply a new rate to the rate limiter.
 *
 * Non-positive rates are ignored. When `maxRateLimit` is positive the new rate is capped
 * at it; otherwise the new rate is applied unchanged.
 */
private[receiver] def updateRate(newRate: Long): Unit =
  if (newRate > 0) {
    // Limits the speed at which records flow in.
    val effectiveRate = if (maxRateLimit > 0) newRate.min(maxRateLimit) else newRate
    rateLimiter.setRate(effectiveRate)
  }

public final void setRate(double permitsPerSecond) {
   
Preconditions.checkArgument(permitsPerSecond > 0.0D && !Double.isNaN(permitsPerSecond), "rate must be positive");
   
Object var3 = this.mutex;
   
synchronized(this.mutex) {
       
this.resync(this.readSafeMicros());
       
double stableIntervalMicros = (double)TimeUnit.SECONDS.toMicros(1L) / permitsPerSecond;
   
    this.stableIntervalMicros = stableIntervalMicros;
       
this.doSetRate(permitsPerSecond, stableIntervalMicros);
   
}
}

ReceiverSupervisor

/**
 * Mark the supervisor and the receiver for stopping.
 *
 * Records the error (or null when there is none), stops the receiver, runs the `onStop`
 * hook, shuts down the future execution context and finally releases the stop latch.
 *
 * @param message reason for stopping, passed on to `stopReceiver` and `onStop`
 * @param error   optional error that caused the stop
 */
def stop(message: String, error: Option[Throwable]): Unit = {
  stoppingError = error.orNull
  stopReceiver(message, error)
  onStop(message, error)
  futureExecutionContext.shutdownNow()
  stopLatch.countDown()
}

 

/**
 * Stop the receiver.
 *
 * Runs under this object's lock. Only a receiver in the `Started` state is actually stopped:
 * its state is set to `Stopped`, its `onStop()` is invoked and `onReceiverStop` is called.
 * In the other states a warning is logged. Non-fatal exceptions are logged, not rethrown.
 *
 * @param message reason for stopping, included in the log and passed to `onReceiverStop`
 * @param error   optional error that caused the stop
 */
def stopReceiver(message: String, error: Option[Throwable]): Unit = synchronized {
  try {
    logInfo("Stopping receiver with message: " + message + ": " + error.getOrElse(""))
    receiverState match {
      case Initialized =>
        logWarning("Skip stopping receiver because it has not yet started")
      case Started =>
        // Flip the state before invoking the callbacks.
        receiverState = Stopped
        receiver.onStop()
        logInfo("Called receiver onStop")
        onReceiverStop(message, error)
      case Stopped =>
        logWarning("Receiver has been stopped")
    }
  } catch {
    case NonFatal(t) =>
      // Hand the throwable to the logger instead of gluing a stack-trace string onto the
      // stream id: the exception message is kept and the deprecated getStackTraceString
      // is no longer needed.
      logError("Error stopping receiver " + streamId, t)
  }
}

子类实现

/**
 * Deregister this receiver from the tracker when the receiver stops.
 *
 * Sends a `DeregisterReceiver` message carrying the stream id, the stop message and the
 * stack trace of the error (an empty string when there is no error).
 *
 * @param message reason the receiver stopped
 * @param error   optional error that caused the stop
 */
override protected def onReceiverStop(message: String, error: Option[Throwable]): Unit = {
  logInfo("Deregistering receiver " + streamId)
  val errorString = error.map(Throwables.getStackTraceAsString).getOrElse("")
  trackerEndpoint.askWithRetry[Boolean](DeregisterReceiver(streamId, message, errorString))
  logInfo("Stopped receiver " + streamId)
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值