Spark Streaming Source Code Walkthrough: Driver Fault Tolerance

Driver fault tolerance mainly comes down to the fault-tolerance guarantees of ReceivedBlockTracker, DStreamGraph, and JobGenerator.

On the data side, ReceivedBlockTracker records the metadata of received blocks for the entire Spark Streaming application.

On the scheduling side, DStreamGraph and JobGenerator are the core of Spark Streaming scheduling; they record how far scheduling has progressed, which is tied to the application's logic.

 

When a receiver reports a block, its metadata is first written to the WAL (if enabled), then appended to the corresponding ReceivedBlockQueue; at this point all of these blocks are still unallocated. Finally, the write result is returned.

// ReceivedBlockTracker.scala line 85
def addBlock(receivedBlockInfo: ReceivedBlockInfo): Boolean = {
  try {
    val writeResult = writeToLog(BlockAdditionEvent(receivedBlockInfo))
    if (writeResult) {
      synchronized {
        getReceivedBlockQueue(receivedBlockInfo.streamId) += receivedBlockInfo
      }
      logDebug(s"Stream ${receivedBlockInfo.streamId} received " +
        s"block ${receivedBlockInfo.blockStoreResult.blockId}")
    } else {
      logDebug(s"Failed to acknowledge stream ${receivedBlockInfo.streamId} receiving " +
        s"block ${receivedBlockInfo.blockStoreResult.blockId} in the Write Ahead Log.")
    }
    writeResult
  } catch {
    case NonFatal(e) =>
      logError(s"Error adding block $receivedBlockInfo", e)
      false
  }
}

See writeToLog:

// ReceivedBlockTracker.scala line 227
  /** Write an update to the tracker to the write ahead log */
  private def writeToLog(record: ReceivedBlockTrackerLogEvent): Boolean = {
    if (isWriteAheadLogEnabled) {
      logTrace(s"Writing record: $record")
      try {
        writeAheadLogOption.get.write(ByteBuffer.wrap(Utils.serialize(record)),
          clock.getTimeMillis())
        true
      } catch {
        case NonFatal(e) =>
          logWarning(s"Exception thrown while writing record: $record to the WriteAheadLog.", e)
          false
      }
    } else {
      true
    }
  }

isWriteAheadLogEnabled simply checks whether writeAheadLogOption is non-empty, and the WAL is only ever created when a checkpoint directory has been configured:

// ReceivedBlockTracker.scala line 250
  /** Optionally create the write ahead log manager only if the feature is enabled */
  private def createWriteAheadLog(): Option[WriteAheadLog] = {
    checkpointDirOption.map { checkpointDir =>
      val logDir = ReceivedBlockTracker.checkpointDirToLogDir(checkpointDirOption.get)
      WriteAheadLogUtils.createLogForDriver(conf, logDir, hadoopConf)
    }
  }

The WAL directory under the checkpoint directory:

// ReceivedBlockTracker.scala line 263
  def checkpointDirToLogDir(checkpointDir: String): String = {
    new Path(checkpointDir, "receivedBlockMetadata").toString
  }
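With a made-up checkpoint directory, the resulting layout looks like this (a hedged illustration; only the "receivedBlockMetadata" subdirectory name comes from the code above):

// Hedged illustration; the HDFS path is hypothetical.
import org.apache.hadoop.fs.Path

val checkpointDir = "hdfs://namenode:8020/app/checkpoint" // hypothetical
val logDir = new Path(checkpointDir, "receivedBlockMetadata").toString
// logDir == "hdfs://namenode:8020/app/checkpoint/receivedBlockMetadata"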

 

Everything above is the WAL of BlockInfo, i.e., the metadata of the received data.
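As an aside, none of this machinery is active unless the application opts in. A minimal sketch of the configuration involved (app name and path are hypothetical; the flag is the standard receiver-WAL switch):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Minimal sketch: the checkpoint directory is what gives the driver-side
// tracker a WAL directory; the flag enables the receiver-side WAL.
val conf = new SparkConf()
  .setAppName("wal-demo") // hypothetical app name
  .set("spark.streaming.receiver.writeAheadLog.enable", "true")
val ssc = new StreamingContext(conf, Seconds(2))
ssc.checkpoint("hdfs://namenode:8020/app/checkpoint") // hypothetical path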

As covered earlier, JobGenerator's recurring timer adds the time dimension:

// JobGenerator.scala line 58
  private val timer = new RecurringTimer(clock, ssc.graph.batchDuration.milliseconds,
    longTime => eventLoop.post(GenerateJobs(new Time(longTime))), "JobGenerator")

// RecurringTimer.scala line 23
// period here is the ssc.graph.batchDuration.milliseconds from above
private[streaming]
class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name: String)
  extends Logging

// RecurringTimer.scala line 92
  private def triggerActionForNextInterval(): Unit = {
    clock.waitTillTime(nextTime)
    callback(nextTime)
    prevTime = nextTime
    nextTime += period // the next trigger time is this target time plus the period
    logDebug("Callback for " + name + " called at time " + prevTime)
  }
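Note that nextTime += period anchors the next tick to the previous target time rather than to when the callback actually finished, so trigger times never drift. A self-contained sketch of the same fixed-rate idea (simplified; not Spark's code):

// Simplified fixed-rate loop in the spirit of RecurringTimer: anchoring
// nextTime to the previous target (not to "now") prevents drift even when
// a callback occasionally runs slow.
def fixedRateLoop(period: Long, callback: Long => Unit, ticks: Int): Unit = {
  var nextTime = System.currentTimeMillis() + period
  for (_ <- 1 to ticks) {
    val sleepMs = nextTime - System.currentTimeMillis()
    if (sleepMs > 0) Thread.sleep(sleepMs)
    callback(nextTime)
    nextTime += period // next target = previous target + period
  }
}

fixedRateLoop(1000L, t => println(s"tick at $t"), ticks = 3)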

Each batch interval, the received blocks are then allocated to the corresponding batch by time:

// JobGenerator.scala line 240 spark 1.6.0
  private def generateJobs(time: Time) {
    // Set the SparkEnv in this thread, so that job generation code can access the environment
    // Example: BlockRDDs are created in this thread, and it needs to access BlockManager
    // Update: This is probably redundant after threadlocal stuff in SparkEnv has been removed.
    SparkEnv.set(ssc.env)
    Try {
      // assign the received blocks to this batch, keyed by batch time
      jobScheduler.receiverTracker.allocateBlocksToBatch(time) // allocate received blocks to batch
      graph.generateJobs(time) // generate jobs using allocated block
    } match {
      case Success(jobs) =>
        val streamIdToInputInfos = jobScheduler.inputInfoTracker.getInfo(time)
        jobScheduler.submitJobSet(JobSet(time, jobs, streamIdToInputInfos))
      case Failure(e) =>
        jobScheduler.reportError("Error generating jobs for time " + time, e)
    }
    // finally, post a DoCheckpoint event
    eventLoop.post(DoCheckpoint(time, clearCheckpointDataLater = false))
  }

What does jobScheduler.receiverTracker.allocateBlocksToBatch(time) do? ReceiverTracker simply delegates to ReceivedBlockTracker.allocateBlocksToBatch:

  def allocateBlocksToBatch(batchTime: Time): Unit = synchronized {
    if (lastAllocatedBatchTime == null || batchTime > lastAllocatedBatchTime) {
      val streamIdToBlocks = streamIds.map { streamId =>
          (streamId, getReceivedBlockQueue(streamId).dequeueAll(x => true))
      }.toMap
      val allocatedBlocks = AllocatedBlocks(streamIdToBlocks)
      // writeToLog is invoked on every allocation, but it internally checks whether the WAL is enabled.
      if (writeToLog(BatchAllocationEvent(batchTime, allocatedBlocks))) {
        timeToAllocatedBlocks.put(batchTime, allocatedBlocks)
        lastAllocatedBatchTime = batchTime
      } else {
        logInfo(s"Possibly processed batch $batchTime need to be processed again in WAL recovery")
      }
    } else {
      // This situation occurs when:
      // 1. WAL is ended with BatchAllocationEvent, but without BatchCleanupEvent,
      // possibly processed batch job or half-processed batch job need to be processed again,
      // so the batchTime will be equal to lastAllocatedBatchTime.
      // 2. Slow checkpointing makes recovered batch time older than WAL recovered
      // lastAllocatedBatchTime.
      // This situation will only occurs in recovery time.
      logInfo(s"Possibly processed batch $batchTime need to be processed again in WAL recovery")
    }
  }
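dequeueAll(x => true) drains the queue: every block received since the last allocation moves, in arrival order, into this batch. A tiny standalone example of the idiom (block ids are made up):

import scala.collection.mutable

// dequeueAll with an always-true predicate removes and returns all elements,
// which is how every pending block moves into the new batch in one step.
val pending = mutable.Queue("block-1", "block-2", "block-3") // hypothetical ids
val allocated = pending.dequeueAll(_ => true) // Seq("block-1", "block-2", "block-3")
assert(pending.isEmpty)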

As for writeToLog, it is the same method shown earlier (ReceivedBlockTracker.scala line 227); the only difference is that the record written this time is a BatchAllocationEvent, the batch-to-blocks mapping, rather than a BlockAdditionEvent.

At this point, the fault-tolerance story for ReceivedBlockTracker is complete.
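How does this pay off after a driver crash? On restart, the tracker reads the WAL back and replays each event to rebuild its in-memory state. A simplified sketch of that replay (paraphrased from the recovery path using the names shown above; not verbatim Spark source):

// Simplified, paraphrased sketch of WAL replay on driver restart.
import scala.collection.JavaConverters._

writeAheadLogOption.foreach { log =>
  log.readAll().asScala.foreach { byteBuffer =>
    Utils.deserialize[ReceivedBlockTrackerLogEvent](byteBuffer.array) match {
      case BlockAdditionEvent(info) =>
        // received but not yet allocated: put the block back in its queue
        getReceivedBlockQueue(info.streamId) += info
      case BatchAllocationEvent(time, allocatedBlocks) =>
        // the batch had been allocated: restore the allocation
        timeToAllocatedBlocks.put(time, allocatedBlocks)
        lastAllocatedBatchTime = time
      case BatchCleanupEvent(times) =>
        // these batches had already been cleaned up
        times.foreach(timeToAllocatedBlocks.remove)
    }
  }
}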

 

Back in JobGenerator.generateJobs: every time jobs are generated for a batch, a DoCheckpoint event is posted at the end.

// JobGenerator.scala line 256, at the end of generateJobs
eventLoop.post(DoCheckpoint(time, clearCheckpointDataLater = false))

 

doCheckpoint first walks back from each output stream along its lineage, checkpointing every DStream it depends on.

Then it checkpoints the ssc itself:

// JobGenerator.scala line 290 
  private def doCheckpoint(time: Time, clearCheckpointDataLater: Boolean) {
    if (shouldCheckpoint && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) {
      logInfo("Checkpointing graph for time " + time)
      ssc.graph.updateCheckpointData(time)
      checkpointWriter.write(new Checkpoint(ssc, time), clearCheckpointDataLater)
    }
  }
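The isMultipleOf guard means the graph is checkpointed only on batches aligned with checkpointDuration, not on every batch. A small worked example with made-up durations:

// Made-up durations illustrating the isMultipleOf gate: with a 2s batch and
// a 10s checkpoint interval, only every 5th batch triggers a checkpoint.
val batchMs = 2000L       // hypothetical batch interval
val checkpointMs = 10000L // hypothetical ssc.checkpointDuration
val zeroTime = 0L         // graph.zeroTime
(1 to 10).map(_ * batchMs).foreach { t =>
  val checkpointed = (t - zeroTime) % checkpointMs == 0
  println(s"batch time ${t}ms -> checkpoint? $checkpointed")
}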

The DStream checkpoint:

// DStreamGraph.scala line 132
  def updateCheckpointData(time: Time) {
    logInfo("Updating checkpoint data for time " + time)
    this.synchronized {
      outputStreams.foreach(_.updateCheckpointData(time))
    }
    logInfo("Updated checkpoint data for time " + time)
  }

// DStream.scala line 491
  private[streaming] def updateCheckpointData(currentTime: Time) {
    logDebug("Updating checkpoint data for time " + currentTime)
    checkpointData.update(currentTime)
    dependencies.foreach(_.updateCheckpointData(currentTime))
    logDebug("Updated checkpoint data for time " + currentTime + ": " + checkpointData)
  }

And the ssc (StreamingContext) checkpoint:

// Checkpoint.scala line 275
  def write(checkpoint: Checkpoint, clearCheckpointDataLater: Boolean) {
    try {
      val bytes = Checkpoint.serialize(checkpoint, conf)
      executor.execute(new CheckpointWriteHandler(
        checkpoint.checkpointTime, bytes, clearCheckpointDataLater))
      logInfo("Submitted checkpoint of time " + checkpoint.checkpointTime + " writer queue")
    } catch {
      case rej: RejectedExecutionException =>
        logError("Could not submit checkpoint task to the thread pool executor", rej)
    }
  }
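The other half of the story is recovery: when the driver restarts, the standard pattern rebuilds the StreamingContext from these checkpoint files via StreamingContext.getOrCreate. A minimal sketch (app name and path are hypothetical):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

val checkpointDir = "hdfs://namenode:8020/app/checkpoint" // hypothetical path

def createContext(): StreamingContext = {
  val conf = new SparkConf().setAppName("driver-ft-demo") // hypothetical name
  val ssc = new StreamingContext(conf, Seconds(2))
  ssc.checkpoint(checkpointDir) // enables the Checkpoint writes shown above
  // ... define input DStreams and output operations here ...
  ssc
}

// Fresh start: createContext() runs. After a driver crash: the context,
// DStreamGraph, and pending batches are rebuilt from the checkpoint.
val ssc = StreamingContext.getOrCreate(checkpointDir, createContext _)
ssc.start()
ssc.awaitTermination()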

Summary:

ReceivedBlockTracker achieves fault tolerance for data (strictly, block metadata) through the WAL.

DStreamGraph and JobGenerator achieve fault tolerance through checkpointing.
 

 

Reposted from: https://my.oschina.net/corleone/blog/679412
