Kafka 副本同步:ISR 定时检查线程

KafkaServer.startup -> replicaManager.startup()
replicaManager.startup() 中会注册一个定时调度的 ISR 检查任务:
    scheduler.schedule("isr-expiration", maybeShrinkIsr, period = config.replicaLagTimeMaxMs, unit = TimeUnit.MILLISECONDS)
   ->  partition.maybeShrinkIsr
  /**
   * Asks the replica manager to try completing the delayed fetch and delayed
   * produce requests registered under this partition's operation key.
   */
  private def tryCompleteDelayedRequests(): Unit = {
    // One key, built from this partition's topic and id, is shared by both calls.
    val partitionKey = new TopicPartitionOperationKey(topic, partitionId)
    replicaManager.tryCompleteDelayedFetch(partitionKey)
    replicaManager.tryCompleteDelayedProduce(partitionKey)
  }

  /**
   * ISR expiration check: removes from the ISR every follower reported by
   * getOutOfSyncReplicas, then tries to complete delayed requests if
   * maybeIncrementLeaderHW reported a change.
   *
   * Nothing is done when leaderReplicaIfLocal() returns None, or when no
   * replica is out of sync.
   *
   * @param replicaMaxLagTimeMs lag threshold in milliseconds, forwarded to getOutOfSyncReplicas
   */
  def maybeShrinkIsr(replicaMaxLagTimeMs: Long): Unit = {
    val hwAdvanced = inWriteLock(leaderIsrUpdateLock) {
      // exists() yields false for None, i.e. do nothing if no longer leader.
      leaderReplicaIfLocal().exists { leaderReplica =>
        // The replicas that have to be removed from the ISR.
        val toEvict = getOutOfSyncReplicas(leaderReplica, replicaMaxLagTimeMs)
        if (toEvict.isEmpty) {
          false
        } else {
          // New ISR = current ISR minus the replicas being removed.
          val shrunkIsr = inSyncReplicas -- toEvict
          assert(shrunkIsr.nonEmpty)
          info("Shrinking ISR for partition [%s,%d] from %s to %s".format(topic, partitionId,
            inSyncReplicas.map(_.brokerId).mkString(","), shrunkIsr.map(_.brokerId).mkString(",")))

          // Update the ISR in zk and in cache.
          updateIsr(shrunkIsr)
          replicaManager.isrShrinkRate.mark()

          // We may need to increment the high watermark since the ISR could be down to 1.
          // Illustration from the original notes: with LEOs p0=100, p1=110, p2=80 the HW is
          // min(p0, p1, p2) = 80; once p2 leaves the ISR it becomes min(p0, p1) = 100.
          maybeIncrementLeaderHW(leaderReplica)
        }
      }
    }

    // Some delayed operations may be unblocked after the HW changed. This runs
    // after the inWriteLock block has returned.
    if (hwAdvanced)
      tryCompleteDelayedRequests()
  }

getOutOfSyncReplicas()
  /**
   * Selects the followers that should be removed from the ISR.
   *
   * There are two cases that will be handled here -
   * 1. Stuck followers: If the leo of the replica hasn't been updated for maxLagMs ms,
   *                     the follower is stuck and should be removed from the ISR
   * 2. Slow followers: If the replica has not read up to the leo within the last maxLagMs ms,
   *                    then the follower is lagging and should be removed from the ISR
   * Both these cases are handled by a single check on lastCaughtUpTimeMs, which represents
   * the last time when the replica was fully caught up. A replica for which
   * (now - lastCaughtUpTimeMs) > maxLagMs is considered to be out of sync.
   *
   * @param leaderReplica the leader's replica; it is excluded from the candidates
   * @param maxLagMs      maximum allowed time, in milliseconds, since a follower was last caught up
   * @return the in-sync replicas (leader excluded) that exceed maxLagMs; empty if none do
   */
  def getOutOfSyncReplicas(leaderReplica: Replica, maxLagMs: Long): Set[Replica] = {
    // The leader itself is never a candidate for removal.
    val candidateReplicas = inSyncReplicas - leaderReplica

    // Read the clock once so every follower is compared against the same instant.
    val nowMs = time.milliseconds
    val laggingReplicas = candidateReplicas.filter(r => (nowMs - r.lastCaughtUpTimeMs) > maxLagMs)
    if (laggingReplicas.nonEmpty)
      debug("Lagging replicas for partition %s are %s".format(TopicAndPartition(topic, partitionId), laggingReplicas.map(_.brokerId).mkString(",")))

    laggingReplicas
  }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值