ReplicaFetcherThread is a subclass of AbstractFetcherThread and implements the corresponding abstract methods. Its definition is as follows:
class ReplicaFetcherThread(name: String, // thread name
                           // A single broker may run multiple ReplicaFetcherThread instances. The broker-side parameter
                           // num.replica.fetchers determines how many follower fetcher threads Kafka creates.
                           fetcherId: Int, // id of this fetcher thread, i.e. the thread's index
                           sourceBroker: BrokerEndPoint, // source broker, i.e. the node hosting the partition leader
                           brokerConfig: KafkaConfig, // encapsulates all broker-side parameters; ReplicaFetcherThread likewise uses it to read specific broker-side configuration values
                           failedPartitions: FailedPartitions, // partitions that failed during processing
                           replicaMgr: ReplicaManager, // replica manager; this thread uses it to obtain partition objects, replica objects, and their underlying log objects
                           metrics: Metrics,
                           time: Time,
                           quota: ReplicaQuota, // quota used to throttle the follower replica's fetch rate
                           leaderEndpointBlockingSend: Option[BlockingSend] = None) // class for sending requests synchronously:
                           // the thread sends a request to the given broker and then blocks until it receives the broker's response
  extends AbstractFetcherThread(name = name,
                                clientId = name,
                                sourceBroker = sourceBroker,
                                failedPartitions,
                                fetchBackOffMs = brokerConfig.replicaFetchBackoffMs,
                                isInterruptible = false) {

  // the replica id is simply the id of the broker hosting the replica
  private val replicaId = brokerConfig.brokerId
  private val logContext = new LogContext(s"[ReplicaFetcher replicaId=$replicaId, leaderId=${sourceBroker.id}, " +
    s"fetcherId=$fetcherId] ")
  this.logIdent = logContext.logPrefix
  // class used to actually send requests to the leader
  private val leaderEndpoint = leaderEndpointBlockingSend.getOrElse(
    new ReplicaFetcherBlockingSend(sourceBroker, brokerConfig, metrics, time, fetcherId,
      s"broker-$replicaId-fetcher-$fetcherId", logContext))
  // maximum time a follower FETCH request may wait before a response is returned;
  // the value of the broker-side parameter replica.fetch.wait.max.ms
  private val maxWait = brokerConfig.replicaFetchWaitMaxMs
  // minimum number of bytes that must accumulate before a FETCH response is returned;
  // the value of the broker-side parameter replica.fetch.min.bytes
  private val minBytes = brokerConfig.replicaFetchMinBytes
  // maximum size in bytes of a valid FETCH response;
  // the value of the broker-side parameter replica.fetch.response.max.bytes
  private val maxBytes = brokerConfig.replicaFetchResponseMaxBytes
  // maximum number of bytes that can be fetched for a single partition;
  // the value of the broker-side parameter replica.fetch.max.bytes
  private val fetchSize = brokerConfig.replicaFetchMaxBytes
  private val brokerSupportsLeaderEpochRequest = brokerConfig.interBrokerProtocolVersion >= KAFKA_0_11_0_IV2
  // maintains the fetch session state for the connection to this broker
  private val fetchSessionHandler = new FetchSessionHandler(logContext, sourceBroker.id)
}
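For context on the fetcherId parameter: AbstractFetcherManager hashes each topic-partition onto one of the num.replica.fetchers threads, so a given partition is always served by the same fetcher thread under a fixed configuration. A simplified sketch of that assignment (based on the getFetcherId logic in the Kafka source; the standalone signature here is my own):

// Simplified sketch of AbstractFetcherManager.getFetcherId: hash the topic-partition
// onto one of num.replica.fetchers threads; the mask keeps the hash non-negative.
def getFetcherId(topic: String, partitionId: Int, numFetchersPerBroker: Int): Int =
  ((31 * topic.hashCode + partitionId) & 0x7fffffff) % numFetchersPerBroker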
The processPartitionData method
As mentioned in the previous article, processFetchRequest in AbstractFetcherThread sends the FETCH request and handles the returned messages; for the actual processing of each partition's data it calls the subclass's processPartitionData method.
override def processPartitionData(topicPartition: TopicPartition,
                                  fetchOffset: Long,
                                  partitionData: FetchData): Option[LogAppendInfo] = {
  // obtain the partition object for the given topic-partition from the replica manager
  val partition = replicaMgr.nonOfflinePartition(topicPartition).get
  // obtain the local log object
  val log = partition.localLogOrException
  // convert the fetched data into a properly formatted message set
  val records = toMemoryRecords(partitionData.records)

  maybeWarnIfOversizedRecords(records, topicPartition)

  // it is an error if the fetch offset differs from the local log's LEO:
  // a follower always fetches starting from its own log end offset, so anything else
  // indicates an inconsistency
  if (fetchOffset != log.logEndOffset)
    throw new IllegalStateException("Offset mismatch for partition %s: fetched offset = %d, log end offset = %d.".format(
      topicPartition, fetchOffset, log.logEndOffset))

  if (isTraceEnabled)
    trace("Follower has replica log end offset %d for partition %s. Received %d messages and leader hw %d"
      .format(log.logEndOffset, topicPartition, records.sizeInBytes, partitionData.highWatermark))

  // Append the leader's messages to the log
  // write to the follower's local log; under the hood this still calls the append method in Log.scala
  val logAppendInfo = partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false)

  if (isTraceEnabled)
    trace("Follower has replica log end offset %d after appending %d bytes of messages for partition %s"
      .format(log.logEndOffset, records.sizeInBytes, topicPartition))

  val leaderLogStartOffset = partitionData.logStartOffset
  // For the follower replica, we do not need to keep its segment base offset and physical position.
  // These values will be computed upon becoming leader or handling a preferred read replica fetch.
  val followerHighWatermark = log.updateHighWatermark(partitionData.highWatermark)
  // try to advance the follower replica's log start offset
  log.maybeIncrementLogStartOffset(leaderLogStartOffset)

  if (isTraceEnabled)
    trace(s"Follower set replica high watermark for partition $topicPartition to $followerHighWatermark")

  // Traffic from both in-sync and out of sync replicas are accounted for in replication quota to ensure total replication
  // traffic doesn't exceed quota.
  // throttle replica fetch traffic
  if (quota.isThrottled(topicPartition))
    quota.record(records.sizeInBytes)

  replicaMgr.brokerTopicStats.updateReplicationBytesIn(records.sizeInBytes)

  // return the result of the log append
  logAppendInfo
}
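One detail worth noting in the code above: updateHighWatermark never lets the follower's high watermark run ahead of its own LEO, since the follower may not yet have replicated everything below the leader's high watermark. Semantically it behaves roughly like this (an illustrative sketch, not the actual implementation):

// Illustrative sketch: the follower adopts the leader's high watermark, capped at its
// own log end offset; the capped value is what the trace log above reports.
def boundedHighWatermark(leaderHighWatermark: Long, followerLogEndOffset: Long): Long =
  math.min(leaderHighWatermark, followerLogEndOffset)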
The buildFetch method
The main purpose of this method is to build the FETCH request sent to the broker hosting the leader replica.
override def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[FetchRequest.Builder]] = {
  val partitionsWithError = mutable.Set[TopicPartition]()

  val builder = fetchSessionHandler.newBuilder()
  // iterate over the partitions: add every partition that is ready to fetch to the builder
  // for unified processing later, and add any partition that hits an error to the error list
  partitionMap.foreach { case (topicPartition, fetchState) =>
    // We will not include a replica in the fetch request if it should be throttled.
    if (fetchState.isReadyForFetch && !shouldFollowerThrottle(quota, topicPartition)) {
      try {
        // get the log start offset
        val logStartOffset = replicaMgr.localLogOrException(topicPartition).logStartOffset
        // build the PartitionData object describing what to read
        builder.add(topicPartition, new FetchRequest.PartitionData(
          fetchState.fetchOffset, logStartOffset, fetchSize, Optional.of(fetchState.currentLeaderEpoch)))
      } catch {
        case _: KafkaStorageException =>
          // The replica has already been marked offline due to log directory failure and the original failure should have already been logged.
          // This partition should be removed from ReplicaFetcherThread soon by ReplicaManager.handleLogDirFailure()
          partitionsWithError += topicPartition
      }
    }
  }

  // build the fetch request data
  val fetchData = builder.build()
  val fetchRequestOpt = if (fetchData.sessionPartitions.isEmpty && fetchData.toForget.isEmpty) {
    None
  } else {
    // construct the Builder object for the FETCH request
    val requestBuilder = FetchRequest.Builder
      .forReplica(fetchRequestVersion, replicaId, maxWait, minBytes, fetchData.toSend)
      .setMaxBytes(maxBytes)
      .toForget(fetchData.toForget)
      .metadata(fetchData.metadata)
    Some(requestBuilder)
  }

  // return the Builder object together with the list of partitions that hit errors
  ResultWithPartitions(fetchRequestOpt, partitionsWithError)
}
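The FetchSessionHandler used here implements incremental fetch sessions (KIP-227): fetchData.toSend carries only partitions that changed since the last request, while fetchData.toForget lists partitions to drop from the session. The response side of this handshake, as fetchFromLeader in this class uses it, looks roughly like the following (a sketch abbreviated from my reading of the 2.x source, error handling omitted):

// Sketch: send the request built above synchronously, let the session handler validate
// the session epoch, then hand each partition's data on to processPartitionData.
val clientResponse = leaderEndpoint.sendRequest(fetchRequest)
val fetchResponse = clientResponse.responseBody.asInstanceOf[FetchResponse[Records]]
if (fetchSessionHandler.handleResponse(fetchResponse))
  fetchResponse.responseData.asScala.toMap // partitions to process
else
  Map.empty                                // session error; fall back to a full fetch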
The truncate method
The main purpose of this method is to truncate the log of a given partition; ultimately it still ends up calling the truncate logic in LogManager.
override def truncate(tp: TopicPartition, offsetTruncationState: OffsetTruncationState): Unit = {
  // get the partition object
  val partition = replicaMgr.nonOfflinePartition(tp).get
  // get the partition's local log
  val log = partition.localLogOrException

  // perform the truncation; the target position is given by offsetTruncationState.offset
  partition.truncateTo(offsetTruncationState.offset, isFuture = false)

  if (offsetTruncationState.offset < log.highWatermark)
    warn(s"Truncating $tp to offset ${offsetTruncationState.offset} below high watermark " +
      s"${log.highWatermark}")

  // mark the future replica for truncation only when we do last truncation
  if (offsetTruncationState.truncationCompleted)
    replicaMgr.replicaAlterLogDirsManager.markPartitionsForTruncation(brokerConfig.brokerId, tp,
      offsetTruncationState.offset)
}