Partition 定义了 Partition#checkEnoughReplicasReachOffset 方法和 Partition#maybeIncrementLeaderHW 方法,分别用于检测指定 offset 之前的消息是否已经被 ISR 集合中足够多的 follower 副本确认(ack),以及尝试向后移动 leader 副本的 HW 值。
checkEnoughReplicasReachOffset方法
用于检测指定 offset 之前的消息是否已经被 ISR 集合中足够多的 follower 副本确认。
/**
 * Checks whether the leader's high watermark has reached `requiredOffset`.
 *
 * @param requiredOffset the offset the high watermark must have reached
 * @return a pair `(reached, error)`:
 *         - `(false, NOT_LEADER_FOR_PARTITION)` when there is no local leader log;
 *         - `(false, NONE)` when the high watermark is still below `requiredOffset`;
 *         - `(true, NONE)` when it has been reached and the ISR size is at least `minInSyncReplicas`;
 *         - `(true, NOT_ENOUGH_REPLICAS_AFTER_APPEND)` when it has been reached but the ISR is too small.
 */
def checkEnoughReplicasReachOffset(requiredOffset: Long): (Boolean, Errors) = {
  leaderLogIfLocal match {
    case None =>
      // No local leader log is available, so this broker cannot answer for the partition.
      (false, Errors.NOT_LEADER_FOR_PARTITION)

    case Some(leaderLog) =>
      // Snapshot the ISR ids once so the trace output and the size check below use the same set.
      val isrIdsSnapshot = inSyncReplicaIds

      if (isTraceEnabled) {
        def describeLeo: ((Int, Long)) => String = {
          case (brokerId, logEndOffset) => s"broker $brokerId: $logEndOffset"
        }

        // Log end offsets of the ISR members other than the local broker, plus the local log itself.
        val remoteIsrReplicas = (isrIdsSnapshot - localBrokerId).map(getReplicaOrException)
        val remoteLeos = remoteIsrReplicas.map(replica => (replica.brokerId, replica.logEndOffset))
        val localLeo = (localBrokerId, localLogOrException.logEndOffset)
        val (caughtUp, lagging) = (remoteLeos + localLeo).partition { case (_, leo) => leo >= requiredOffset }

        trace(s"Progress awaiting ISR acks for offset $requiredOffset: " +
          s"acked: ${caughtUp.map(describeLeo)}, " +
          s"awaiting ${lagging.map(describeLeo)}")
      }

      // Minimum ISR size taken from the leader log's configuration.
      val minIsr = leaderLog.config.minInSyncReplicas

      if (leaderLog.highWatermark < requiredOffset) {
        // The high watermark has not reached the requested offset yet.
        (false, Errors.NONE)
      } else if (isrIdsSnapshot.size >= minIsr) {
        // High watermark reached and the ISR is large enough.
        (true, Errors.NONE)
      } else {
        // The topic may be configured not to accept messages when the ISR is too small. In this
        // scenario the request was already appended locally and added to the purgatory before the
        // ISR was shrunk, so it completes with an "after append" error.
        (true, Errors.NOT_ENOUGH_REPLICAS_AFTER_APPEND)
      }
  }
}
maybeIncrementLeaderHW方法
用于尝试向后移动 leader 副本的 HW 值。
/**
 * Tries to advance the leader's high watermark.
 *
 * The candidate is the smallest log end offset among the leader log and every remote replica
 * that is either in the ISR or was caught up within `replicaLagTimeMaxMs` of `curTime`. The
 * candidate is handed to `leaderLog.maybeIncrementHighWatermark`, which decides whether it is
 * actually applied.
 *
 * @param leaderLog the leader's log
 * @param curTime   the time used for the caught-up check; defaults to `time.milliseconds`
 * @return `true` if the high watermark was updated, `false` otherwise
 */
private def maybeIncrementLeaderHW(leaderLog: Log, curTime: Long = time.milliseconds): Boolean = {
  inReadLock(leaderIsrUpdateLock) {
    // maybeIncrementLeaderHW is in the hot path: fold over the replica values directly so that
    // no intermediate collection is created.
    val candidateHighWatermark =
      remoteReplicasMap.values.foldLeft(leaderLog.logEndOffsetMetadata) { (lowestSoFar, replica) =>
        // A remote replica lowers the candidate only if its LEO is smaller AND it is either
        // recently caught up or a member of the ISR.
        if (replica.logEndOffsetMetadata.messageOffset < lowestSoFar.messageOffset &&
          (curTime - replica.lastCaughtUpTimeMs <= replicaLagTimeMaxMs ||
            inSyncReplicaIds.contains(replica.brokerId)))
          replica.logEndOffsetMetadata
        else
          lowestSoFar
      }

    // Delegate the actual update decision to the log.
    leaderLog.maybeIncrementHighWatermark(candidateHighWatermark) match {
      case None =>
        if (isTraceEnabled) {
          def describeLeo: ((Int, LogOffsetMetadata)) => String = {
            case (brokerId, logEndOffsetMetadata) => s"replica $brokerId: $logEndOffsetMetadata"
          }

          val remoteLeos = remoteReplicas.map(replica => (replica.brokerId, replica.logEndOffsetMetadata)).toSet
          val localLeo = (localBrokerId, localLogOrException.logEndOffsetMetadata)
          trace(s"Skipping update high watermark since new hw $candidateHighWatermark is not larger than old value. " +
            s"All current LEOs are ${(remoteLeos + localLeo).map(describeLeo)}")
        }
        false

      case Some(previousHighWatermark) =>
        debug(s"High watermark updated from $previousHighWatermark to $candidateHighWatermark")
        true
    }
  }
}
其中它调用了 Log.scala 中的 maybeIncrementHighWatermark 方法,这里回顾一下其实现:
/**
 * Updates the high watermark to `newHighWatermark` if that moves it forward.
 *
 * The update is applied when the new message offset is larger than the current one, or when the
 * offsets are equal but the current metadata is on an older segment than the new metadata.
 *
 * @param newHighWatermark the proposed high watermark
 * @return `Some(previous high watermark)` if the update was applied, `None` otherwise
 * @throws IllegalArgumentException if `newHighWatermark` is beyond the current log end offset
 */
def maybeIncrementHighWatermark(newHighWatermark: LogOffsetMetadata): Option[LogOffsetMetadata] = {
  // This bound check runs before the lock is taken.
  if (newHighWatermark.messageOffset > logEndOffset) {
    throw new IllegalArgumentException(
      s"High watermark $newHighWatermark update exceeds current " +
        s"log end offset $logEndOffsetMetadata")
  }

  lock.synchronized {
    val previousHighWatermark = fetchHighWatermarkMetadata

    // Ensure that the high watermark increases monotonically.
    val offsetAdvances = previousHighWatermark.messageOffset < newHighWatermark.messageOffset

    // We also update the high watermark when the new offset metadata is on a newer segment,
    // which occurs whenever the log is rolled to a new segment. Declared as a def so the
    // segment comparison is only evaluated when the offset itself did not advance.
    def sameOffsetOnNewerSegment: Boolean =
      previousHighWatermark.messageOffset == newHighWatermark.messageOffset &&
        previousHighWatermark.onOlderSegment(newHighWatermark)

    if (offsetAdvances || sameOffsetOnNewerSegment) {
      updateHighWatermarkMetadata(newHighWatermark)
      Some(previousHighWatermark)
    } else {
      None
    }
  }
}