The previous post covered the scheduled tasks that retire messages by time and size. This post looks at the LogCleaner thread: if log.cleaner.enable=true is set, the end of LogManager#startup calls LogCleaner#startup to start the LogCleaner thread, which cleans log data. This post focuses on the cleanup.policy=compact policy, under which Kafka's LogCleaner thread compacts messages that share the same key, retaining only the message with the largest offset for each key, as sketched below.
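For intuition, the observable effect of compaction can be reduced to a few lines of Scala. This is a behavioral sketch only, with made-up records; the real cleaner works incrementally on segments, as the rest of this post shows:

// hypothetical keyed records in offset order: (key, value, offset)
val records = Seq(("k1", "v1", 0L), ("k2", "v2", 1L), ("k1", "v3", 2L))

// after compaction only the record with the largest offset per key survives
val compacted = records.groupBy(_._1).values.map(_.maxBy(_._3)).toSeq.sortBy(_._3)
// => Seq(("k2", "v2", 1), ("k1", "v3", 2))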
Start with LogCleaner#startup: it launches the set of CleanerThread threads registered with the LogCleaner. The core of CleanerThread#doWork is tryCleanFilthiestLog, which picks the LogSegment range most in need of cleaning and cleans it; the underlying cleanFilthiestLog is shown below (a sketch of the surrounding doWork loop follows the listing).
private def cleanFilthiestLog(): Boolean = {
  val preCleanStats = new PreCleanStats()
  // select the next LogToClean object most in need of cleaning
  val cleaned = cleanerManager.grabFilthiestCompactedLog(time, preCleanStats) match {
    case None =>
      // no LogToClean object needs cleaning; back off for a while and try again
      false
    case Some(cleanable) =>
      // there's a log, clean it
      this.lastPreCleanStats = preCleanStats
      try {
        // call cleanLog to perform the cleaning
        cleanLog(cleanable)
        true
      } catch {
        case e @ (_: ThreadShutdownException | _: ControlThrowable) => throw e
        case e: Exception => throw new LogCleaningException(cleanable.log, e.getMessage, e)
      }
  }
  // collect the Log objects that enable both the compact and delete cleanup policies,
  // and mark their topic partitions as LogCleaningInProgress
  val deletable: Iterable[(TopicPartition, Log)] = cleanerManager.deletableLogs()
  try {
    deletable.foreach { case (_, log) =>
      try {
        // delete expired or oversized LogSegment objects;
        // whether the delete policy actually applies is checked inside deleteOldSegments
        log.deleteOldSegments()
      } catch {
        case e @ (_: ThreadShutdownException | _: ControlThrowable) => throw e
        case e: Exception => throw new LogCleaningException(log, e.getMessage, e)
      }
    }
  } finally {
    // clear the LogCleaningInProgress state for these topic partitions
    cleanerManager.doneDeleting(deletable.map(_._1))
  }
  cleaned
}
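The surrounding loop lives in CleanerThread#doWork, roughly as below. This is a paraphrase of the thread body, not the verbatim source; as I read it, backOffMs corresponds to the log.cleaner.backoff.ms config:

override def doWork(): Unit = {
  // tryCleanFilthiestLog wraps cleanFilthiestLog, turning a LogCleaningException
  // into a warning and marking the offending partition uncleanable
  val cleaned = tryCleanFilthiestLog()
  if (!cleaned)
    pause(config.backOffMs, TimeUnit.MILLISECONDS)  // nothing to clean: back off before retrying
}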
Here grabFilthiestCompactedLog selects the next LogToClean object most in need of cleaning; Cleaner#clean is then invoked to clean it:
def grabFilthiestCompactedLog(time: Time, preCleanStats: PreCleanStats = new PreCleanStats()): Option[LogToClean] = {
  inLock(lock) {
    val now = time.milliseconds
    this.timeOfLastRun = now
    // read the cleaner-offset-checkpoint file in each log directory to get the offset
    // up to which each topic partition was cleaned last time
    val lastClean = allCleanerCheckpoints
    val dirtyLogs = logs.filter {
      // keep only Log objects whose cleanup.policy includes compact; delete-only logs need no compaction
      case (_, log) => log.config.compact // match logs that are marked as compacted
    }.filterNot {
      case (topicPartition, log) =>
        // skip any logs already in-progress and uncleanable partitions
        inProgress.contains(topicPartition) || isUncleanablePartition(log, topicPartition)
    }.map {
      // wrap each range that needs cleaning in a LogToClean object
      case (topicPartition, log) => // create a LogToClean instance for each
        try {
          val lastCleanOffset = lastClean.get(topicPartition)
          // compute the offset range to clean
          val offsetsToClean = cleanableOffsets(log, lastCleanOffset, now)
          // update checkpoint for logs with invalid checkpointed offsets
          if (offsetsToClean.forceUpdateCheckpoint)
            updateCheckpoints(log.dir.getParentFile(), Option(topicPartition, offsetsToClean.firstDirtyOffset))
          val compactionDelayMs = maxCompactionDelay(log, offsetsToClean.firstDirtyOffset, now)
          preCleanStats.updateMaxCompactionDelay(compactionDelayMs)
          // build the LogToClean object for this cleaning range
          LogToClean(topicPartition, log, offsetsToClean.firstDirtyOffset, offsetsToClean.firstUncleanableDirtyOffset, compactionDelayMs > 0)
        } catch {
          case e: Throwable => throw new LogCleaningException(log,
            s"Failed to calculate log cleaning stats for partition $topicPartition", e)
        }
    }.filter(ltc => ltc.totalBytes > 0) // skip any empty logs
    // record the largest cleanableRatio among the candidate ranges
    this.dirtiestLogCleanableRatio = if (dirtyLogs.nonEmpty) dirtyLogs.max.cleanableRatio else 0
    // and must meet the minimum threshold for dirty byte ratio or have some bytes required to be compacted
    // filter out LogToClean objects whose cleanableRatio does not exceed min.cleanable.dirty.ratio;
    // with the default of 0.5, a log qualifies only when more than 50% of its bytes are dirty
    val cleanableLogs = dirtyLogs.filter { ltc =>
      (ltc.needCompactionNow && ltc.cleanableBytes > 0) || ltc.cleanableRatio > ltc.log.config.minCleanableRatio
    }
    if (cleanableLogs.isEmpty) {
      None
    } else {
      preCleanStats.recordCleanablePartitions(cleanableLogs.size)
      // pick the LogToClean with the highest proportion of dirty data
      val filthiest = cleanableLogs.max
      // mark the corresponding topic partition as LogCleaningInProgress
      inProgress.put(filthiest.topicPartition, LogCleaningInProgress)
      Some(filthiest)
    }
  }
}
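The calls to dirtyLogs.max and cleanableLogs.max work because LogToClean is Ordered by its cleanableRatio, the proportion of dirty bytes in the log. A self-contained sketch of that ordering (LogToCleanSketch is a hypothetical stand-in; the real class derives the byte counts from the segments on disk):

case class LogToCleanSketch(cleanBytes: Long, cleanableBytes: Long) extends Ordered[LogToCleanSketch] {
  val totalBytes: Long = cleanBytes + cleanableBytes
  val cleanableRatio: Double = cleanableBytes / totalBytes.toDouble
  override def compare(that: LogToCleanSketch): Int =
    math.signum(this.cleanableRatio - that.cleanableRatio).toInt
}

val candidates = Seq(LogToCleanSketch(800, 200), LogToCleanSketch(300, 700))
// ratios are 0.2 and 0.7, so .max picks the second; with min.cleanable.dirty.ratio = 0.5,
// only the second would survive the filter above in the first place
val filthiest = candidates.max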
The cleanable offset range is computed by LogCleanerManager#cleanableOffsets; it consists of the start offset of the dirty portion and the baseOffset of the first uncleanable LogSegment. The implementation is as follows (a worked example of the final min follows the listing):
def cleanableOffsets(log: Log, // the Log to clean
                     lastCleanOffset: Option[Long], // end offset of this partition's previous cleaning, if any
                     now: Long): OffsetsToClean = {
  // If the log segments are abnormally truncated and hence the checkpointed offset is no longer valid;
  // reset to the log starting offset and log the error
  // Compute the start offset of the next cleaning.
  // Normally this is the end offset of the previous cleaning, recorded in cleaner-offset-checkpoint,
  // but this partition may never have been cleaned, or the LogSegment containing the checkpointed
  // offset may already have been deleted, so the checkpoint is validated against the log's start offset.
  val (firstDirtyOffset, forceUpdateCheckpoint) = {
    // the start offset of the current Log (base offset of its first LogSegment)
    val logStartOffset = log.logStartOffset
    // if cleaner-offset-checkpoint has no record for this topic partition, or the recorded offset
    // is smaller than logStartOffset, clean from logStartOffset instead
    val checkpointDirtyOffset = lastCleanOffset.getOrElse(logStartOffset)
    if (checkpointDirtyOffset < logStartOffset) {
      // Don't bother with the warning if compact and delete are enabled.
      if (!isCompactAndDelete(log))
        warn(s"Resetting first dirty offset of ${log.name} to log start offset $logStartOffset " +
          s"since the checkpointed offset $checkpointDirtyOffset is invalid.")
      (logStartOffset, true)
    } else if (checkpointDirtyOffset > log.logEndOffset) {
      // The dirty offset has gotten ahead of the log end offset. This could happen if there was data
      // corruption at the end of the log. We conservatively assume that the full log needs cleaning.
      warn(s"The last checkpoint dirty offset for partition ${log.name} is $checkpointDirtyOffset, " +
        s"which is larger than the log end offset ${log.logEndOffset}. Resetting to the log start offset $logStartOffset.")
      (logStartOffset, true)
    } else {
      (checkpointDirtyOffset, false)
    }
  }
  // the configured minimum compaction lag (min.compaction.lag.ms)
  val minCompactionLagMs = math.max(log.config.compactionLagMs, 0L)
  // find first segment that cannot be cleaned
  // neither the active segment, nor segments with any messages closer to the head of the log than the minimum compaction lag time
  // may be cleaned
  // i.e. compute the smallest offset that must NOT be cleaned this time: the end of the cleanable range
  val firstUncleanableDirtyOffset: Long = Seq(
    // we do not clean beyond the first unstable offset
    log.firstUnstableOffset,
    // the active segment is always uncleanable, to avoid racing with appends
    Option(log.activeSegment.baseOffset),
    // the first segment whose largest message timestamp is within a minimum time lag from now
    if (minCompactionLagMs > 0) {
      // dirty log segments: the LogSegments between firstDirtyOffset and the active segment
      val dirtyNonActiveSegments = log.nonActiveLogSegmentsFrom(firstDirtyOffset)
      dirtyNonActiveSegments.find { s =>
        // a LogSegment whose largest timestamp lies within compactionLagMs of now must not be cleaned
        val isUncleanable = s.largestTimestamp > now - minCompactionLagMs
        debug(s"Checking if log segment may be cleaned: log='${log.name}' segment.baseOffset=${s.baseOffset} " +
          s"segment.largestTimestamp=${s.largestTimestamp}; now - compactionLag=${now - minCompactionLagMs}; " +
          s"is uncleanable=$isUncleanable")
        isUncleanable
      }.map(_.baseOffset)
    } else None
  ).flatten.min
  debug(s"Finding range of cleanable offsets for log=${log.name}. Last clean offset=$lastCleanOffset " +
    s"now=$now => firstDirtyOffset=$firstDirtyOffset firstUncleanableOffset=$firstUncleanableDirtyOffset " +
    s"activeSegment.baseOffset=${log.activeSegment.baseOffset}")
  OffsetsToClean(firstDirtyOffset, math.max(firstDirtyOffset, firstUncleanableDirtyOffset), forceUpdateCheckpoint)
}
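The Seq(...).flatten.min idiom drops the undefined candidates and takes the smallest of the rest. A worked example with hypothetical values:

// hypothetical candidates: no unstable offset, the active segment starts at 500,
// and the compaction lag keeps everything from offset 320 onwards uncleanable
val candidates: Seq[Option[Long]] = Seq(None, Some(500L), Some(320L))
val firstUncleanable = candidates.flatten.min  // => 320: cleaning stops before offset 320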
cleanLog drives the actual cleaning; its core is the Cleaner#clean method (a note on the delete horizon it computes follows the listing):
private[log] def clean(cleanable: LogToClean): (Long, CleanerStats) = {
  // figure out the timestamp below which it is safe to remove delete tombstones
  // this position is defined to be a configurable time beneath the last modified time of the last clean segment
  // step 2: compute the delete horizon for tombstones
  val deleteHorizonMs =
    cleanable.log.logSegments(0, cleanable.firstDirtyOffset).lastOption match {
      case None => 0L
      case Some(seg) => seg.lastModified - cleanable.log.config.deleteRetentionMs
    }
  doClean(cleanable, deleteHorizonMs)
}
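Put concretely, a tombstone is kept for delete.retention.ms (24 hours by default) past the last modified time of the last already-clean segment. A quick arithmetic sketch with hypothetical values:

val lastCleanSegmentMtime = 1700000000000L        // hypothetical lastModified of the last clean segment
val deleteRetentionMs     = 24 * 60 * 60 * 1000L  // delete.retention.ms default: 24h
val deleteHorizonMs       = lastCleanSegmentMtime - deleteRetentionMs
// tombstones in segments whose lastModified <= deleteHorizonMs are discarded during this pass

doClean then builds the offset map and cleans the grouped segments: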
private[log] def doClean(cleanable: LogToClean, deleteHorizonMs: Long): (Long, CleanerStats) = {
  info("Beginning cleaning of log %s.".format(cleanable.log.name))
  // the Log to clean
  val log = cleanable.log
  // collects statistics about this cleaning pass
  val stats = new CleanerStats()
  // build the offset map
  info("Building offset map for %s...".format(cleanable.log.name))
  // upper bound of the cleaning range
  val upperBoundOffset = cleanable.firstUncleanableOffset
  // step 1: walk the LogSegments in the dirty range and fill offsetMap, which records,
  // for each message key, the largest offset of that key within the range
  buildOffsetMap(log, cleanable.firstDirtyOffset, upperBoundOffset, offsetMap, stats)
  val endOffset = offsetMap.latestOffset + 1
  stats.indexDone()
  // determine the timestamp up to which the log will be cleaned
  // this is the lower of the last active segment and the compaction lag
  val cleanableHorizonMs = log.logSegments(0, cleanable.firstUncleanableOffset).lastOption.map(_.lastModified).getOrElse(0L)
  // group the segments and clean the groups
  // step 3: group the LogSegments in [0, endOffset) and clean them group by group
  info("Cleaning log %s (cleaning prior to %s, discarding tombstones prior to %s)...".format(log.name, new Date(cleanableHorizonMs), new Date(deleteHorizonMs)))
  val transactionMetadata = new CleanedTransactionMetadata
  val groupedSegments = groupSegmentsBySize(log.logSegments(0, endOffset), log.config.segmentSize,
    log.config.maxIndexSize, cleanable.firstUncleanableOffset)
  for (group <- groupedSegments)
    // clean each group into a single replacement LogSegment
    cleanSegments(log, group, offsetMap, deleteHorizonMs, stats, transactionMetadata)
  // record buffer utilization
  stats.bufferUtilization = offsetMap.utilization
  stats.allDone()
  (endOffset, stats)
}
Let's focus on buildOffsetMap. The filling process is straightforward: the method walks the messages in the cleaning range until the offsetMap is full or the end of the range is reached, adding each keyed message and its offset to the offsetMap along the way. Because messages are appended in offset order, the offsetMap always holds the largest offset seen so far for each key (a small simulation follows the method body below).
/**
 * Build a map of key_hash => offset for the keys in the cleanable dirty portion of the log to use in cleaning.
 * @param log The log to use
 * @param start The offset at which dirty messages begin
 * @param end The ending offset for the map that is being built
 * @param map The map in which to store the mappings
 * @param stats Collector for cleaning statistics
 *
 * The offsetMap here is a SkimpyOffsetMap, a Kafka-specific implementation that maps each
 * message key to the largest offset of that key within the cleaning range; it is later used
 * to decide which messages to discard and which to retain.
 */
private[log] def buildOffsetMap(log: Log, // the Log to clean
                                start: Long, // start offset of the cleaning range
                                end: Long, // end offset of the cleaning range
                                map: OffsetMap, // maps each message key to its largest offset
                                stats: CleanerStats): Unit = {
  map.clear()
  // the LogSegments in [start, end): the ones to clean in this pass
  val dirty = log.logSegments(start, end).toBuffer
  val nextSegmentStartOffsets = new ListBuffer[Long]
  if (dirty.nonEmpty) {
    for (nextSegment <- dirty.tail) nextSegmentStartOffsets.append(nextSegment.baseOffset)
    nextSegmentStartOffsets.append(end)
  }
  info("Building offset map for log %s for %d segments in offset range [%d, %d).".format(log.name, dirty.size, start, end))
  val transactionMetadata = new CleanedTransactionMetadata
  val abortedTransactions = log.collectAbortedTransactions(start, end)
  transactionMetadata.addAbortedTransactions(abortedTransactions)
  // Add all the cleanable dirty segments. We must take at least map.slots * load_factor,
  // but we may be able to fit more (if there is lots of duplication in the dirty section of the log)
  // tracks whether the map has been filled up
  var full = false
  for ((segment, nextSegmentStartOffset) <- dirty.zip(nextSegmentStartOffsets) if !full) {
    // verify that this partition's cleaning state is not LogCleaningAborted
    checkDone(log.topicPartition)
    // fill offsetMap from this LogSegment: message key -> largest offset seen in the range
    full = buildOffsetMapForSegment(log.topicPartition, segment, map, start, nextSegmentStartOffset, log.config.maxMessageSize,
      transactionMetadata, stats)
    if (full)
      debug("Offset map is full, %d segments fully mapped, segment with base offset %d is partially mapped".format(dirty.indexOf(segment), segment.baseOffset))
  }
  info("Offset map for log %s complete.".format(log.name))
}
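A minimal simulation of that bounded, last-write-wins fill (a plain mutable.Map stands in for SkimpyOffsetMap; maxDesiredMapSize mirrors the slots * load-factor cap computed in buildOffsetMapForSegment below):

import scala.collection.mutable

// hypothetical records in offset order: (key, offset)
val records = Seq(("k1", 0L), ("k2", 1L), ("k1", 2L), ("k3", 3L), ("k4", 4L))
val offsetMap = mutable.Map.empty[String, Long]
val maxDesiredMapSize = 3  // pretend slots * dupBufferLoadFactor == 3

var full = false
for ((key, offset) <- records if !full) {
  if (offsetMap.size < maxDesiredMapSize)
    offsetMap(key) = offset  // a later offset overwrites an earlier one for the same key
  else
    full = true  // capacity reached: the remaining records wait for the next cleaning pass
}
// offsetMap == Map("k1" -> 2, "k2" -> 1, "k3" -> 3); "k4" did not fit (full == true),
// so its segment would be reported as only partially mapped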
buildOffsetMapForSegment processes the messages of a single LogSegment, filling offsetMap with each message key mapped to the largest offset seen for it within the range:
/**
 * Add the messages in the given segment to the offset map
 *
 * @param segment The segment to index
 * @param map The map in which to store the key=>offset mapping
 * @param stats Collector for cleaning statistics
 *
 * @return If the map was filled whilst loading from this segment
 */
private def buildOffsetMapForSegment(topicPartition: TopicPartition,
                                     segment: LogSegment,
                                     map: OffsetMap,
                                     startOffset: Long,
                                     nextSegmentStartOffset: Long,
                                     maxLogMessageSize: Int,
                                     transactionMetadata: CleanedTransactionMetadata,
                                     stats: CleanerStats): Boolean = {
  // look up the physical position of the range's start offset via the offset index
  var position = segment.offsetIndex.lookup(startOffset).position
  // the maximum number of entries the map should hold (slots * load factor)
  val maxDesiredMapSize = (map.slots * this.dupBufferLoadFactor).toInt
  // iterate over the messages in this LogSegment
  while (position < segment.log.sizeInBytes) {
    // re-check that this partition's cleaning state is not LogCleaningAborted
    checkDone(topicPartition)
    readBuffer.clear()
    try {
      // read a chunk of messages into the buffer
      segment.log.readInto(readBuffer, position)
    } catch {
      case e: Exception =>
        throw new KafkaException(s"Failed to read from segment $segment of partition $topicPartition " +
          "while loading offset map", e)
    }
    val records = MemoryRecords.readableRecords(readBuffer)
    throttler.maybeThrottle(records.sizeInBytes)
    val startPosition = position
    // deep-iterate the record batches
    for (batch <- records.batches.asScala) {
      if (batch.isControlBatch) {
        transactionMetadata.onControlBatchRead(batch)
        stats.indexMessagesRead(1)
      } else {
        val isAborted = transactionMetadata.onBatchRead(batch)
        if (isAborted) {
          // If the batch is aborted, do not bother populating the offset map.
          // Note that abort markers are supported in v2 and above, which means count is defined.
          stats.indexMessagesRead(batch.countOrNull)
        } else {
          for (record <- batch.asScala) {
            // only index messages that have a key and whose offset is at or after startOffset
            if (record.hasKey && record.offset >= startOffset) {
              // if the map is not full, put the key and offset, overwriting any smaller offset for the same key
              if (map.size < maxDesiredMapSize)
                map.put(record.key, record.offset)
              else
                return true
            }
            stats.indexMessagesRead(1)
          }
        }
      }
      if (batch.lastOffset >= startOffset)
        map.updateLatestOffset(batch.lastOffset)
    }
    val bytesRead = records.validBytes
    // advance the physical position
    position += bytesRead
    stats.indexBytesRead(bytesRead)
    // if we didn't read even one complete message, our read buffer may be too small;
    // position not advancing means no complete message was read, so grow the buffer
    if (position == startPosition)
      growBuffersOrFail(segment.log, position, maxLogMessageSize, records)
  }
  // In the case of offsets gap, fast forward to latest expected offset in this segment.
  map.updateLatestOffset(nextSegmentStartOffset - 1L)
  restoreBuffers()
  false
}
With the offsetMap filled, the method then groups the LogSegments to clean, bounded by the maximum sizes of a single LogSegment and of its index files, so that the LogSegment produced by cleaning is neither too large nor too small and the output stays balanced. Each group is then passed to Cleaner#cleanSegments, which cleans the group into a single new LogSegment and swaps it in for the original LogSegment set (a simplified sketch of the grouping follows).
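A simplified sketch of the size-based grouping (groupBySizeSketch is a hypothetical helper; the real groupSegmentsBySize additionally bounds index sizes and guards against offset-range overflow):

// group consecutive segments so each group's total log size stays under maxSize
def groupBySizeSketch(segmentSizes: List[Long], maxSize: Long): List[List[Long]] =
  segmentSizes.foldLeft(List.empty[List[Long]]) {
    case (groups, size) => groups match {
      case current :: rest if current.sum + size <= maxSize =>
        (current :+ size) :: rest   // fits: extend the current group
      case _ =>
        List(size) :: groups        // start a new group
    }
  }.reverse

// e.g. segments of 400/300/500/900 bytes with a 1000-byte cap
// => List(List(400, 300), List(500), List(900))
groupBySizeSketch(List(400L, 300L, 500L, 900L), 1000L)

With grouping out of the way, here is cleanSegments itself: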
private[log] def cleanSegments(log: Log,
                               segments: Seq[LogSegment],
                               map: OffsetMap,
                               deleteHorizonMs: Long,
                               stats: CleanerStats,
                               transactionMetadata: CleanedTransactionMetadata): Unit = {
  // create a new segment with a suffix appended to the name of the log and indexes
  // i.e. create the ".cleaned" file named after the base offset of the group's first LogSegment
  val cleaned = LogCleaner.createNewCleanedSegment(log, segments.head.baseOffset)
  transactionMetadata.cleanedIndex = Some(cleaned.txnIndex)
  try {
    // clean segments into the new destination segment
    val iter = segments.iterator
    var currentSegmentOpt: Option[LogSegment] = Some(iter.next())
    val lastOffsetOfActiveProducers = log.lastRecordsOfActiveProducers
    while (currentSegmentOpt.isDefined) {
      val currentSegment = currentSegmentOpt.get
      val nextSegmentOpt = if (iter.hasNext) Some(iter.next()) else None
      val startOffset = currentSegment.baseOffset
      val upperBoundOffset = nextSegmentOpt.map(_.baseOffset).getOrElse(map.latestOffset + 1)
      val abortedTransactions = log.collectAbortedTransactions(startOffset, upperBoundOffset)
      transactionMetadata.addAbortedTransactions(abortedTransactions)
      val retainDeletesAndTxnMarkers = currentSegment.lastModified > deleteHorizonMs
      info(s"Cleaning $currentSegment in log ${log.name} into ${cleaned.baseOffset} " +
        s"with deletion horizon $deleteHorizonMs, " +
        s"${if(retainDeletesAndTxnMarkers) "retaining" else "discarding"} deletes.")
      try {
        // clean the current LogSegment, appending the retained data to the ".cleaned" segment
        cleanInto(log.topicPartition, currentSegment.log, cleaned, map, retainDeletesAndTxnMarkers, log.config.maxMessageSize,
          transactionMetadata, lastOffsetOfActiveProducers, stats)
      } catch {
        case e: LogSegmentOffsetOverflowException =>
          // Split the current segment. It's also safest to abort the current cleaning process, so that we retry from
          // scratch once the split is complete.
          info(s"Caught segment overflow error during cleaning: ${e.getMessage}")
          log.splitOverflowedSegment(currentSegment)
          throw new LogCleaningAbortedException()
      }
      currentSegmentOpt = nextSegmentOpt
    }
    cleaned.onBecomeInactiveSegment()
    // flush new segment to disk before swap
    cleaned.flush()
    // update the modification date to retain the last modified date of the original files
    val modified = segments.last.lastModified
    cleaned.lastModified = modified
    // swap in new segment
    info(s"Swapping in cleaned segment $cleaned for segment(s) $segments in log $log")
    // replace the original LogSegment set with the cleaned LogSegment
    log.replaceSegments(List(cleaned), segments)
  } catch {
    case e: LogCleaningAbortedException =>
      try cleaned.deleteIfExists()
      catch {
        case deleteException: Exception =>
          e.addSuppressed(deleteException)
      } finally throw e
  }
}
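The crash safety of the final replaceSegments step hinges on file-name suffixes. My reading of the comments around Log#replaceSegments is roughly the lifecycle below (a sketch with a hypothetical base name; as I understand it, on restart leftover ".cleaned" files are discarded while ".swap" files are used to complete an interrupted swap):

val base = "00000000000000000000"
val steps = Seq(
  s"$base.log.cleaned",  // cleanSegments writes retained records here (incomplete if we crash now)
  s"$base.log.swap",     // renamed before the swap; recovery completes the swap from this file
  s"$base.log"           // swap done: old segments deleted, suffix stripped
)
steps.foreach(println)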
The implementation of Cleaner#cleanInto:
private[log] def cleanInto(topicPartition: TopicPartition, // topic partition of the Log being cleaned
                           sourceRecords: FileRecords, // the LogSegment data to clean
                           dest: LogSegment, // the destination LogSegment holding retained records
                           map: OffsetMap,
                           retainDeletesAndTxnMarkers: Boolean, // source.lastModified > deleteHorizonMs: whether delete tombstones (and txn markers) should still be retained
                           maxLogMessageSize: Int,
                           transactionMetadata: CleanedTransactionMetadata,
                           lastRecordsOfActiveProducers: Map[Long, LastRecord],
                           stats: CleanerStats): Unit = {
  // define the record filter
  val logCleanerFilter: RecordFilter = new RecordFilter {
    var discardBatchRecords: Boolean = _
    override def checkBatchRetention(batch: RecordBatch): BatchRetention = {
      // we piggy-back on the tombstone retention logic to delay deletion of transaction markers.
      // note that we will never delete a marker until all the records from that transaction are removed.
      discardBatchRecords = shouldDiscardBatch(batch, transactionMetadata, retainTxnMarkers = retainDeletesAndTxnMarkers)
      def isBatchLastRecordOfProducer: Boolean = {
        // We retain the batch in order to preserve the state of active producers. There are three cases:
        // 1) The producer is no longer active, which means we can delete all records for that producer.
        // 2) The producer is still active and has a last data offset. We retain the batch that contains
        //    this offset since it also contains the last sequence number for this producer.
        // 3) The last entry in the log is a transaction marker. We retain this marker since it has the
        //    last producer epoch, which is needed to ensure fencing.
        lastRecordsOfActiveProducers.get(batch.producerId).exists { lastRecord =>
          lastRecord.lastDataOffset match {
            case Some(offset) => batch.lastOffset == offset
            case None => batch.isControlBatch && batch.producerEpoch == lastRecord.producerEpoch
          }
        }
      }
      if (batch.hasProducerId && isBatchLastRecordOfProducer)
        BatchRetention.RETAIN_EMPTY
      else if (discardBatchRecords)
        BatchRetention.DELETE
      else
        BatchRetention.DELETE_EMPTY
    }
    override def shouldRetainRecord(batch: RecordBatch, record: Record): Boolean = {
      if (discardBatchRecords)
        // The batch is only retained to preserve producer sequence information; the records can be removed
        false
      else
        // filter individual records, retaining only those that satisfy all of the following:
        // 1. the record has a key, and the key is present in the offsetMap;
        // 2. the record's offset is no smaller than the offset recorded for that key in the offsetMap;
        // 3. if the record is a delete tombstone, it is retained only while tombstones may still be retained
        Cleaner.this.shouldRetainRecord(map, retainDeletesAndTxnMarkers, batch, record, stats)
    }
  }
  var position = 0
  // iterate over the messages in the source LogSegment
  while (position < sourceRecords.sizeInBytes) {
    // verify that this partition's cleaning state is not LogCleaningAborted
    checkDone(topicPartition)
    // read a chunk of messages and copy any that are to be retained to the write buffer to be written out
    readBuffer.clear()
    writeBuffer.clear()
    // read messages into the buffer
    sourceRecords.readInto(readBuffer, position)
    val records = MemoryRecords.readableRecords(readBuffer)
    throttler.maybeThrottle(records.sizeInBytes)
    // filter the records, copying the ones to retain into writeBuffer
    val result = records.filterTo(topicPartition, logCleanerFilter, writeBuffer, maxLogMessageSize, decompressionBufferSupplier)
    stats.readMessages(result.messagesRead, result.bytesRead)
    stats.recopyMessages(result.messagesRetained, result.bytesRetained)
    position += result.bytesRead
    // if any messages are to be retained, write them out
    // i.e. append the retained records to the cleaned LogSegment
    val outputBuffer = result.outputBuffer
    if (outputBuffer.position() > 0) {
      outputBuffer.flip()
      val retained = MemoryRecords.readableRecords(outputBuffer)
      // it's OK not to hold the Log's lock in this case, because this segment is only accessed by other threads
      // after `Log.replaceSegments` (which acquires the lock) is called
      dest.append(largestOffset = result.maxOffset,
        largestTimestamp = result.maxTimestamp,
        shallowOffsetOfMaxTimestamp = result.shallowOffsetOfMaxTimestamp,
        records = retained)
      throttler.maybeThrottle(outputBuffer.limit())
    }
    // if we read bytes but didn't get even one complete batch, our I/O buffer is too small, grow it and try again
    // `result.bytesRead` contains bytes from `messagesRead` and any discarded batches.
    if (readBuffer.limit() > 0 && result.bytesRead == 0)
      growBuffersOrFail(sourceRecords, position, maxLogMessageSize, records)
  }
  // reset the buffers back to their configured size
  restoreBuffers()
}
This method deep-iterates every record in the LogSegment being cleaned and calls MemoryRecords#filterTo to filter them, retaining only the records that satisfy all of the following conditions (see the predicate sketch after this list):
1. the record has a key, and the key is present in the offsetMap;
2. the record's offset is no smaller than the offset recorded for that key in the offsetMap;
3. if the record is a delete tombstone, it is retained only while tombstones may still be retained.
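A simplified, self-contained paraphrase of that predicate (RecordSketch and the plain Map stand in for Record and SkimpyOffsetMap; the real method also unconditionally keeps records beyond map.latestOffset):

case class RecordSketch(key: Option[String], value: Option[String], offset: Long)

def shouldRetain(offsetMap: Map[String, Long], retainDeletes: Boolean, r: RecordSketch): Boolean =
  r.key match {
    case None => false  // records without a key cannot be compacted and are dropped
    case Some(k) =>
      val redundant = offsetMap.get(k).exists(latest => r.offset < latest)  // a newer value for this key exists
      val obsoleteDelete = !retainDeletes && r.value.isEmpty                // tombstone past the delete horizon
      !redundant && !obsoleteDelete
  }

val m = Map("k1" -> 2L)
shouldRetain(m, retainDeletes = true, RecordSketch(Some("k1"), Some("v0"), 0L))  // false: superseded by offset 2
shouldRetain(m, retainDeletes = true, RecordSketch(Some("k1"), Some("v3"), 2L))  // true: latest value for k1
shouldRetain(m, retainDeletes = false, RecordSketch(Some("k1"), None, 2L))       // false: expired tombstone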