Besides consumer group management, the other job of GroupMetadataManager is managing consumer group offsets, which comes down to two operations: saving offset data and querying it. Notably, when a consumer group program queries offsets, Kafka always answers from the in-memory offset cache and never reads the underlying offsets topic directly, as the getOffsets implementation below will show.
Saving Consumer Group Offsets
The storeOffsets method is responsible for saving consumer group offsets.
def storeOffsets(group: GroupMetadata, // group: consumer group metadata
consumerId: String, // consumerId: member id of the committing consumer
offsetMetadata: immutable.Map[TopicPartition, OffsetAndMetadata], // offsetMetadata: offsets to be saved, keyed by partition
responseCallback: immutable.Map[TopicPartition, Errors] => Unit, // responseCallback: callback invoked once the commit completes
producerId: Long = RecordBatch.NO_PRODUCER_ID, // producerId: transactional producer id
producerEpoch: Short = RecordBatch.NO_PRODUCER_EPOCH): Unit = { // producerEpoch: transactional producer epoch
// first filter out partitions with offset metadata size exceeding limit
val filteredOffsetMetadata = offsetMetadata.filter { case (_, offsetAndMetadata) =>
validateOffsetMetadataLength(offsetAndMetadata.metadata)
}
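// Note: validateOffsetMetadataLength accepts a null metadata string and otherwise checks
// its length against the configured maximum (the offset.metadata.max.bytes broker setting)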
group.inLock {
if (!group.hasReceivedConsistentOffsetCommits)
warn(s"group: ${group.groupId} with leader: ${group.leaderOrNull} has received offset commits from consumers as well " +
s"as transactional producers. Mixing both types of offset commits will generally result in surprises and " +
s"should be avoided.")
}
val isTxnOffsetCommit = producerId != RecordBatch.NO_PRODUCER_ID
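// a producer id other than NO_PRODUCER_ID means the offsets were sent by a transactional
// producer (e.g. via Producer#sendOffsetsToTransaction) and must be written as a transactional batch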
// construct the message set to append
// if no partition's offsets survived the size filter above
if (filteredOffsetMetadata.isEmpty) {
// compute the final error codes for the commit response
// return OFFSET_METADATA_TOO_LARGE for every partition via responseCallback
val commitStatus = offsetMetadata.map { case (k, _) => k -> Errors.OFFSET_METADATA_TOO_LARGE }
responseCallback(commitStatus)
None
} else {
// check whether this broker is the coordinator for the given group
getMagic(partitionFor(group.groupId)) match {
case Some(magicValue) =>
// this broker is the coordinator
// We always use CREATE_TIME, like the producer. The conversion to LOG_APPEND_TIME (if necessary) happens automatically.
val timestampType = TimestampType.CREATE_TIME
val timestamp = time.milliseconds()
// build the offset-commit records destined for the offsets topic
val records = filteredOffsetMetadata.map { case (topicPartition, offsetAndMetadata) =>
val key = GroupMetadataManager.offsetCommitKey(group.groupId, topicPartition)
val value = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, interBrokerProtocolVersion)
new SimpleRecord(timestamp, key, value)
}
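// each record's key identifies (group id, topic, partition); the value holds the committed
// offset, the metadata string and the commit timestamp (newer versions also carry the leader epoch)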
val offsetTopicPartition = new TopicPartition(Topic.GROUP_METADATA_TOPIC_NAME, partitionFor(group.groupId))
// allocate an in-memory buffer for the records to be written
val buffer = ByteBuffer.allocate(AbstractRecords.estimateSizeInBytes(magicValue, compressionType, records.asJava))
if (isTxnOffsetCommit && magicValue < RecordBatch.MAGIC_VALUE_V2)
throw Errors.UNSUPPORTED_FOR_MESSAGE_FORMAT.exception("Attempting to make a transaction offset commit with an invalid magic: " + magicValue)
// build the MemoryRecords batch
val builder = MemoryRecords.builder(buffer, magicValue, compressionType, timestampType, 0L, time.milliseconds(),
producerId, producerEpoch, 0, isTxnOffsetCommit, RecordBatch.NO_PARTITION_LEADER_EPOCH)
records.foreach(builder.append)
val entries = Map(offsetTopicPartition -> builder.build())
// set the callback function to insert offsets into cache after log append completed
// putCacheCallback fills the committed offsets into the offsets cache of GroupMetadata
def putCacheCallback(responseStatus: Map[TopicPartition, PartitionResponse]): Unit = {
// the append response should only contain the topics partition
// make sure the write targeted exactly the expected offsets-topic partition; otherwise throw
if (responseStatus.size != 1 || !responseStatus.contains(offsetTopicPartition))
throw new IllegalStateException("Append status %s should only have one partition %s"
.format(responseStatus, offsetTopicPartition))
// record the number of offsets committed to the log
// update the offset-commit metrics
offsetCommitsSensor.record(records.size)
// construct the commit response status and insert
// the offset and metadata to cache if the append status has no error
val status = responseStatus(offsetTopicPartition)
val responseError = group.inLock {
// the append succeeded
if (status.error == Errors.NONE) {
// only update the cache if the group is not Dead
if (!group.is(Dead)) {
filteredOffsetMetadata.foreach { case (topicPartition, offsetAndMetadata) =>
if (isTxnOffsetCommit)
group.onTxnOffsetCommitAppend(producerId, topicPartition, CommitRecordMetadataAndOffset(Some(status.baseOffset), offsetAndMetadata))
else
// move the pending commit into the offsets cache via GroupMetadata.onOffsetCommitAppend
group.onOffsetCommitAppend(topicPartition, CommitRecordMetadataAndOffset(Some(status.baseOffset), offsetAndMetadata))
}
}
Errors.NONE
} else {
if (!group.is(Dead)) {
if (!group.hasPendingOffsetCommitsFromProducer(producerId))
removeProducerGroup(producerId, group.groupId)
filteredOffsetMetadata.foreach { case (topicPartition, offsetAndMetadata) =>
if (isTxnOffsetCommit)
group.failPendingTxnOffsetCommit(producerId, topicPartition)
else
// discard the pending offset write
group.failPendingOffsetWrite(topicPartition, offsetAndMetadata)
}
}
debug(s"Offset commit $filteredOffsetMetadata from group ${group.groupId}, consumer $consumerId " +
s"with generation ${group.generationId} failed when appending to log due to ${status.error.exceptionName}")
// transform the log append error code into the corresponding commit status error code
status.error match {
case Errors.UNKNOWN_TOPIC_OR_PARTITION
| Errors.NOT_ENOUGH_REPLICAS
| Errors.NOT_ENOUGH_REPLICAS_AFTER_APPEND =>
Errors.COORDINATOR_NOT_AVAILABLE
case Errors.NOT_LEADER_FOR_PARTITION
| Errors.KAFKA_STORAGE_ERROR =>
Errors.NOT_COORDINATOR
case Errors.MESSAGE_TOO_LARGE
| Errors.RECORD_LIST_TOO_LARGE
| Errors.INVALID_FETCH_SIZE =>
Errors.INVALID_COMMIT_OFFSET_SIZE
case other => other
}
}
}
// compute the final error codes for the commit response
// build the per-partition commit status from the error determined above
val commitStatus = offsetMetadata.map { case (topicPartition, offsetAndMetadata) =>
if (validateOffsetMetadataLength(offsetAndMetadata.metadata))
(topicPartition, responseError)
else
(topicPartition, Errors.OFFSET_METADATA_TOO_LARGE)
}
// finally trigger the callback logic passed from the API layer
responseCallback(commitStatus)
}
if (isTxnOffsetCommit) {
group.inLock {
addProducerGroup(producerId, group.groupId)
group.prepareTxnOffsetCommit(producerId, offsetMetadata)
}
} else {
group.inLock {
group.prepareOffsetCommit(offsetMetadata)
}
}
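// prepareOffsetCommit / prepareTxnOffsetCommit stage these offsets as pending inside
// GroupMetadata; putCacheCallback moves them into the offsets cache only once the append succeeds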
// write the records to the offsets topic and let putCacheCallback update the cache afterwards;
// appendForGroup simply delegates to ReplicaManager.appendRecords (see the sketch after this listing)
appendForGroup(group, entries, putCacheCallback)
case None =>
// this broker is not the coordinator for the group:
// return NOT_COORDINATOR for every partition via responseCallback
val commitStatus = offsetMetadata.map { case (topicPartition, _) =>
(topicPartition, Errors.NOT_COORDINATOR)
}
responseCallback(commitStatus)
None
}
}
}
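As the comments above indicate, the actual disk write happens in appendForGroup, which hands the records to ReplicaManager.appendRecords. The following is a simplified sketch of that delegation; the parameter names approximate the real method, but exact signatures differ across Kafka versions, so treat it as an illustration rather than the definitive implementation:
private def appendForGroup(group: GroupMetadata,
                           records: Map[TopicPartition, MemoryRecords],
                           callback: Map[TopicPartition, PartitionResponse] => Unit): Unit = {
  // Delegate to the replica manager, which appends the records to the corresponding
  // __consumer_offsets partition and invokes the callback once the write completes
  replicaManager.appendRecords(
    timeout = config.offsetCommitTimeoutMs.toLong,    // how long a delayed produce may wait
    requiredAcks = config.offsetCommitRequiredAcks,   // acks required for the offsets-topic write
    internalTopicsAllowed = true,                     // __consumer_offsets is an internal topic
    isFromClient = false,                             // a coordinator-internal write, not a client produce
    entriesPerPartition = records,
    delayedProduceLock = Some(group.lock),            // serialize with other group-level operations
    responseCallback = callback)
}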
Querying Consumer Group Offsets
The getOffsets method serves offset fetches straight from the in-memory cache:
def getOffsets(groupId: String, topicPartitionsOpt: Option[Seq[TopicPartition]]): Map[TopicPartition, OffsetFetchResponse.PartitionData] = {
trace("Getting offsets of %s for group %s.".format(topicPartitionsOpt.getOrElse("all partitions"), groupId))
// look up the group's metadata in the groupMetadataCache field
val group = groupMetadataCache.get(groupId)
if (group == null) { // unknown group: return INVALID_OFFSET for every requested partition
topicPartitionsOpt.getOrElse(Seq.empty[TopicPartition]).map { topicPartition =>
val partitionData = new OffsetFetchResponse.PartitionData(OffsetFetchResponse.INVALID_OFFSET,
Optional.empty(), "", Errors.NONE)
topicPartition -> partitionData
}.toMap
} else { // the group exists
group.inLock {
// a Dead group is answered the same way: INVALID_OFFSET for every requested partition
if (group.is(Dead)) {
topicPartitionsOpt.getOrElse(Seq.empty[TopicPartition]).map { topicPartition =>
val partitionData = new OffsetFetchResponse.PartitionData(OffsetFetchResponse.INVALID_OFFSET,
Optional.empty(), "", Errors.NONE)
topicPartition -> partitionData
}.toMap
} else {
topicPartitionsOpt match {
case None =>
// Return offsets for all partitions owned by this consumer group. (this only applies to consumers
// that commit offsets to Kafka.)
group.allOffsets.map { case (topicPartition, offsetAndMetadata) =>
topicPartition -> new OffsetFetchResponse.PartitionData(offsetAndMetadata.offset,
offsetAndMetadata.leaderEpoch, offsetAndMetadata.metadata, Errors.NONE)
}
case Some(topicPartitions) =>
// specific partitions were requested: return their offsets from the group metadata
topicPartitions.map { topicPartition =>
val partitionData = group.offset(topicPartition) match {
// no committed offset for this partition: return INVALID_OFFSET
case None =>
new OffsetFetchResponse.PartitionData(OffsetFetchResponse.INVALID_OFFSET,
Optional.empty(), "", Errors.NONE)
case Some(offsetAndMetadata) =>
new OffsetFetchResponse.PartitionData(offsetAndMetadata.offset,
offsetAndMetadata.leaderEpoch, offsetAndMetadata.metadata, Errors.NONE)
}
topicPartition -> partitionData
}.toMap
}
}
}
}
}
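This confirms the point made at the start of the section: offset fetches never touch the offsets topic itself; they are served from the cache held inside GroupMetadata. For reference, the relevant pieces of that cache look roughly like the excerpt below (an abbreviated sketch; field and method names follow the real class, but the surrounding class definition is omitted):
// The cache that getOffsets ultimately reads; updated by putCacheCallback after each commit
private val offsets = new mutable.HashMap[TopicPartition, CommitRecordMetadataAndOffset]

// Single-partition lookup used when the fetch names specific partitions
def offset(topicPartition: TopicPartition): Option[OffsetAndMetadata] =
  offsets.get(topicPartition).map(_.offsetAndMetadata)

// Full lookup used when the fetch names no partitions
def allOffsets: Map[TopicPartition, OffsetAndMetadata] =
  offsets.map { case (topicPartition, commitRecordMetadataAndOffset) =>
    (topicPartition, commitRecordMetadataAndOffset.offsetAndMetadata)
  }.toMap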