MetadataCache is the component a broker uses to cache the state of every partition in the cluster. The KafkaController keeps these caches up to date by sending UpdateMetadataRequests to the brokers in the cluster; each broker applies the update to its MetadataCache when it processes such a request.
private[server] class MetadataCache(brokerId: Int) extends Logging {
  private val stateChangeLogger = KafkaController.stateChangeLogger
  // State of every partition, keyed by topic and then partition id; each partition's state is a PartitionStateInfo
  private val cache = mutable.Map[String, mutable.Map[Int, PartitionStateInfo]]()
  // ID of the current controller
  private var controllerId: Option[Int] = None
  // Currently alive brokers; a Broker object records each live broker's network locations (endpoints)
  private val aliveBrokers = mutable.Map[Int, Broker]()
  // Node information of the alive brokers, per security protocol
  private val aliveNodes = mutable.Map[Int, collection.Map[SecurityProtocol, Node]]()
  private val partitionMetadataLock = new ReentrantReadWriteLock()
}
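All reads and writes of these fields go through partitionMetadataLock. As an illustration, a read-side accessor on this cache might look like the following sketch (modelled on MetadataCache's real getPartitionInfo accessor, but reconstructed here, not quoted verbatim):

  // Look up the cached state of a single partition; returns None if the
  // topic or the partition is not present in the cache.
  def getPartitionInfo(topic: String, partitionId: Int): Option[PartitionStateInfo] = {
    inReadLock(partitionMetadataLock) {
      cache.get(topic).flatMap(_.get(partitionId))
    }
  }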
An UpdateMetadataRequest is handled by the KafkaApis.handleUpdateMetadataRequest method, which hands the request straight to ReplicaManager.maybeUpdateMetadataCache():
def maybeUpdateMetadataCache(correlationId: Int, updateMetadataRequest: UpdateMetadataRequest, metadataCache: MetadataCache) {
  replicaStateChangeLock synchronized {
    // Check the controllerEpoch: reject requests coming from a stale controller
    if(updateMetadataRequest.controllerEpoch < controllerEpoch) {
      val stateControllerEpochErrorMessage = ("Broker %d received update metadata request with correlation id %d from an " +
        "old controller %d with epoch %d. Latest known controller epoch is %d").format(localBrokerId,
        correlationId, updateMetadataRequest.controllerId, updateMetadataRequest.controllerEpoch,
        controllerEpoch)
      stateChangeLogger.warn(stateControllerEpochErrorMessage)
      throw new ControllerMovedException(stateControllerEpochErrorMessage)
    } else {
      metadataCache.updateCache(correlationId, updateMetadataRequest)
      controllerEpoch = updateMetadataRequest.controllerEpoch
    }
  }
}
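For context, the KafkaApis entry point that invokes this method is roughly as follows; this is a simplified sketch (authorization and error handling omitted), not the verbatim source:

def handleUpdateMetadataRequest(request: RequestChannel.Request) {
  val correlationId = request.header.correlationId
  val updateMetadataRequest = request.body.asInstanceOf[UpdateMetadataRequest]
  // Delegate the cache update (and the controller-epoch check) to ReplicaManager
  replicaManager.maybeUpdateMetadataCache(correlationId, updateMetadataRequest, metadataCache)
  // Acknowledge the controller with an UpdateMetadataResponse
  val updateMetadataResponse = new UpdateMetadataResponse(Errors.NONE.code)
  requestChannel.sendResponse(new RequestChannel.Response(request,
    new ResponseSend(request.connectionId, new ResponseHeader(correlationId), updateMetadataResponse)))
}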
MetadataCache.updateCache() then updates the controllerId, aliveBrokers, aliveNodes and cache fields:
def updateCache(correlationId: Int, updateMetadataRequest: UpdateMetadataRequest) {
  inWriteLock(partitionMetadataLock) {
    controllerId = updateMetadataRequest.controllerId match { // update the controller id
      case id if id < 0 => None
      case id => Some(id)
    }
    // Clear aliveNodes and aliveBrokers, then rebuild them from the live_brokers field of the UpdateMetadataRequest
    aliveNodes.clear()
    aliveBrokers.clear()
    updateMetadataRequest.liveBrokers.asScala.foreach { broker =>
      val nodes = new EnumMap[SecurityProtocol, Node](classOf[SecurityProtocol])
      val endPoints = new EnumMap[SecurityProtocol, EndPoint](classOf[SecurityProtocol])
      broker.endPoints.asScala.foreach { case (protocol, ep) =>
        endPoints.put(protocol, EndPoint(ep.host, ep.port, protocol))
        nodes.put(protocol, new Node(broker.id, ep.host, ep.port))
      }
      aliveBrokers(broker.id) = Broker(broker.id, endPoints.asScala, Option(broker.rack))
      aliveNodes(broker.id) = nodes.asScala
    }
    // Update the cache map with the partition states carried in the request
    updateMetadataRequest.partitionStates.asScala.foreach { case (tp, info) =>
      val controllerId = updateMetadataRequest.controllerId
      val controllerEpoch = updateMetadataRequest.controllerEpoch
      if (info.leader == LeaderAndIsr.LeaderDuringDelete) {
        // The partition is being deleted: remove its PartitionStateInfo
        removePartitionInfo(tp.topic, tp.partition)
        stateChangeLogger.trace(s"Broker $brokerId deleted partition $tp from metadata cache in response to UpdateMetadata " +
          s"request sent by controller $controllerId epoch $controllerEpoch with correlation id $correlationId")
      } else {
        // Add or update the partition's PartitionStateInfo
        val partitionInfo = partitionStateToPartitionStateInfo(info)
        addOrUpdatePartitionInfo(tp.topic, tp.partition, partitionInfo)
        stateChangeLogger.trace(s"Broker $brokerId cached leader info $partitionInfo for partition $tp in response to " +
          s"UpdateMetadata request sent by controller $controllerId epoch $controllerEpoch with correlation id $correlationId")
      }
    }
  }
}
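The two private helpers used above maintain the nested cache map. The following is a close sketch of what they do (dropping a topic entry once its last partition is removed is an assumption based on the observed behaviour, not a verbatim quote):

private def addOrUpdatePartitionInfo(topic: String, partitionId: Int, stateInfo: PartitionStateInfo) {
  // Create the per-topic map on first use, then store the partition's state
  val infos = cache.getOrElseUpdate(topic, mutable.Map())
  infos(partitionId) = stateInfo
}

private def removePartitionInfo(topic: String, partitionId: Int): Boolean = {
  cache.get(topic).map { infos =>
    infos.remove(partitionId)
    // Drop the topic entry entirely once it has no partitions left
    if (infos.isEmpty) cache.remove(topic)
    true
  }.getOrElse(false)
}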
When a client needs to refresh its metadata, it sends a MetadataRequest to a broker. On the server side this request is first handled by the KafkaApis.handleTopicMetadataRequest method:
def handleTopicMetadataRequest(request: RequestChannel.Request) {
  val metadataRequest = request.body.asInstanceOf[MetadataRequest]
  val requestVersion = request.header.apiVersion()
  val topics = // if the requested topic set is empty, return information about all topics
    // Handle old metadata request logic. Version 0 has no way to specify "no topics".
    if (requestVersion == 0) {
      if (metadataRequest.topics() == null || metadataRequest.topics().isEmpty)
        metadataCache.getAllTopics()
      else
        metadataRequest.topics.asScala.toSet
    } else {
      if (metadataRequest.isAllTopics)
        metadataCache.getAllTopics()
      else
        metadataRequest.topics.asScala.toSet
    }
  // Authorization: split the requested topics into authorized and unauthorized sets
  var (authorizedTopics, unauthorizedTopics) =
    topics.partition(topic => authorize(request.session, Describe, new Resource(Topic, topic)))
  if (authorizedTopics.nonEmpty) {
    val nonExistingTopics = metadataCache.getNonExistingTopics(authorizedTopics)
    if (config.autoCreateTopicsEnable && nonExistingTopics.nonEmpty) {
      authorizer.foreach { az =>
        if (!az.authorize(request.session, Create, Resource.ClusterResource)) {
          authorizedTopics --= nonExistingTopics
          unauthorizedTopics ++= nonExistingTopics
        }
      }
    }
  }
  // Topics that failed authorization are reported with the TOPIC_AUTHORIZATION_FAILED error code
  val unauthorizedTopicMetadata = unauthorizedTopics.map(topic =>
    new MetadataResponse.TopicMetadata(Errors.TOPIC_AUTHORIZATION_FAILED, topic, common.Topic.isInternal(topic),
      java.util.Collections.emptyList()))
  // In version 0, we returned an error when brokers with replicas were unavailable,
  // while in higher versions we simply don't include the broker in the returned broker list
  val errorUnavailableEndpoints = requestVersion == 0
  // Query the MetadataCache for the authorized topics
  val topicMetadata =
    if (authorizedTopics.isEmpty)
      Seq.empty[MetadataResponse.TopicMetadata]
    else
      getTopicMetadata(authorizedTopics, request.securityProtocol, errorUnavailableEndpoints)
  val completeTopicMetadata = topicMetadata ++ unauthorizedTopicMetadata
  val brokers = metadataCache.getAliveBrokers
  trace("Sending topic metadata %s and brokers %s for correlation id %d to client %s".format(completeTopicMetadata.mkString(","),
    brokers.mkString(","), request.header.correlationId, request.header.clientId))
  // Build the response in MetadataResponse format
  val responseHeader = new ResponseHeader(request.header.correlationId)
  val responseBody = new MetadataResponse(
    brokers.map(_.getNode(request.securityProtocol)).asJava,
    metadataCache.getControllerId.getOrElse(MetadataResponse.NO_CONTROLLER_ID),
    completeTopicMetadata.asJava,
    requestVersion
  )
  // Hand the response to the RequestChannel
  requestChannel.sendResponse(new RequestChannel.Response(request, new ResponseSend(request.connectionId, responseHeader, responseBody)))
}
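The two MetadataCache accessors used above, getAliveBrokers and getControllerId, simply expose the fields shown earlier under the read lock. A minimal sketch of what they look like (reconstructed, not quoted verbatim):

def getAliveBrokers: Seq[Broker] = {
  inReadLock(partitionMetadataLock) {
    // Snapshot of the alive brokers rebuilt by updateCache()
    aliveBrokers.values.toBuffer
  }
}

def getControllerId: Option[Int] = controllerId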
KafkaApis.getTopicMetadata performs the actual lookup against the MetadataCache and, depending on the broker configuration and the topic name, decides whether to auto-create topics that do not yet exist:
private def getTopicMetadata(topics: Set[String], securityProtocol: SecurityProtocol, errorUnavailableEndpoints: Boolean): Seq[MetadataResponse.TopicMetadata] = {
  val topicResponses = metadataCache.getTopicMetadata(topics, securityProtocol, errorUnavailableEndpoints)
  if (topics.isEmpty || topicResponses.size == topics.size) {
    // Every requested topic was found in the MetadataCache
    topicResponses
  } else {
    // Some topics are unknown; depending on the configuration, create them via createTopic()
    val nonExistentTopics = topics -- topicResponses.map(_.topic).toSet
    val responsesForNonExistentTopics = nonExistentTopics.map { topic =>
      if (topic == TopicConstants.GROUP_METADATA_TOPIC_NAME) {
        createGroupMetadataTopic()
      } else if (config.autoCreateTopicsEnable) {
        createTopic(topic, config.numPartitions, config.defaultReplicationFactor)
      } else {
        new MetadataResponse.TopicMetadata(Errors.UNKNOWN_TOPIC_OR_PARTITION, topic, common.Topic.isInternal(topic),
          java.util.Collections.emptyList())
      }
    }
    topicResponses ++ responsesForNonExistentTopics
  }
}
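As a rough idea of the auto-creation path (a hedged sketch based on the surrounding code, not the full implementation): createTopic creates the topic through AdminUtils and, since leader election has not happened yet, answers the current request with a LEADER_NOT_AVAILABLE topic-level error so that the client retries its metadata fetch later:

// Simplified sketch; the real method also handles TopicExistsException and other errors
private def createTopic(topic: String, numPartitions: Int, replicationFactor: Int): MetadataResponse.TopicMetadata = {
  AdminUtils.createTopic(zkUtils, topic, numPartitions, replicationFactor)
  // No leader has been elected yet, so the client is told to retry
  new MetadataResponse.TopicMetadata(Errors.LEADER_NOT_AVAILABLE, topic, common.Topic.isInternal(topic),
    java.util.Collections.emptyList())
}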
MetadataCache.getTopicMetadata performs the lookup in the cache and returns the metadata of the requested topics:
def getTopicMetadata(topics: Set[String], protocol: SecurityProtocol, errorUnavailableEndpoints: Boolean = false): Seq[MetadataResponse.TopicMetadata] = {
  inReadLock(partitionMetadataLock) {
    topics.toSeq.flatMap { topic =>
      // getPartitionMetadata() builds the PartitionMetadata for each partition of the topic;
      // wrap it together with the topic information into a TopicMetadata
      getPartitionMetadata(topic, protocol, errorUnavailableEndpoints).map { partitionMetadata =>
        new MetadataResponse.TopicMetadata(Errors.NONE, topic, Topic.isInternal(topic), partitionMetadata.toBuffer.asJava)
      }
    }
  }
}
private def getPartitionMetadata(topic: String, protocol: SecurityProtocol, errorUnavailableEndpoints: Boolean): Option[Iterable[MetadataResponse.PartitionMetadata]] = {
  // Fetch the topic's partition map from the cache and iterate over its partitions
  cache.get(topic).map { partitions =>
    partitions.map { case (partitionId, partitionState) =>
      val topicPartition = TopicAndPartition(topic, partitionId)
      // LeaderAndIsr of the partition: leader id, leader epoch, ISR set and controller epoch
      val leaderAndIsr = partitionState.leaderIsrAndControllerEpoch.leaderAndIsr
      // Node of the leader replica (host and port), if that broker is alive
      val maybeLeader = getAliveEndpoint(leaderAndIsr.leader, protocol)
      // AR set (all assigned replicas) of the partition
      val replicas = partitionState.allReplicas
      // Endpoints of the available replicas in the AR set
      val replicaInfo = getEndpoints(replicas, protocol, errorUnavailableEndpoints)
      maybeLeader match {
        // The leader replica may be down: report LEADER_NOT_AVAILABLE
        case None =>
          debug(s"Error while fetching metadata for $topicPartition: leader not available")
          new MetadataResponse.PartitionMetadata(Errors.LEADER_NOT_AVAILABLE, partitionId, Node.noNode(),
            replicaInfo.asJava, java.util.Collections.emptyList())
        case Some(leader) =>
          // ISR set of the partition
          val isr = leaderAndIsr.isr
          // Endpoints of the available replicas in the ISR set
          val isrInfo = getEndpoints(isr, protocol, errorUnavailableEndpoints)
          // Check whether every replica in the AR set is available
          if (replicaInfo.size < replicas.size) {
            debug(s"Error while fetching metadata for $topicPartition: replica information not available for " +
              s"following brokers ${replicas.filterNot(replicaInfo.map(_.id).contains).mkString(",")}")
            new MetadataResponse.PartitionMetadata(Errors.REPLICA_NOT_AVAILABLE, partitionId, leader,
              replicaInfo.asJava, isrInfo.asJava)
          // Check whether every replica in the ISR set is available
          } else if (isrInfo.size < isr.size) {
            debug(s"Error while fetching metadata for $topicPartition: in sync replica information not available for " +
              s"following brokers ${isr.filterNot(isrInfo.map(_.id).contains).mkString(",")}")
            new MetadataResponse.PartitionMetadata(Errors.REPLICA_NOT_AVAILABLE, partitionId, leader,
              replicaInfo.asJava, isrInfo.asJava)
          } else {
            // Every replica in both the AR set and the ISR set is available
            new MetadataResponse.PartitionMetadata(Errors.NONE, partitionId, leader, replicaInfo.asJava,
              isrInfo.asJava)
          }
      }
    }
  }
}
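getAliveEndpoint and getEndpoints translate broker ids into Node objects using the aliveNodes map. The following is a sketch of their behaviour, reconstructed from how they are used above; in particular, the handling of errorUnavailableEndpoints is summarized rather than quoted:

// Sketch; not the verbatim source
private def getAliveEndpoint(brokerId: Int, protocol: SecurityProtocol): Option[Node] =
  // None if the broker is not alive; otherwise the Node registered for the given protocol
  aliveNodes.get(brokerId).map(_(protocol))

private def getEndpoints(brokers: Iterable[Int], protocol: SecurityProtocol, errorUnavailableEndpoints: Boolean): Seq[Node] = {
  brokers.flatMap { brokerId =>
    getAliveEndpoint(brokerId, protocol) match {
      case Some(node) => Some(node)
      // For version 0 requests the dead broker is dropped, so the size checks above report
      // REPLICA_NOT_AVAILABLE; for newer versions a placeholder Node keeps the replica id visible
      case None => if (errorUnavailableEndpoints) None else Some(new Node(brokerId, "", -1))
    }
  }.toSeq
}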