KafkaController.shutdownBroker()方法是处理ControlledShutdownRequest的核心,它使用ControlledShutdownLeaderSelector重新选择Leader副本和ISR集合,实现Leader副本的迁移。
/**
 * Handles a controlled shutdown of one broker by moving partition leadership away from it.
 *
 * For each partition hosted on the broker that has more than one assigned replica and for which
 * leadership info is known:
 *  - if the broker is the current leader, the partition is driven to OnlinePartition using
 *    controlledShutdownPartitionLeaderSelector;
 *  - otherwise a StopReplica request (deletePartition = false) is sent to the broker and its
 *    replica is driven to OfflineReplica.
 *
 * The whole operation runs under brokerShutdownLock; the controller lock is acquired separately
 * for each step so that it is released between partitions.
 *
 * @param id id of the broker that is shutting down
 * @return the partitions with more than one assigned replica whose leader is still `id` after
 *         the pass above
 * @throws ControllerMovedException if this controller is not active
 * @throws BrokerNotAvailableException if `id` is not among the live or shutting-down brokers
 * @throws IllegalStateException rethrown when sending the StopReplica batch fails with it; the
 *         controller resigns before rethrowing
 */
def shutdownBroker(id: Int) : Set[TopicAndPartition] = {
  if (!isActive())
    throw new ControllerMovedException("Controller moved to another broker. Aborting controlled shutdown")

  controllerContext.brokerShutdownLock synchronized {
    info(s"Shutting down broker $id")

    // Record the broker as shutting down, rejecting ids that are neither live nor shutting down.
    inLock(controllerContext.controllerLock) {
      val knownBrokerIds = controllerContext.liveOrShuttingDownBrokerIds
      if (!knownBrokerIds.contains(id))
        throw new BrokerNotAvailableException("Broker id %d does not exist.".format(id))
      controllerContext.shuttingDownBrokerIds.add(id)
      debug("All shutting down brokers: " + controllerContext.shuttingDownBrokerIds.mkString(","))
      debug("Live brokers: " + controllerContext.liveBrokerIds.mkString(","))
    }

    // Snapshot every partition hosted on the broker, paired with its number of assigned replicas.
    val hostedPartitions: Set[(TopicAndPartition, Int)] =
      inLock(controllerContext.controllerLock) {
        for (partition <- controllerContext.partitionsOnBroker(id)) yield {
          val replicaCount = controllerContext.partitionReplicaAssignment(partition).size
          (partition, replicaCount)
        }
      }

    // The broker leads this partition: transition it to OnlinePartition with the controlled-shutdown
    // selector. Per the original notes, this re-elects leader/ISR, updates zk and notifies all
    // affected brokers.
    def moveLeadershipAway(partition: TopicAndPartition): Unit =
      partitionStateMachine.handleStateChanges(Set(partition), OnlinePartition,
        controlledShutdownPartitionLeaderSelector)

    // The broker only follows this partition: stop its replica, then mark the replica offline.
    def takeFollowerOffline(partition: TopicAndPartition): Unit = {
      // Stop the replica first. The state change below initiates ZK changes which should take some
      // time, before which the stop replica request should be completed (in most cases).
      try {
        brokerRequestBatch.newBatch()
        brokerRequestBatch.addStopReplicaRequestForBrokers(Seq(id), partition.topic,
          partition.partition, deletePartition = false)
        brokerRequestBatch.sendRequestsToBrokers(epoch)
      } catch {
        case illegalState: IllegalStateException =>
          // Resign if the controller is in an illegal state
          error("Forcing the controller to resign")
          brokerRequestBatch.clear()
          controllerElector.resign()
          throw illegalState
      }
      // Per the original notes, this updates the isr in ZK and notifies the current leader.
      replicaStateMachine.handleStateChanges(
        Set(PartitionAndReplica(partition.topic, partition.partition, id)), OfflineReplica)
    }

    hostedPartitions.foreach { case (partition, replicaCount) =>
      // Move leadership serially, re-acquiring the lock per partition so it is relinquished in between.
      inLock(controllerContext.controllerLock) {
        controllerContext.partitionLeadershipInfo.get(partition) match {
          case Some(leaderState) if replicaCount > 1 =>
            if (leaderState.leaderAndIsr.leader == id)
              moveLeadershipAway(partition)
            else
              takeFollowerOffline(partition)
          case _ => () // no leadership info, or a single-replica partition: nothing to do
        }
      }
    }

    // Collect the replicated partitions whose leader is still the shutting-down broker.
    val stillLedByBroker = inLock(controllerContext.controllerLock) {
      trace("All leaders = " + controllerContext.partitionLeadershipInfo.mkString(","))
      controllerContext.partitionLeadershipInfo.collect {
        case (partition, leaderState)
            if leaderState.leaderAndIsr.leader == id &&
              controllerContext.partitionReplicaAssignment(partition).size > 1 =>
          partition
      }
    }
    stillLedByBroker.toSet
  }
}