概述
Kafka源码包含多个模块,每个模块负责不同的功能。以下是一些核心模块及其功能的概述:
-
服务端源码 :实现Kafka Broker的核心功能,包括日志存储、控制器、协调器、元数据管理及状态机管理、延迟机制、消费者组管理、高并发网络架构模型实现等。
-
Java客户端源码 :实现了Producer和Consumer与Broker的交互机制,以及通用组件支撑代码。
-
Connect源码 :用来构建异构数据双向流式同步服务。
-
Stream源码 :用来实现实时流处理相关功能。
-
Raft源码 :实现了Raft一致性协议。
-
Admin模块 :Kafka的管理员模块,操作和管理其topic,partition相关,包含创建,删除topic,或者拓展分区等。
-
Api模块 :负责数据交互,客户端与服务端交互数据的编码与解码。
-
Client模块 :包含Producer读取Kafka Broker元数据信息的类,如topic和分区,以及leader。
-
Cluster模块 :包含Broker、Cluster、Partition、Replica等实体类。
-
Common模块 :包含各种异常类以及错误验证。
-
Consumer模块 :消费者处理模块,负责客户端消费者数据和逻辑处理。
-
Controller模块 :负责中央控制器的选举,分区的Leader选举,Replica的分配或重新分配,分区和副本的扩容等。
-
Coordinator模块 :负责管理部分consumer group和他们的offset。
-
Javaapi模块 :提供Java语言的Producer和Consumer的API接口。
-
Log模块 :负责Kafka文件存储,读写所有Topic消息数据。
-
Message模块 :封装多条数据组成数据集或压缩数据集。
-
Metrics模块 :负责内部状态监控。
-
Network模块 :处理客户端连接,网络事件模块。
-
Producer模块 :生产者细节实现,包括同步和异步消息发送。
-
Security模块 :负责Kafka的安全验证和管理。
-
Serializer模块 :序列化和反序列化消息内容。
-
Server模块 :涉及Leader和Offset的checkpoint,动态配置,延时创建和删除Topic,Leader选举,Admin和Replica管理等。
-
Tools模块 :包含多种工具,如导出consumer offset值,LogSegments信息,Topic的log位置信息,Zookeeper上的offset值等。
-
Utils模块 :包含各种工具类,如Json,ZkUtils,线程池工具类,KafkaScheduler公共调度器类等。
这些模块共同构成了Kafka的整体架构,使其能够提供高吞吐量、高可用性的消息队列服务。
kafka源码分支为1.0.2
kafka创建topic的脚本形如:
bin/kafka-topics.sh
--create #表示创建topic
--zookeeper localhost:2181 #zk地址
--replication-factor 3 #副本数
--partitions 2 #分区数
--topic t01 #topic名称
可以看到脚本中最后调用了TopicCommand类,并将所有参数传入:
exec $(dirname $0)/kafka-run-class.sh kafka.admin.TopicCommand "$@"
kafka.admin.TopicCommand类:
/**
 * Entry point for the kafka-topics.sh command line tool.
 * Parses the arguments, enforces that exactly one action was requested,
 * opens a ZooKeeper session, dispatches to the matching action handler,
 * and always closes the session and exits with the recorded status code.
 */
def main(args: Array[String]): Unit = {
val opts = new TopicCommandOptions(args)
if(args.length == 0)
CommandLineUtils.printUsageAndDie(opts.parser, "Create, delete, describe, or change a topic.")
// should have exactly one action
val actions = Seq(opts.createOpt, opts.listOpt, opts.alterOpt, opts.describeOpt, opts.deleteOpt).count(opts.options.has _)
// Ensure exactly one of --list, --describe, --create, --alter or --delete was supplied
if(actions != 1)
CommandLineUtils.printUsageAndDie(opts.parser, "Command must include exactly one action: --list, --describe, --create, --alter or --delete")
// Validate the remaining arguments
opts.checkArgs()
// Initialize the ZooKeeper connection (30s session timeout, 30s connection timeout)
val zkUtils = ZkUtils(opts.options.valueOf(opts.zkConnectOpt),
30000,
30000,
JaasUtils.isZkSecurityEnabled())
var exitCode = 0
try {
if(opts.options.has(opts.createOpt)) {
// Create a topic
createTopic(zkUtils, opts)
} else if(opts.options.has(opts.alterOpt)) {
// Alter an existing topic
alterTopic(zkUtils, opts)
} else if(opts.options.has(opts.listOpt)) {
// List topics
listTopics(zkUtils, opts)
} else if(opts.options.has(opts.describeOpt)) {
// Describe a topic
describeTopic(zkUtils, opts)
} else if(opts.options.has(opts.deleteOpt)) {
// Delete a topic
deleteTopic(zkUtils, opts)
}
} catch {
// Catching Throwable is intentional here: this is a CLI tool and any failure
// should be reported and turned into a non-zero exit code
case e: Throwable =>
println("Error while executing topic command : " + e.getMessage)
error(Utils.stackTrace(e))
exitCode = 1
} finally {
// Always release the ZK session before exiting with the recorded status
zkUtils.close()
Exit.exit(exitCode)
}
}
/**
 * Handles the --create action.
 * If --replica-assignment is given, the user-supplied partition-to-broker mapping
 * is written to ZooKeeper as-is; otherwise --partitions and --replication-factor
 * are required and AdminUtils computes the assignment (optionally rack-aware).
 * When --if-not-exists is set, an already-existing topic is silently ignored.
 */
def createTopic(zkUtils: ZkUtils, opts: TopicCommandOptions) {
val topic = opts.options.valueOf(opts.topicOpt)
val configs = parseTopicConfigsToBeAdded(opts)
val ifNotExists = opts.options.has(opts.ifNotExistsOpt)
// '.' and '_' collide in metric names, so warn when the topic name contains either
if (Topic.hasCollisionChars(topic))
println("WARNING: Due to limitations in metric names, topics with a period ('.') or underscore ('_') could collide. To avoid issues it is best to use either, but not both.")
try {
// If --replica-assignment was specified, it fully determines the
// per-partition replica-to-broker placement
if (opts.options.has(opts.replicaAssignmentOpt)) {
// Parse the partition -> broker-id list mapping
val assignment = parseReplicaAssignment(opts.options.valueOf(opts.replicaAssignmentOpt))
// Write the assignment under the topic's ZK path (create, not update)
AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkUtils, topic, assignment, configs, update = false)
} else {
// Without --replica-assignment, --partitions and --replication-factor are mandatory
CommandLineUtils.checkRequiredArgs(opts.parser, opts.options, opts.partitionsOpt, opts.replicationFactorOpt)
val partitions = opts.options.valueOf(opts.partitionsOpt).intValue
val replicas = opts.options.valueOf(opts.replicationFactorOpt).intValue
// Rack awareness is enforced by default; --disable-rack-aware turns it off
val rackAwareMode = if (opts.options.has(opts.disableRackAware)) RackAwareMode.Disabled
else RackAwareMode.Enforced
// Let AdminUtils compute the assignment and create the topic
AdminUtils.createTopic(zkUtils, topic, partitions, replicas, configs, rackAwareMode)
}
println("Created topic \"%s\".".format(topic))
} catch {
// --if-not-exists turns "topic already exists" into a no-op
case e: TopicExistsException => if (!ifNotExists) throw e
}
}
/**
 * Parses the --replica-assignment value into a partition -> broker-id list map.
 * Example: "1:2,1:3,2:3" means partition 0 -> brokers [1,2], partition 1 -> [1,3],
 * partition 2 -> [2,3] (3 partitions, replication factor 2).
 * Rejects duplicate brokers within a partition and inconsistent replication factors.
 */
def parseReplicaAssignment(replicaAssignmentList: String): Map[Int, List[Int]] = {
  // One comma-separated entry per partition, in partition-id order.
  val perPartitionSpecs = replicaAssignmentList.split(",")
  val assignment = new mutable.HashMap[Int, List[Int]]()
  perPartitionSpecs.zipWithIndex.foreach { case (spec, partitionId) =>
    val brokerIds = spec.split(":").map(s => s.trim().toInt)
    // A replica list must not place two replicas on the same broker.
    val duplicateBrokers = CoreUtils.duplicates(brokerIds)
    if (duplicateBrokers.nonEmpty)
      throw new AdminCommandFailedException("Partition replica lists may not contain duplicate entries: %s".format(duplicateBrokers.mkString(",")))
    assignment.put(partitionId, brokerIds.toList)
    // Every partition must use the same replication factor as partition 0.
    if (assignment(partitionId).size != assignment(0).size)
      throw new AdminOperationException("Partition " + partitionId + " has different replication factor: " + brokerIds)
  }
  assignment.toMap
}
未直接指定--replica-assignment参数时,会调用kafka.admin.AdminUtils.createTopic()创建topic:
/**
 * Creates a topic by computing a replica assignment and persisting it to ZooKeeper.
 *
 * @param zkUtils            ZooKeeper client wrapper
 * @param topic              topic name to create
 * @param partitions         number of partitions
 * @param replicationFactor  replicas per partition
 * @param topicConfig        optional topic-level config overrides
 * @param rackAwareMode      whether the assignment must honor broker rack info
 */
def createTopic(zkUtils: ZkUtils,
                topic: String,
                partitions: Int,
                replicationFactor: Int,
                topicConfig: Properties = new Properties,
                rackAwareMode: RackAwareMode = RackAwareMode.Enforced) {
  // Fetch broker ids (plus rack info, when available) from ZooKeeper.
  val brokers = getBrokerMetadatas(zkUtils, rackAwareMode)
  // Compute the partition -> replica broker placement.
  val assignment = AdminUtils.assignReplicasToBrokers(brokers, partitions, replicationFactor)
  // Persist assignment and config under the topic's ZK paths.
  AdminUtils.createOrUpdateTopicPartitionAssignmentPathInZK(zkUtils, topic, assignment, topicConfig)
}
/**
 * Computes the partition -> replica broker assignment.
 * Validates the requested layout, then dispatches to the rack-unaware strategy
 * when no broker has rack info, or the rack-aware strategy when all brokers do.
 * A mix (only some brokers with rack info) is rejected.
 *
 * @param fixedStartIndex   starting broker index for the round-robin, or -1 for random
 * @param startPartitionId  first partition id to assign, or -1 for the default
 */
def assignReplicasToBrokers(brokerMetadatas: Seq[BrokerMetadata],
                            nPartitions: Int,
                            replicationFactor: Int,
                            fixedStartIndex: Int = -1,
                            startPartitionId: Int = -1): Map[Int, Seq[Int]] = {
  // Sanity-check the requested layout before doing any work.
  if (nPartitions <= 0)
    throw new InvalidPartitionsException("Number of partitions must be larger than 0.")
  if (replicationFactor <= 0)
    throw new InvalidReplicationFactorException("Replication factor must be larger than 0.")
  // Each replica of a partition must live on a distinct broker.
  if (replicationFactor > brokerMetadatas.size)
    throw new InvalidReplicationFactorException(s"Replication factor: $replicationFactor larger than available brokers: ${brokerMetadatas.size}.")
  if (brokerMetadatas.forall(_.rack.isEmpty))
    // No broker carries rack info: plain round-robin placement.
    assignReplicasToBrokersRackUnaware(nPartitions, replicationFactor, brokerMetadatas.map(_.id), fixedStartIndex,
      startPartitionId)
  else if (brokerMetadatas.exists(_.rack.isEmpty))
    // Partial rack info cannot produce a meaningful rack-aware layout.
    throw new AdminOperationException("Not all brokers have rack information for replica rack aware assignment.")
  else
    // Every broker has rack info: spread replicas across racks.
    assignReplicasToBrokersRackAware(nPartitions, replicationFactor, brokerMetadatas, fixedStartIndex,
      startPartitionId)
}
/**
 * Validates and persists a topic's partition replica assignment to ZooKeeper.
 * On creation (update = false) the topic config is written first; the two writes
 * are separate ZK operations and are NOT transactional, so the order matters.
 */
def createOrUpdateTopicPartitionAssignmentPathInZK(zkUtils: ZkUtils,
topic: String,
partitionReplicaAssignment: Map[Int, Seq[Int]],
config: Properties = new Properties,
update: Boolean = false) {
validateCreateOrUpdateTopic(zkUtils, topic, partitionReplicaAssignment, config, update)
// Configs only matter if a topic is being created. Changing configs via AlterTopic is not supported
if (!update) {
// Write the topic config to the ZK node /config/topics/${topic}
// write out the config if there is any, this isn't transactional with the partition assignments
writeEntityConfig(zkUtils, getEntityConfigPath(ConfigType.Topic, topic), config)
}
// Write the partition -> replica broker mapping to /brokers/topics/${topic}
// create the partition assignment
writeTopicPartitionAssignment(zkUtils, topic, partitionReplicaAssignment, update)
}
/**
 * Serializes the replica assignment to JSON and writes it to the topic's ZK node.
 * Creates the node on topic creation, overwrites it on update.
 *
 * @throws TopicExistsException     when creating and the node already exists
 * @throws AdminOperationException  for any other ZK failure
 */
private def writeTopicPartitionAssignment(zkUtils: ZkUtils, topic: String, replicaAssignment: Map[Int, Seq[Int]], update: Boolean) {
  try {
    // The assignment lives at /brokers/topics/${topic}.
    val path = getTopicPath(topic)
    // ZK stores partition ids as strings, so stringify the keys before encoding.
    val payload = zkUtils.replicaAssignmentZkData(replicaAssignment.map(e => e._1.toString -> e._2))
    if (update) {
      info("Topic update " + payload.toString)
      zkUtils.updatePersistentPath(path, payload)
    } else {
      info("Topic creation " + payload.toString)
      // createPersistentPath fails with ZkNodeExistsException if the topic exists.
      zkUtils.createPersistentPath(path, payload)
    }
    debug("Updated path %s with %s for replica assignment".format(path, payload))
  } catch {
    case _: ZkNodeExistsException => throw new TopicExistsException(s"Topic '$topic' already exists.")
    case e2: Throwable => throw new AdminOperationException(e2.toString)
  }
}
至此,Topic的创建流程结束。