我们先来看上一节的一段代码
// Seed the producer's metadata with a bootstrap Cluster built from `addresses`.
// Per the walkthrough, `addresses` holds the broker address(es) the user supplied when
// configuring the producer; the variable is defined outside this excerpt.
this.metadata.update(Cluster.bootstrap(addresses), time.milliseconds());
ChannelBuilder channelBuilder = ClientUtils.createChannelBuilder(config.values());
这段代码看起来像是去服务端拉取元数据,下面我们就来验证一下,它是否真的去拉取了元数据。
我们来看一下update方法:
/**
 * Installs {@code cluster} as the current cluster metadata and wakes up every thread
 * waiting on this object's monitor.
 *
 * <p>The call records {@code now} as both the last refresh time and the last successful
 * refresh time, clears the pending-update flag and bumps the metadata version. It does
 * not perform any network I/O itself; it only stores the object it is given.
 *
 * <p>Fix relative to the original: the parameter {@code cluster} shadows the field of the
 * same name, so the "previous" cluster ID used to be read from the <em>new</em> cluster and
 * the cluster-ID-changed log line could never fire. The previous ID is now read from
 * {@code this.cluster}.
 *
 * @param cluster the new cluster view; must not be {@code null}
 * @param now     the current time in milliseconds
 * @throws NullPointerException if {@code cluster} is {@code null}
 */
public synchronized void update(Cluster cluster, long now) {
    Objects.requireNonNull(cluster, "cluster should not be null");

    this.needUpdate = false;
    this.lastRefreshMs = now;
    this.lastSuccessfulRefreshMs = now;
    this.version += 1;

    // NOTE(review): per the walkthrough this flag is enabled by default for the producer —
    // confirm against the Metadata constructor, which is not visible here.
    if (topicExpiryEnabled) {
        // Handle expiry of topics from the metadata refresh set.
        // During producer initialization no topic has been registered yet (per the
        // walkthrough), so this loop has nothing to iterate over on the first call.
        for (Iterator<Map.Entry<String, Long>> it = topics.entrySet().iterator(); it.hasNext(); ) {
            Map.Entry<String, Long> entry = it.next();
            long expireMs = entry.getValue();
            if (expireMs == TOPIC_EXPIRY_NEEDS_UPDATE) {
                // Sentinel value: give the topic a real expiry deadline starting from now.
                entry.setValue(now + TOPIC_EXPIRY_MS);
            } else if (expireMs <= now) {
                it.remove();
                log.debug("Removing unused topic {} from the metadata list, expiryMs {} now {}", entry.getKey(), expireMs, now);
            }
        }
    }

    for (Listener listener : listeners)
        listener.onMetadataUpdate(cluster);

    // Read the ID of the cluster held BEFORE this update. `this.` is required because the
    // parameter shadows the field. The null guard is defensive: the field's initial value
    // is set outside this excerpt.
    String previousClusterId = this.cluster == null ? null : this.cluster.clusterResource().clusterId();

    // NOTE(review): per the walkthrough this flag is false by default, so the else branch
    // is the one taken during producer initialization — confirm against the constructor.
    if (this.needMetadataForAllTopics) {
        // the listener may change the interested topics, which could cause another metadata refresh.
        // If we have already fetched all topics, however, another fetch should be unnecessary.
        this.needUpdate = false;
        this.cluster = getClusterForCurrentTopics(cluster);
    } else {
        // Store the object that was passed in as-is; this field is the client's view of
        // the Kafka cluster metadata.
        this.cluster = cluster;
    }

    // The bootstrap cluster is guaranteed not to have any useful information
    if (!cluster.isBootstrapConfigured()) {
        String clusterId = cluster.clusterResource().clusterId();
        // Log only when the cluster ID actually changed (null-safe comparison).
        if (clusterId == null ? previousClusterId != null : !clusterId.equals(previousClusterId))
            log.info("Cluster ID: {}", clusterId);
        clusterResourceListeners.onUpdate(cluster.clusterResource());
    }

    // Wake up threads blocked in wait() on this Metadata instance.
    notifyAll();
    log.debug("Updated cluster metadata version {} to {}", this.version, this.cluster);
}
可以看到,初始化阶段调用的update方法并没有去服务端拉取元数据,只是把根据bootstrap地址构造出来的Cluster对象保存了下来。至此,我们已经将初始化的流程看完了。
接下来我们来看一下Metadata,也就是客户端在管理服务端的元数据时,用到了哪些数据结构。
跟进去
// Excerpt of KafkaProducer's field declarations (the class body continues beyond this excerpt).
public class KafkaProducer<K, V> implements Producer<K, V> {
private static final Logger log = LoggerFactory.getLogger(KafkaProducer.class);
private static final AtomicInteger PRODUCER_CLIENT_ID_SEQUENCE = new AtomicInteger(1);
private static final String JMX_PREFIX = "kafka.producer";
private String clientId;
private final Partitioner partitioner;
private final int maxRequestSize;
private final long totalMemorySize;
// The object that manages cluster metadata for this producer.
private final Metadata metadata;
private final RecordAccumulator accumulator;
private final Sender sender;
private final Metrics metrics;
private final Thread ioThread;
private final CompressionType compressionType;
private final Sensor errors;
private final Time time;
private final Serializer<K> keySerializer;
private final Serializer<V> valueSerializer;
private final ProducerConfig producerConfig;
private final long maxBlockTimeMs;
private final int requestTimeoutMs;
private final ProducerInterceptors<K, V> interceptors;
再跟进去,看一下Metadata类:
// Excerpt of Metadata's field declarations (the class body continues beyond this excerpt).
public final class Metadata {
private static final Logger log = LoggerFactory.getLogger(Metadata.class);
// Expiry window applied to a topic entry: 5 minutes, expressed in milliseconds.
public static final long TOPIC_EXPIRY_MS = 5 * 60 * 1000;
// Sentinel expiry value; update() replaces it with now + TOPIC_EXPIRY_MS.
private static final long TOPIC_EXPIRY_NEEDS_UPDATE = -1L;
// Minimum interval between two metadata update requests, intended to reduce network load.
// NOTE(review): the walkthrough states the default is 100 ms — confirm against the producer config.
private final long refreshBackoffMs;
// How often metadata is refreshed automatically.
// NOTE(review): the walkthrough states the default is 5 minutes — confirm against the producer config.
private final long metadataExpireMs;
// On the producer side the metadata is versioned;
// update() increments this number on every call.
private int version;
// Time of the last metadata update.
private long lastRefreshMs;
// Time of the last successful metadata update.
// update() sets both timestamps to the same value, so when every update succeeds
// this matches lastRefreshMs.
private long lastSuccessfulRefreshMs;
// The metadata of the Kafka cluster itself.
private Cluster cluster;
// One of the flags used to decide whether the metadata needs to be updated.
private boolean needUpdate;
/* Topics with expiry time */
// The topics currently tracked, each mapped to its expiry time (read as expireMs in update()).
private final Map<String, Long> topics;
private final List<Listener> listeners;
private final ClusterResourceListeners clusterResourceListeners;
private boolean needMetadataForAllTopics;
private final boolean topicExpiryEnabled;
这里可以看到,生产者使用Metadata对象来管理元数据,而真正的元数据存放在Cluster里面,我们再跟进去看一下:
// Excerpt of Cluster's field declarations (the class body continues beyond this excerpt).
public final class Cluster {
private final boolean isBootstrapConfigured;
// A Kafka cluster consists of multiple nodes; this list holds the information of those servers.
private final List<Node> nodes;
// Topics the client is not authorized for.
private final Set<String> unauthorizedTopics;
private final Set<String> internalTopics;
/**
 * Several data structures are kept below, and some of the data they hold may be redundant.
 * The purpose is to make lookups fast and efficient: trading space for time.
 */
// Maps each topic-partition to the information about that partition
// (a partition has replicas, so there is per-partition detail to keep).
private final Map<TopicPartition, PartitionInfo> partitionsByTopicPartition;
// Maps each topic to its partitions.
private final Map<String, List<PartitionInfo>> partitionsByTopic;
// Maps each topic to its currently available partitions.
private final Map<String, List<PartitionInfo>> availablePartitionsByTopic;
// Maps each server (keyed by its node ID) to the partitions located on it.
private final Map<Integer, List<PartitionInfo>> partitionsByNode;
// Maps a node ID to the corresponding server.
private final Map<Integer, Node> nodesById;
// ID information of the Kafka cluster.
private final ClusterResource clusterResource;
我们来看一下Node里面的信息
// Excerpt of Node's field declarations (the class body continues beyond this excerpt).
public class Node {
private static final Node NO_NODE = new Node(-1, "", -1);
// Node ID. Per the walkthrough this is the ID assigned in the broker's configuration
// (presumably broker.id — confirm).
private final int id;
private final String idString;
// Host name.
private final String host;
// Port number (the walkthrough notes the broker default is 9092).
private final int port;
// Rack.
private final String rack;
接下来可以看一下分区信息
// Excerpt of PartitionInfo's field declarations (the class body continues beyond this excerpt).
public class PartitionInfo {
// Topic name.
private final String topic;
// Partition number.
private final int partition;
// The server that hosts the leader of this partition.
private final Node leader;
// The nodes that hold all replicas of this partition.
private final Node[] replicas;
// The in-sync replica (ISR) list.
private final Node[] inSyncReplicas;