Kafka source code analysis --- KafkaProducer

1. KafkaProducer overview:

KafkaProducer is a thread-safe object, so it is recommended to share a single instance across multiple threads; this is actually more efficient than giving each thread its own instance.

Below is the producer test class I wrote:

package com.jffox.cloud.saas.main;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Date;
import java.util.Properties;

public class ProductKafka {
    private static final Logger logger = LoggerFactory.getLogger(ProductKafka.class);

    public static void main(String[] args){
        if(Constant.topic == null  || "".equals(Constant.topic)) throw new RuntimeException("topic is null");

        if(Constant.group == null  || "".equals(Constant.group)) throw new RuntimeException("group is null");

        if(Constant.thread<=0) throw new RuntimeException("thread must be greater than 0");

        System.out.println(String.format("into main  topic:%s,group:%s,threadNum:%s", Constant.topic, Constant.group, Constant.thread));

        try {
            Properties props = new Properties();
            props.put("bootstrap.servers", Constant.ZOOKEEPER_CONNECT);
            props.put("session.timeout.ms", "7000");
            props.put("acks", "all");
            props.put("retries", 0);
            props.put("batch.size", 16384);
            props.put("linger.ms", 1);
            props.put("buffer.memory", 33554432);
            props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
            props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");


            // multi-threaded variant: a fixed pool of Constant.thread threads,
            // each running a ProducerRunner (commented out in this test)
//            ExecutorService executor = Executors.newFixedThreadPool(Constant.thread);
//            for(int i=0;i<Constant.thread;i++){
//                executor.execute(new ProducerRunner(props,Constant.topic));
//            }

            ProductKafka.ProducerRunner producer = new ProductKafka.ProducerRunner(props,Constant.topic);
            producer.run();

        }catch (Throwable e){
            e.printStackTrace();
            logger.error(e.getMessage());
            logger.error("Game Over ....");
            System.exit(1);
        }
    }

    static class ProducerRunner implements Runnable {
        KafkaProducer<String, String> producer = null;
        public ProducerRunner(Properties props, String topic) {
            this.producer = new KafkaProducer<>(props);
        }

        public void run() {
            while (true){
                try {
                    logger.info(String.format("thread start time : %s", String.valueOf(new Date(System.currentTimeMillis()))));
                    Thread.sleep(100);
                    producer.send(new ProducerRecord<String, String>(Constant.topic , "this is my test data "));
                    logger.info(String.format("thread end time : %s", String.valueOf(new Date(System.currentTimeMillis()))));
                    logger.info(String.format("Thread sleep  is %s", 100));
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
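Since KafkaProducer is thread-safe, the more interesting variant is the commented-out executor branch above, with one change: all threads should share a single producer instance rather than each runner constructing its own. A minimal sketch of that pattern (the thread count, topic name, and broker address are placeholders chosen for illustration):

import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class SharedProducerDemo {
    public static void main(String[] args) throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker address
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        final KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int i = 0; i < 4; i++) {
            final int id = i;
            // every task calls send() on the same instance; KafkaProducer is thread-safe
            pool.execute(new Runnable() {
                public void run() {
                    producer.send(new ProducerRecord<String, String>("demo-topic", "from-thread-" + id));
                }
            });
        }
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
        producer.close(); // flushes buffered records before shutting down
    }
}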

2. The KafkaProducer message-send flow:

2.1 Construct a KafkaProducer object, initializing the components it relies on, such as the buffer (RecordAccumulator) and the Sender thread.

2.2 Construct a ProducerRecord:

new ProducerRecord<String, String>(Constant.topic , "this is my test data ")

2.3 Call KafkaProducer's send method to send the ProducerRecord:

producer.send(new ProducerRecord<String, String>(Constant.topic , "this is my test data "));

2.4 Before sending, if interceptors are configured, the record is run through them first:

public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
    ProducerRecord<K, V> interceptedRecord = this.interceptors == null ? record : this.interceptors.onSend(record);
    return this.doSend(interceptedRecord, callback);
}
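The interceptors referenced here implement org.apache.kafka.clients.producer.ProducerInterceptor. A minimal hedged sketch that stamps every value with a prefix (the "traced:" prefix is made up for illustration):

import java.util.Map;
import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public class TracingInterceptor implements ProducerInterceptor<String, String> {
    @Override
    public ProducerRecord<String, String> onSend(ProducerRecord<String, String> record) {
        // runs on the caller's thread before the record is serialized and partitioned
        return new ProducerRecord<String, String>(record.topic(), record.partition(),
                record.key(), "traced:" + record.value());
    }

    @Override
    public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
        // runs when the broker acknowledges the record, or when the send fails
    }

    @Override
    public void close() { }

    @Override
    public void configure(Map<String, ?> configs) { }
}

It is registered through the interceptor.classes config, e.g. props.put("interceptor.classes", TracingInterceptor.class.getName()).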

2.5 Serialize the key and value.

2.6 Based on the record's contents, choose a suitable partition for the message (how exactly is analyzed later).

2.7 Append the message, grouped by partition, to the RecordAccumulator staging area.

2.8 Once triggered, the background Sender thread fetches messages from the RecordAccumulator and builds ClientRequests (how they are built is analyzed later).

2.9 Hand the ClientRequests to the NetworkClient, which prepares them for sending.

2.10 The NetworkClient places each request into a KafkaChannel, performs the network I/O, and finally sends it to the Kafka server.
 

That summarizes the end-to-end execution flow of KafkaProducer.
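To see this flow from the caller's side, here is a minimal sketch contrasting the blocking (Future) and callback styles of send; the topic, key/value, and broker address are placeholders:

import java.util.Properties;
import java.util.concurrent.Future;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public class SendFlowDemo {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker address
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        // send() only appends the record to the RecordAccumulator and returns a Future;
        // the background Sender thread performs steps 2.8-2.10
        Future<RecordMetadata> future =
                producer.send(new ProducerRecord<String, String>("demo-topic", "k", "v"));
        RecordMetadata meta = future.get(); // block until the broker responds
        System.out.println("partition=" + meta.partition() + " offset=" + meta.offset());

        // callback style: invoked on the I/O thread once the response (or an error) arrives
        producer.send(new ProducerRecord<String, String>("demo-topic", "k", "v"), new Callback() {
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null) exception.printStackTrace();
            }
        });
        producer.close();
    }
}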

3. A deep dive into the KafkaProducer implementation

Based on the demo above, let's first look at how KafkaProducer is initialized, starting with its fields:

public class KafkaProducer<K, V> implements Producer<K, V> {
 
    private static final Logger log = LoggerFactory.getLogger(KafkaProducer.class);
    private static final AtomicInteger PRODUCER_CLIENT_ID_SEQUENCE = new AtomicInteger(1);
    private static final String JMX_PREFIX = "kafka.producer";
 
    private String clientId;   // an identifier for this client
    private final Partitioner partitioner;   // partition selector: decides which partition a record goes to
    private final int maxRequestSize;  // the maximum request size the client will send
    private final long totalMemorySize; // total memory available for buffering records (not per-message)
    private final Metadata metadata;  // maintains the Kafka cluster metadata
    private final RecordAccumulator accumulator; // the message staging area
    private final Sender sender; // the task that actually sends messages
    private final Metrics metrics; // statistics
    private final Thread ioThread; // the thread that runs the Sender task
    private final CompressionType compressionType; // message compression strategy
    private final Sensor errors; // sensor for recording errors
    private final Time time;
    private final Serializer<K> keySerializer; // key serializer
    private final Serializer<V> valueSerializer; // value serializer
    private final ProducerConfig producerConfig; // producer configuration
    private final long maxBlockTimeMs; // maximum time to block waiting for metadata updates (and buffer space)
    private final int requestTimeoutMs; // request timeout
    private final ProducerInterceptors<K, V> interceptors; // message interceptors
Now the constructor. When reading source code I tend to follow the main path rather than agonize over every detail, so below I annotate only the important parts:

 private KafkaProducer(ProducerConfig config, Serializer<K> keySerializer, Serializer<V> valueSerializer) {
        try {
            log.trace("Starting the Kafka producer");
            Map<String, Object> userProvidedConfigs = config.originals();
            this.producerConfig = config;
            this.time = new SystemTime();
            clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG);
            if (clientId.length() <= 0)
                clientId = "producer-" + PRODUCER_CLIENT_ID_SEQUENCE.getAndIncrement();
            Map<String, String> metricTags = new LinkedHashMap<String, String>();
            metricTags.put("client-id", clientId);
            MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG))
                    .timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS)
                    .tags(metricTags);
            List<MetricsReporter> reporters = config.getConfiguredInstances(ProducerConfig.METRIC_REPORTER_CLASSES_CONFIG,
                    MetricsReporter.class);
            reporters.add(new JmxReporter(JMX_PREFIX));
            this.metrics = new Metrics(metricConfig, reporters, time);
            // initialize the partitioner
            this.partitioner = config.getConfiguredInstance(ProducerConfig.PARTITIONER_CLASS_CONFIG, Partitioner.class);
            long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
            // configure the key serializer
            if (keySerializer == null) {
                this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                        Serializer.class);
                this.keySerializer.configure(config.originals(), true);
            } else {
                config.ignore(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG);
                this.keySerializer = keySerializer;
            }
            // configure the value serializer
            if (valueSerializer == null) {
                this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                        Serializer.class);
                this.valueSerializer.configure(config.originals(), false);
            } else {
                config.ignore(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG);
                this.valueSerializer = valueSerializer;
            }
 
            // load interceptors and make sure they get clientId
            userProvidedConfigs.put(ProducerConfig.CLIENT_ID_CONFIG, clientId);
            List<ProducerInterceptor<K, V>> interceptorList = (List) (new ProducerConfig(userProvidedConfigs)).getConfiguredInstances(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,
                    ProducerInterceptor.class);
            this.interceptors = interceptorList.isEmpty() ? null : new ProducerInterceptors<>(interceptorList);
 
            ClusterResourceListeners clusterResourceListeners = configureClusterResourceListeners(keySerializer, valueSerializer, interceptorList, reporters);
            // initialize the cluster metadata
            this.metadata = new Metadata(retryBackoffMs, config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG), true, clusterResourceListeners);
            // initialize the maximum request size
            this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
            // initialize the total message buffer size
            this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
            // initialize the compression strategy
            this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG));
            /* check for user defined settings.
             * If the BLOCK_ON_BUFFER_FULL is set to true,we do not honor METADATA_FETCH_TIMEOUT_CONFIG.
             * This should be removed with release 0.9 when the deprecated configs are removed.
             */
            if (userProvidedConfigs.containsKey(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG)) {
                log.warn(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG + " config is deprecated and will be removed soon. " +
                        "Please use " + ProducerConfig.MAX_BLOCK_MS_CONFIG);
                boolean blockOnBufferFull = config.getBoolean(ProducerConfig.BLOCK_ON_BUFFER_FULL_CONFIG);
                if (blockOnBufferFull) {
                    this.maxBlockTimeMs = Long.MAX_VALUE;
                } else if (userProvidedConfigs.containsKey(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG)) {
                    log.warn(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG + " config is deprecated and will be removed soon. " +
                            "Please use " + ProducerConfig.MAX_BLOCK_MS_CONFIG);
                    this.maxBlockTimeMs = config.getLong(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG);
                } else {
                    this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
                }
            } else if (userProvidedConfigs.containsKey(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG)) {
                log.warn(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG + " config is deprecated and will be removed soon. " +
                        "Please use " + ProducerConfig.MAX_BLOCK_MS_CONFIG);
                this.maxBlockTimeMs = config.getLong(ProducerConfig.METADATA_FETCH_TIMEOUT_CONFIG);
            } else {
                this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
            }
 
            /* check for user defined settings.
             * If the TIME_OUT config is set use that for request timeout.
             * This should be removed with release 0.9
             */
            if (userProvidedConfigs.containsKey(ProducerConfig.TIMEOUT_CONFIG)) {
                log.warn(ProducerConfig.TIMEOUT_CONFIG + " config is deprecated and will be removed soon. Please use " +
                        ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG);
                this.requestTimeoutMs = config.getInt(ProducerConfig.TIMEOUT_CONFIG);
            } else {
                this.requestTimeoutMs = config.getInt(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG);
            }
            // initialize the RecordAccumulator buffer
            this.accumulator = new RecordAccumulator(config.getInt(ProducerConfig.BATCH_SIZE_CONFIG),
                    this.totalMemorySize,
                    this.compressionType,
                    config.getLong(ProducerConfig.LINGER_MS_CONFIG),
                    retryBackoffMs,
                    metrics,
                    time);
 
            List<InetSocketAddress> addresses = ClientUtils.parseAndValidateAddresses(config.getList(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG));
            this.metadata.update(Cluster.bootstrap(addresses), time.milliseconds());
            ChannelBuilder channelBuilder = ClientUtils.createChannelBuilder(config.values());
            // build the NetworkClient that performs I/O with the Kafka server
            NetworkClient client = new NetworkClient(
                    new Selector(config.getLong(ProducerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG), this.metrics, time, "producer", channelBuilder),
                    this.metadata,
                    clientId,
                    config.getInt(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION),
                    config.getLong(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG),
                    config.getInt(ProducerConfig.SEND_BUFFER_CONFIG),
                    config.getInt(ProducerConfig.RECEIVE_BUFFER_CONFIG),
                    this.requestTimeoutMs, time);
            // initialize the Sender task that sends messages; it runs on ioThread
           this.sender = new Sender(client,
                    this.metadata,
                    this.accumulator,
                    config.getInt(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION) == 1,
                    config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG),
                    (short) parseAcks(config.getString(ProducerConfig.ACKS_CONFIG)),
                    config.getInt(ProducerConfig.RETRIES_CONFIG),
                    this.metrics,
                    new SystemTime(),
                    clientId,
                    this.requestTimeoutMs);
            String ioThreadName = "kafka-producer-network-thread" + (clientId.length() > 0 ? " | " + clientId : "");
            this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
            // start the I/O thread
             this.ioThread.start();
            this.errors = this.metrics.sensor("errors");
            config.logUnused();
            AppInfoParser.registerAppInfo(JMX_PREFIX, clientId);
            log.debug("Kafka producer started");
        } catch (Throwable t) {
            // call close methods if internal objects are already constructed
            // this is to prevent resource leak. see KAFKA-2121
            close(0, TimeUnit.MILLISECONDS, true);
            // now propagate the exception
            throw new KafkaException("Failed to construct kafka producer", t);
        }
    }
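One practical takeaway from this constructor: on this code line, the deprecated block.on.buffer.full, metadata.fetch.timeout.ms, and timeout.ms settings are all folded into two surviving configs. A small sketch of the non-deprecated equivalents (the values here are arbitrary examples, not recommendations):

Properties props = new Properties();
// bounds how long send() may block waiting for metadata or buffer space;
// supersedes block.on.buffer.full and metadata.fetch.timeout.ms
props.put("max.block.ms", "60000");
// how long to wait for the broker to answer a request; supersedes timeout.ms
props.put("request.timeout.ms", "30000");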
Once the KafkaProducer has been initialized, send is called to send records:

 @Override
    public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
        // intercept the record, which can be potentially modified; this method does not throw exceptions
        ProducerRecord<K, V> interceptedRecord = this.interceptors == null ? record : this.interceptors.onSend(record);
        return doSend(interceptedRecord, callback);
    }
  private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
        TopicPartition tp = null;
        try {
            // first make sure the metadata for the topic is available:
            // wait for Kafka to update the metadata if necessary
            ClusterAndWaitTime clusterAndWaitTime = waitOnMetadata(record.topic(), record.partition(), maxBlockTimeMs);
            long remainingWaitMs = Math.max(0, maxBlockTimeMs - clusterAndWaitTime.waitedOnMetadataMs);
            // pull the cluster information out of the metadata
            Cluster cluster = clusterAndWaitTime.cluster;
            // serialize the key
            byte[] serializedKey;
            try {
                serializedKey = keySerializer.serialize(record.topic(), record.key());
            } catch (ClassCastException cce) {
                throw new SerializationException("Can't convert key of class " + record.key().getClass().getName() +
                        " to class " + producerConfig.getClass(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).getName() +
                        " specified in key.serializer");
            }
            // serialize the value
            byte[] serializedValue;
            try {
                serializedValue = valueSerializer.serialize(record.topic(), record.value());
            } catch (ClassCastException cce) {
                throw new SerializationException("Can't convert value of class " + record.value().getClass().getName() +
                        " to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
                        " specified in value.serializer");
            }
            // determine which partition the message should be sent to
            int partition = partition(record, serializedKey, serializedValue, cluster);
            int serializedSize = Records.LOG_OVERHEAD + Record.recordSize(serializedKey, serializedValue);
            ensureValidRecordSize(serializedSize);
            tp = new TopicPartition(record.topic(), partition);
            long timestamp = record.timestamp() == null ? time.milliseconds() : record.timestamp();
            log.trace("Sending record {} with callback {} to topic {} partition {}", record, callback, record.topic(), partition);
            // producer callback will make sure to call both 'callback' and interceptor callback
            Callback interceptCallback = this.interceptors == null ? callback : new InterceptorCallback<>(callback, this.interceptors, tp);
            // append the outgoing message to the RecordAccumulator
            RecordAccumulator.RecordAppendResult result = accumulator.append(tp, timestamp, serializedKey, serializedValue, interceptCallback, remainingWaitMs);
            // if, after the append, the last RecordBatch is full or the deque now holds more than one batch
            if (result.batchIsFull || result.newBatchCreated) {
                log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition);
                // wake up the Sender thread
                this.sender.wakeup();
            }
            // return the resulting future
            return result.future;
            // handling exceptions and record the errors;
            // for API exceptions return them in the future,
            // for other exceptions throw directly
        } catch (ApiException e) {
            // .................. omitted
        }
    }
Before analyzing waitOnMetadata, a word on Kafka cluster metadata. As we know, each topic has multiple partitions and each partition has multiple replicas; among a partition's replicas one must be the leader, and the others merely sync data from it. Kafka's metadata records things such as which replicas a partition has, which machine hosts the leader replica, which machines host the follower replicas, and which replicas are in the ISR (roughly, the follower replicas whose data does not lag far behind the leader's). On the client side, the metadata is maintained mainly by the classes discussed below.

Metadata has a field named cluster, and the Cluster class maintains the following relationships:

 private final boolean isBootstrapConfigured;
 // all the nodes in the Kafka cluster
 private final List<Node> nodes;
 private final Set<String> unauthorizedTopics;
 private final Set<String> internalTopics;
 // maps a TopicPartition to its detailed PartitionInfo
 private final Map<TopicPartition, PartitionInfo> partitionsByTopicPartition;
 // maps a topic name to the info of its partitions
 private final Map<String, List<PartitionInfo>> partitionsByTopic;
 // same mapping as above, except every partition here must currently have a leader (above, it may not)
 private final Map<String, List<PartitionInfo>> availablePartitionsByTopic;
 // maps a node id to the partitions on that node, so all partitions hosted by a given node can be looked up
 private final Map<Integer, List<PartitionInfo>> partitionsByNode;
 // maps a node id to the Node itself
 private final Map<Integer, Node> nodesById;
 private final ClusterResource clusterResource;
The Node, TopicPartition, and PartitionInfo classes are simple field holders, so I won't belabor them here.
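These maps back Cluster's public accessors. A quick sketch of how they are queried, using only accessors that appear elsewhere in this article ("my-topic" and node id 1 are placeholders):

import java.util.List;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;

class ClusterQueries {
    // given a Cluster snapshot (e.g. obtained via metadata.fetch())
    static void inspect(Cluster cluster) {
        Integer count = cluster.partitionCountForTopic("my-topic");          // partitionsByTopic
        List<PartitionInfo> all = cluster.partitionsForTopic("my-topic");    // partitionsByTopic
        List<PartitionInfo> available =
                cluster.availablePartitionsForTopic("my-topic");             // leader present only
        Node leader = cluster.leaderFor(new TopicPartition("my-topic", 0));  // partitionsByTopicPartition
        List<PartitionInfo> onNode = cluster.partitionsForNode(1);           // partitionsByNode
    }
}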

So much for Cluster. Metadata holds a reference to a Cluster, and its main fields are:

    private final long refreshBackoffMs;
    private final long metadataExpireMs;
    // version number, incremented by 1 each time the metadata is updated
    private int version;
    // timestamp of the last update attempt
    private long lastRefreshMs;
    // timestamp of the last successful update
    private long lastSuccessfulRefreshMs;
    // the cluster metadata itself
    private Cluster cluster;
    // whether a forced update is needed; if true, the background Sender thread will refresh the metadata
    private boolean needUpdate;
    /* Topics with expiry time */
    // the topics currently being tracked
    private final Map<String, Long> topics;
    private final List<Listener> listeners;
    private final ClusterResourceListeners clusterResourceListeners;
    private boolean needMetadataForAllTopics;
    private final boolean topicExpiryEnabled;
Now back to waitOnMetadata, to see how the metadata actually gets updated:

private ClusterAndWaitTime waitOnMetadata(String topic, Integer partition, long maxWaitMs) throws InterruptedException {
        // add topic to metadata topic list if it is not there already and reset expiry
        metadata.add(topic);
        // fetch the current cluster metadata
        Cluster cluster = metadata.fetch();
        // get the number of partitions for this topic
        Integer partitionsCount = cluster.partitionCountForTopic(topic);
        // Return cached metadata if we have it, and if the record's partition is either undefined
        // or within the known partition range
        // if the metadata already holds partition info for this topic, return the cached metadata directly
        if (partitionsCount != null && (partition == null || partition < partitionsCount))
            return new ClusterAndWaitTime(cluster, 0);
 
        long begin = time.milliseconds();
        long remainingWaitMs = maxWaitMs;
        long elapsed;
        // Issue metadata requests until we have metadata for the topic or maxWaitTimeMs is exceeded.
        // In case we already have cached metadata for the topic, but the requested partition is greater
        // than expected, issue an update request only once. This is necessary in case the metadata
        // is stale and the number of partitions for this topic has increased in the meantime.
        do {
            log.trace("Requesting metadata update for topic {}.", topic);
            // set needUpdate to true, marking the metadata as needing a refresh
            int version = metadata.requestUpdate();
            // wake up the Sender thread
            sender.wakeup();
            try {
                // wait for the Sender thread to update the metadata
                metadata.awaitUpdate(version, remainingWaitMs);
            } catch (TimeoutException ex) {
                // Rethrow with original maxWaitMs to prevent logging exception with remainingWaitMs
                throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms.");
            }
            cluster = metadata.fetch();
            elapsed = time.milliseconds() - begin;
            if (elapsed >= maxWaitMs)
                throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms.");
            if (cluster.unauthorizedTopics().contains(topic))
                throw new TopicAuthorizationException(topic);
            remainingWaitMs = maxWaitMs - elapsed;
            partitionsCount = cluster.partitionCountForTopic(topic);
        } while (partitionsCount == null);
 
        if (partition != null && partition >= partitionsCount) {
            throw new KafkaException(
                    String.format("Invalid partition given with record: %d is not in the range [0...%d).", partition, partitionsCount));
        }
 
        return new ClusterAndWaitTime(cluster, elapsed);
    }
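One caller-visible consequence: send() can stall here for up to max.block.ms, and when the deadline passes, the TimeoutException (an ApiException) is reported through the returned Future rather than thrown out of send itself, per the elided ApiException catch block in doSend. A hedged sketch of checking for it (the topic name is a placeholder):

import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.errors.TimeoutException;

class MetadataTimeoutDemo {
    static void sendChecked(KafkaProducer<String, String> producer) throws InterruptedException {
        Future<RecordMetadata> future =
                producer.send(new ProducerRecord<String, String>("my-topic", "value"));
        try {
            future.get();
        } catch (ExecutionException e) {
            if (e.getCause() instanceof TimeoutException) {
                // metadata was not available within max.block.ms: brokers unreachable,
                // topic missing, or no leader elected yet
            }
        }
    }
}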
Once the metadata is up to date, the next step is to choose the partition that will hold our message:

   int partition = partition(record, serializedKey, serializedValue, cluster);

If the record you sent already specifies a partition, it is simply returned:
 private int partition(ProducerRecord<K, V> record, byte[] serializedKey, byte[] serializedValue, Cluster cluster) {
        Integer partition = record.partition();
        return partition != null ?
                partition :
                partitioner.partition(
                        record.topic(), record.key(), serializedKey, record.value(), serializedValue, cluster);
    }
If no partition was specified, partitioner.partition makes the call. Kafka provides a default implementation, and you can of course plug in your own distribution strategy (see the sketch after the summary below):

 public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // get all partition info for the given topic
        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
        // number of partitions
        int numPartitions = partitions.size();
        // if the message has no key
        if (keyBytes == null) {
            int nextValue = counter.getAndIncrement();
            // get the available partitions for this topic, i.e. those whose leader replica
            // currently exists (a partition can temporarily lack a leader for various reasons)
            List<PartitionInfo> availablePartitions = cluster.availablePartitionsForTopic(topic);
            if (availablePartitions.size() > 0) {
                // take the counter modulo the number of available partitions to get an index
                int part = Utils.toPositive(nextValue) % availablePartitions.size();
                return availablePartitions.get(part).partition();
            } else {
                // no partitions are available, give a non-available partition
                return Utils.toPositive(nextValue) % numPartitions;
            }
        // if a key was provided, hash it and take the result modulo the partition count
        } else {
            // hash the keyBytes to choose a partition
            return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
        }
    }
To summarize:

    1. If you specify a partition, the message is sent to that partition, full stop.

    2. If you specify both a partition and a key, it is still the partition that decides.

    3. If you specify a key but no partition, the key is hashed and taken modulo the current partition count to pick the partition.

    4. If you specify neither a partition nor a key, a round-robin scheme is used (an ever-incrementing counter taken modulo the partition count), which spreads consecutive messages across different partitions.
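As mentioned above, you can plug in your own strategy by implementing the Partitioner interface and registering it via partitioner.class. A minimal hedged sketch that pins one "hot" key to partition 0 (the key name and the pinning rule are invented for illustration):

import java.util.Map;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.utils.Utils;

public class HotKeyPartitioner implements Partitioner {
    @Override
    public void configure(Map<String, ?> configs) { }

    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (key != null && "hot-key".equals(key.toString()))
            return 0; // route the hot key to a dedicated partition
        if (keyBytes == null)
            return 0; // keyless records: a real implementation would round-robin here
        // otherwise hash, exactly like the default partitioner
        return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
    }

    @Override
    public void close() { }
}

It is enabled with props.put("partitioner.class", HotKeyPartitioner.class.getName()).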

With partition selection covered, the next step is placing the message into the staging area, the RecordAccumulator. Let's look at its structure first.

 

From the relationships among RecordAccumulator, RecordBatch, and MemoryRecords, we can see that RecordAccumulator holds a ConcurrentMap whose key is a TopicPartition and whose value is a double-ended queue of RecordBatch. In other words, the RecordAccumulator caches messages per partition: messages for the same partition end up together. Each RecordBatch in turn holds a reference to a MemoryRecords, and MemoryRecords is where the messages are actually stored.

MemoryRecords supports message compression and holds the java.nio.ByteBuffer that stores the message bytes. The append method below shows what happens when a message is appended to the RecordAccumulator.
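Before reading it, a toy model of the core data structure may help (heavily simplified; the real code adds buffer pooling, batch management, and compression):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.kafka.common.TopicPartition;

class ToyAccumulator {
    // one deque of batches per partition, just like RecordAccumulator's batches map
    private final ConcurrentMap<TopicPartition, Deque<byte[]>> batches =
            new ConcurrentHashMap<TopicPartition, Deque<byte[]>>();

    void append(TopicPartition tp, byte[] payload) {
        // get-or-create the partition's deque, like getOrCreateDeque(tp)
        Deque<byte[]> dq = batches.get(tp);
        if (dq == null) {
            batches.putIfAbsent(tp, new ArrayDeque<byte[]>());
            dq = batches.get(tp);
        }
        // ArrayDeque is not thread-safe, so appends synchronize on the deque,
        // exactly as the real append() does
        synchronized (dq) {
            dq.addLast(payload);
        }
    }
}

With that model in mind, here is the real append method: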

  public RecordAppendResult append(TopicPartition tp,
                                     long timestamp,
                                     byte[] key,
                                     byte[] value,
                                     Callback callback,
                                     long maxTimeToBlock) throws InterruptedException {
        // We keep track of the number of appending thread to make sure we do not miss batches in
        // abortIncompleteBatches().
        // count how many threads are currently appending into the RecordAccumulator
        appendsInProgress.incrementAndGet();
        try {
            // check if we have an in-progress batch
            // step 1: look up the deque for this partition in batches; return it if present, otherwise create one
            Deque<RecordBatch> dq = getOrCreateDeque(tp);
            // ArrayDeque is not thread-safe, so synchronize on the deque
            synchronized (dq) {
                if (closed)
                    throw new IllegalStateException("Cannot send after the producer is closed.");
                // step 2: try to append the record to the last RecordBatch in the deque
                RecordAppendResult appendResult = tryAppend(timestamp, key, value, callback, dq);
                // if the append succeeded, return immediately
                if (appendResult != null)
                    return appendResult;
            }
            // the append above can fail when, say, the message is too large for the space
            // left in the deque's last MemoryRecords
            // we don't have an in-progress record batch: try to allocate a new one
            // recompute the size to allocate
            int size = Math.max(this.batchSize, Records.LOG_OVERHEAD + Record.recordSize(key, value));
            log.trace("Allocating a new {} byte message buffer for topic {} partition {}", size, tp.topic(), tp.partition());
            // allocate a new buffer
            ByteBuffer buffer = free.allocate(size, maxTimeToBlock);
            synchronized (dq) {
                // Need to check if producer is closed again after grabbing the dequeue lock.
                if (closed)
                    throw new IllegalStateException("Cannot send after the producer is closed.");
                // retry the append
                RecordAppendResult appendResult = tryAppend(timestamp, key, value, callback, dq);
                if (appendResult != null) {
                    // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen often...
                    // release the buffer we just allocated
                    free.deallocate(buffer);
                    return appendResult;
                }
                // if the retry also failed, build a new MemoryRecords on the fresh buffer
                MemoryRecords records = MemoryRecords.emptyRecords(buffer, compression, this.batchSize);
                // wrap it in a new RecordBatch
                RecordBatch batch = new RecordBatch(tp, records, time.milliseconds());
                // append the message to the newly created RecordBatch
                FutureRecordMetadata future = Utils.notNull(batch.tryAppend(timestamp, key, value, callback, time.milliseconds()));
                // add the new RecordBatch to the tail of the deque
                dq.addLast(batch);
                incomplete.add(batch);
                return new RecordAppendResult(future, dq.size() > 1 || batch.records.isFull(), true);
            }
        } finally {
            // done appending; decrement the in-progress counter
            appendsInProgress.decrementAndGet();
        }
    }
At this point the message sits in the buffer. Next, let's see how the Sender thread picks messages up and communicates with the Kafka server.

  void run(long now) {
        // fetch the cluster metadata
        Cluster cluster = metadata.fetch();
        // get the list of partitions with data ready to send
        // from the buffer, work out which Node(s) we can send messages to (the selection logic is analyzed below)
        RecordAccumulator.ReadyCheckResult result = this.accumulator.ready(cluster, now);
        // if any returned partition has no known leader replica, mark the metadata for update
        // if there are any partitions whose leaders are not known yet, force metadata update
        if (!result.unknownLeaderTopics.isEmpty()) {
            // The set of topics with unknown leader contains topics with leader election pending as well as
            // topics which may have expired. Add the topic again to metadata to ensure it is included
            // and request metadata update, since there are messages to send to the topic.
            for (String topic : result.unknownLeaderTopics)
                this.metadata.add(topic);
            this.metadata.requestUpdate();
        }
 
        // remove any nodes we aren't ready to send to
        Iterator<Node> iter = result.readyNodes.iterator();
        long notReadyTimeout = Long.MAX_VALUE;
        while (iter.hasNext()) {
            Node node = iter.next();
            // check whether each node's network connection is ready for sending; remove any node that isn't
            if (!this.client.ready(node, now)) {
                iter.remove();
                notReadyTimeout = Math.min(notReadyTimeout, this.client.connectionDelay(node, now));
            }
        }
        // collect the messages awaiting sending: key = node id, value = all batches for that node
        // create produce requests
        Map<Integer, List<RecordBatch>> batches = this.accumulator.drain(cluster,
                                                                         result.readyNodes,
                                                                         this.maxRequestSize,
                                                                         now);
        if (guaranteeMessageOrder) {
            // Mute all the partitions drained
            for (List<RecordBatch> batchList : batches.values()) {
                for (RecordBatch batch : batchList)
                    this.accumulator.mutePartition(batch.topicPartition);
            }
        }
 
        List<RecordBatch> expiredBatches = this.accumulator.abortExpiredBatches(this.requestTimeout, now);
        // update sensors
        for (RecordBatch expiredBatch : expiredBatches)
            this.sensors.recordErrors(expiredBatch.topicPartition.topic(), expiredBatch.recordCount);
 
        sensors.updateProduceRequestMetrics(batches);
        // build the outgoing messages into ClientRequests; only one ClientRequest is built per node
        List<ClientRequest> requests = createProduceRequests(batches, now);
        // If we have any nodes that are ready to send + have sendable data, poll with 0 timeout so this can immediately
        // loop and try sending more data. Otherwise, the timeout is determined by nodes that have partitions with data
        // that isn't yet sendable (e.g. lingering, backing off). Note that this specifically does not include nodes
        // with sendable data that aren't ready to send since they would cause busy looping.
        long pollTimeout = Math.min(result.nextReadyCheckDelayMs, notReadyTimeout);
        if (result.readyNodes.size() > 0) {
            log.trace("Nodes with data ready to send: {}", result.readyNodes);
            log.trace("Created {} produce requests: {}", requests.size(), requests);
            pollTimeout = 0;
        }
        for (ClientRequest request : requests)
            client.send(request, now);
 
        // if some partitions are already ready to be sent, the select time would be 0;
        // otherwise if some partition already has some data accumulated but not ready yet,
        // the select time will be the time difference between now and its linger expiry time;
        // otherwise the select time will be the time difference between now and the metadata expiry time;
        // perform the network I/O that sends the messages to the server
        this.client.poll(pollTimeout, now);
    }
Several methods inside run are crucial; let's analyze them one by one. The first is ready, which determines, from the RecordAccumulator, the set of messages ready to be sent. So what counts as ready? See the method below:

   public ReadyCheckResult ready(Cluster cluster, long nowMs) {
        Set<Node> readyNodes = new HashSet<>();
        long nextReadyCheckDelayMs = Long.MAX_VALUE;
        Set<String> unknownLeaderTopics = new HashSet<>();
        // has the buffer pool been exhausted (i.e. threads are queued waiting for memory)?
        boolean exhausted = this.free.queued() > 0;
        // iterate over all the per-partition deques
        for (Map.Entry<TopicPartition, Deque<RecordBatch>> entry : this.batches.entrySet()) {
            TopicPartition part = entry.getKey();
            Deque<RecordBatch> deque = entry.getValue();
            // find the leader broker of this TopicPartition
            Node leader = cluster.leaderFor(part);
            synchronized (deque) {
                // this partition's leader is unknown, but its deque is non-empty (messages are waiting to be sent)
                if (leader == null && !deque.isEmpty()) {
                    // This is a partition for which leader is not known, but messages are available to send.
                    // Note that entries are currently not removed from batches when deque is empty.
                    unknownLeaderTopics.add(part.topic());
                // the leader isn't in the ready set yet, and this topic partition isn't muted,
                // i.e. not currently in flight (muting preserves per-partition message ordering)
                } else if (!readyNodes.contains(leader) && !muted.contains(part)) {
                    RecordBatch batch = deque.peekFirst();
                    if (batch != null) {
                        // is this batch backing off, i.e. waiting for its next retry?
                        boolean backingOff = batch.attempts > 0 && batch.lastAttemptMs + retryBackoffMs > nowMs;
                        // how long the batch has waited so far
                        long waitedTimeMs = nowMs - batch.lastAttemptMs;
                        // how long it must wait in total
                        long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs;
                        // how much waiting time remains
                        long timeLeftMs = Math.max(timeToWaitMs - waitedTimeMs, 0);
                        // is the batch full?
                        boolean full = deque.size() > 1 || batch.records.isFull();
                        // has it expired, i.e. waited long enough already?
                        boolean expired = waitedTimeMs >= timeToWaitMs;
                        boolean sendable = full || expired || exhausted || closed || flushInProgress();
                        // if the batch is full, has waited long enough, or the accumulator is exhausted/closing/flushing, add its leader to the ready set
                        if (sendable && !backingOff) {
                            readyNodes.add(leader);
                        } else {
                            // Note that this results in a conservative estimate since an un-sendable partition may have
                            // a leader that will later be found to have sendable data. However, this is good enough
                            // since we'll just wake up and then sleep again for the remaining time.
                            nextReadyCheckDelayMs = Math.min(timeLeftMs, nextReadyCheckDelayMs);
                        }
                    }
                }
            }
        }
 
        return new ReadyCheckResult(readyNodes, nextReadyCheckDelayMs, unknownLeaderTopics);
    }
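To restate the timing arithmetic: with linger.ms = 5 and no retry pending, a half-empty batch last touched at t = 0 becomes sendable at t = 5 ms, while a batch that fills up earlier is sendable immediately. A toy re-derivation of the per-batch check (same logic, not the real class):

class ReadyCheck {
    static boolean sendable(long nowMs, long lastAttemptMs, int attempts, long retryBackoffMs,
                            long lingerMs, boolean full, boolean exhausted,
                            boolean closed, boolean flushInProgress) {
        boolean backingOff = attempts > 0 && lastAttemptMs + retryBackoffMs > nowMs;
        long waitedTimeMs = nowMs - lastAttemptMs;
        long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs;
        boolean expired = waitedTimeMs >= timeToWaitMs;
        return (full || expired || exhausted || closed || flushInProgress) && !backingOff;
    }
}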
With the ready nodes returned, next comes drain. Given the node set from the previous step, drain pulls out of the buffer, for each ready node, the batches of every topic partition whose leader lives on that node:

  public Map<Integer, List<RecordBatch>> drain(Cluster cluster,
                                                 Set<Node> nodes,
                                                 int maxSize,
                                                 long now) {
        if (nodes.isEmpty())
            return Collections.emptyMap();
 
        Map<Integer, List<RecordBatch>> batches = new HashMap<>();
        // iterate over the nodes
        for (Node node : nodes) {
            int size = 0;
            // all the leader partitions hosted on this node
            List<PartitionInfo> parts = cluster.partitionsForNode(node.id());
            List<RecordBatch> ready = new ArrayList<>();
            /* to make starvation less likely this loop doesn't start at 0 */
            int start = drainIndex = drainIndex % parts.size();
            do {
                PartitionInfo part = parts.get(drainIndex);
                TopicPartition tp = new TopicPartition(part.topic(), part.partition());
                // Only proceed if the partition has no in-flight batches.
                if (!muted.contains(tp)) {
                   // look up this topic partition's message deque in the buffer
                   Deque<RecordBatch> deque = getDeque(new TopicPartition(part.topic(), part.partition()));
                    if (deque != null) {
                        synchronized (deque) {
                            // peek at the first batch in the deque
                            RecordBatch first = deque.peekFirst();
                            if (first != null) {
                                boolean backoff = first.attempts > 0 && first.lastAttemptMs + retryBackoffMs > now;
                                // Only drain the batch if it is not during backoff period.
                                if (!backoff) {
                                    if (size + first.records.sizeInBytes() > maxSize && !ready.isEmpty()) {
                                        // there is a rare case that a single batch size is larger than the request size due
                                        // to compression; in this case we will still eventually send this batch in a single
                                        // request
                                        break;
                                    } else {
                                        RecordBatch batch = deque.pollFirst();
                                        batch.records.close();
                                        size += batch.records.sizeInBytes();
                                        ready.add(batch);
                                        batch.drainedMs = now;
                                    }
                                }
                            }
                        }
                    }
                }
                this.drainIndex = (this.drainIndex + 1) % parts.size();
            } while (start != drainIndex);
            // the returned map is keyed by node id; the value is every batch to be sent to that node
            batches.put(node.id(), ready);
        }
        return batches;
    }
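The line int start = drainIndex = drainIndex % parts.size() deserves a note: because drainIndex is remembered across calls, each drain pass starts from where the previous one stopped, so partitions early in the list cannot starve the rest. A toy sketch of the same round-robin pattern:

import java.util.List;

class RoundRobinDrain {
    private int drainIndex = 0;

    // visits every element exactly once per call, resuming where the last call left off
    <T> void visitAll(List<T> parts) {
        int start = drainIndex = drainIndex % parts.size();
        do {
            System.out.println(parts.get(drainIndex)); // process this element
            drainIndex = (drainIndex + 1) % parts.size();
        } while (start != drainIndex);
    }
}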
The next point of interest is createProduceRequests, which the Sender uses to wrap the outgoing messages into ClientRequests:

List<ClientRequest> requests = createProduceRequests(batches, now);
 private List<ClientRequest> createProduceRequests(Map<Integer, List<RecordBatch>> collated, long now) {
        List<ClientRequest> requests = new ArrayList<ClientRequest>(collated.size());
        for (Map.Entry<Integer, List<RecordBatch>> entry : collated.entrySet())
            // exactly one ClientRequest is constructed per node
            requests.add(produceRequest(now, entry.getKey(), acks, requestTimeout, entry.getValue()));
        return requests;
    }

private ClientRequest produceRequest(long now, int destination, short acks, int timeout, List<RecordBatch> batches) {
        // maps each partition to the ByteBuffer holding that partition's serialized records
        Map<TopicPartition, ByteBuffer> produceRecordsByPartition = new HashMap<TopicPartition, ByteBuffer>(batches.size());
        // maps each partition to its RecordBatch (kept so the response callback can find it)
        final Map<TopicPartition, RecordBatch> recordsByPartition = new HashMap<TopicPartition, RecordBatch>(batches.size());
        // classify the RecordBatch list by partition, filling the two maps above
        for (RecordBatch batch : batches) {
            TopicPartition tp = batch.topicPartition;
            produceRecordsByPartition.put(tp, batch.records.buffer());
            recordsByPartition.put(tp, batch);
        }
        ProduceRequest request = new ProduceRequest(acks, timeout, produceRecordsByPartition);
        RequestSend send = new RequestSend(Integer.toString(destination),
                                           this.client.nextRequestHeader(ApiKeys.PRODUCE),
                                           request.toStruct());
        // create the callback that processes the broker's response
        RequestCompletionHandler callback = new RequestCompletionHandler() {
            public void onComplete(ClientResponse response) {
                handleProduceResponse(response, recordsByPartition, time.milliseconds());
            }
        };
 
        return new ClientRequest(now, acks != 0, send, callback);
    }
 

 
