这篇文章将主要介绍 Kafka 中的生产者 Producer。 生产者属于Kafka客户端,负责将消息发布到 Kafka 集群,以供消费者进行消费。本篇文章在 Kafka 生产者源代码的基础上,对生产者的创建,生产者内部的一些重要组件和生产者发送消息的大体流程进行介绍,并且对生产者的源代码增加了注释来方便理解。
1. 创建生产者
首先通过 Kafka examples 包中的一个示例来创建生产者。在这个示例中,Producer 封装了 KafkaProducer,并继承 Thread,在线程的 run() 方法中通过 while 循环来不断发送消息。
// Build the demo producer thread. Arguments follow the Producer constructor order:
// thread name, bootstrap servers, topic, isAsync, transactionalId (null: no transactional id is configured),
// enableIdempotency (false), numRecords, transactionTimeoutMs (-1: not positive, so no transaction timeout is configured), latch.
Producer producerThread = new Producer("producer", KafkaProperties.BOOTSTRAP_SERVERS, TOPIC_NAME, isAsync, null, false, numRecords, -1, latch);
// Start the thread; Producer.run() then sends records in a loop.
producerThread.start();
以下为示例中 Producer 的具体实现:
/**
 * Demo producer thread. It builds a {@link KafkaProducer} from the settings passed to the
 * constructor and sends up to {@code numRecords} records to {@code topic}, using either the
 * asynchronous or the synchronous send helper depending on {@code isAsync}.
 */
public class Producer extends Thread {
    private final String bootstrapServers;
    private final String topic;
    private final boolean isAsync;
    private final String transactionalId;
    private final boolean enableIdempotency;
    private final int numRecords;
    private final int transactionTimeoutMs;
    private final CountDownLatch latch;
    private volatile boolean closed;

    /**
     * Stores the connection and send settings; nothing is created until the thread runs.
     *
     * @param threadName           name given to this thread
     * @param bootstrapServers     value used for the producer's bootstrap servers setting
     * @param topic                topic the records are sent to
     * @param isAsync              {@code true} to send asynchronously, {@code false} to send synchronously
     * @param transactionalId      transactional id, or {@code null} to leave it unset
     * @param enableIdempotency    value used for the producer's idempotence setting
     * @param numRecords           upper bound on the number of records to send
     * @param transactionTimeoutMs transaction timeout; only applied when greater than zero
     * @param latch                latch stored for use outside the code shown here
     */
    public Producer(String threadName,
                    String bootstrapServers,
                    String topic,
                    boolean isAsync,
                    String transactionalId,
                    boolean enableIdempotency,
                    int numRecords,
                    int transactionTimeoutMs,
                    CountDownLatch latch) {
        super(threadName);
        this.topic = topic;
        this.bootstrapServers = bootstrapServers;
        this.numRecords = numRecords;
        this.isAsync = isAsync;
        this.enableIdempotency = enableIdempotency;
        this.transactionalId = transactionalId;
        this.transactionTimeoutMs = transactionTimeoutMs;
        this.latch = latch;
    }

    /**
     * Creates the producer, sends records until the thread is closed or {@code numRecords}
     * have been sent, then reports the count and shuts down. Any failure is printed rather
     * than propagated.
     */
    @Override
    public void run() {
        int key = 0;
        int sentRecords = 0;
        // The producer is closed automatically when the try block exits.
        try (KafkaProducer<Integer, String> producer = createKafkaProducer()) {
            // Both counters advance only after a send call returns normally.
            for (; !closed && sentRecords < numRecords; key++, sentRecords++) {
                if (!isAsync) {
                    // Synchronous send
                    syncSend(producer, key, "test" + key);
                } else {
                    // Asynchronous send
                    asyncSend(producer, key, "test" + key);
                }
            }
        } catch (Throwable e) {
            Utils.printErr("Unhandled exception");
            e.printStackTrace();
        }
        Utils.printOut("Sent %d records", sentRecords);
        shutdown();
    }

    /**
     * Assembles the producer configuration and instantiates the {@link KafkaProducer}.
     *
     * @return a new producer with Integer keys and String values
     */
    public KafkaProducer<Integer, String> createKafkaProducer() {
        Properties settings = new Properties();

        // Bootstrap server address given to the producer
        settings.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);

        // Client id, made unique with a random UUID
        String clientId = "client-" + UUID.randomUUID();
        settings.put(ProducerConfig.CLIENT_ID_CONFIG, clientId);

        // Serializers matching the Integer key / String value types
        settings.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
        settings.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);

        // Transaction-related settings, each applied only when a value was supplied
        boolean hasTransactionTimeout = transactionTimeoutMs > 0;
        if (hasTransactionTimeout) {
            // max time before the transaction coordinator proactively aborts the ongoing transaction
            settings.put(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, transactionTimeoutMs);
        }
        boolean hasTransactionalId = transactionalId != null;
        if (hasTransactionalId) {
            // the transactional id must be static and unique
            // it is used to identify the same producer instance across process restarts
            settings.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, transactionalId);
        }

        // Idempotence flag
        settings.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, enableIdempotency);

        return new KafkaProducer<>(settings);
    }
}
2. KafkaProducer的初始化
这一章节主要介绍生产者 KafkaProducer 的初始化,其中包含一些生产者中的重要组件的初始化,主要包括:
- 分区器 Partitioner
- 序列化器 Serializer
- 拦截器 ProducerInterceptor
- RecordAccumulator
- 元数据 Metadata
- Sender
/**
 * Initializes the producer's components in order: logging context, metrics, partitioner,
 * serializers, interceptors, RecordAccumulator, metadata, and the Sender with its I/O thread.
 * Non-null keySerializer, valueSerializer, metadata and interceptors arguments are used as given
 * instead of being built from config. If any step throws, close(Duration.ofMillis(0), true) is
 * called and the failure is rethrown wrapped in a KafkaException.
 */
KafkaProducer(ProducerConfig config,
Serializer<K> keySerializer,
Serializer<V> valueSerializer,
ProducerMetadata metadata,
KafkaClient kafkaClient,
ProducerInterceptors<K, V> interceptors,
Time time) {
try {
this.producerConfig = config;
this.time = time;
String transactionalId = config.getString(ProducerConfig.TRANSACTIONAL_ID_CONFIG);
this.clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG);
// Log prefix carries the client id, plus the transactional id when one is configured
LogContext logContext;
if (transactionalId == null)
logContext = new LogContext(String.format("[Producer clientId=%s] ", clientId));
else
logContext = new LogContext(String.format("[Producer clientId=%s, transactionalId=%s] ", clientId, transactionalId));
log = logContext.logger(KafkaProducer.class);
log.trace("Starting the Kafka producer");
// metrics
Map<String, String> metricTags = Collections.singletonMap("client-id", clientId);
MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG))
.timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS)
.recordLevel(Sensor.RecordingLevel.forName(config.getString(ProducerConfig.METRICS_RECORDING_LEVEL_CONFIG)))
.tags(metricTags);
List<MetricsReporter> reporters = CommonClientConfigs.metricsReporters(clientId, config);
this.clientTelemetryReporter = CommonClientConfigs.telemetryReporter(clientId, config);
this.clientTelemetryReporter.ifPresent(reporters::add);
MetricsContext metricsContext = new KafkaMetricsContext(JMX_PREFIX,
config.originalsWithPrefix(CommonClientConfigs.METRICS_CONTEXT_PREFIX));
this.metrics = new Metrics(metricConfig, reporters, time, metricsContext);
this.producerMetrics = new KafkaProducerMetrics(metrics);
// 1. Partitioner, instantiated from the configured partitioner class
this.partitioner = config.getConfiguredInstance(
ProducerConfig.PARTITIONER_CLASS_CONFIG,
Partitioner.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
warnIfPartitionerDeprecated();
this.partitionerIgnoreKeys = config.getBoolean(ProducerConfig.PARTITIONER_IGNORE_KEYS_CONFIG);
// Retry backoff values, passed below to both the RecordAccumulator and the ProducerMetadata
long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
long retryBackoffMaxMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MAX_MS_CONFIG);
// 2. Serializers: use the supplied instance, otherwise instantiate and configure one from config
if (keySerializer == null) {
this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
Serializer.class);
this.keySerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), true);
} else {
config.ignore(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG);
this.keySerializer = keySerializer;
}
if (valueSerializer == null) {
this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
Serializer.class);
this.valueSerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), false);
} else {
config.ignore(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG);
this.valueSerializer = valueSerializer;
}
// 3. Interceptors: use the supplied ProducerInterceptors, otherwise wrap the configured list
List<ProducerInterceptor<K, V>> interceptorList = ClientUtils.configuredInterceptors(config,
ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,
ProducerInterceptor.class);
if (interceptors != null)
this.interceptors = interceptors;
else
this.interceptors = new ProducerInterceptors<>(interceptorList);
ClusterResourceListeners clusterResourceListeners = ClientUtils.configureClusterResourceListeners(
interceptorList,
reporters,
Arrays.asList(this.keySerializer, this.valueSerializer));
// Maximum request size read from config
this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
// Total buffer memory, used below to size the BufferPool handed to the RecordAccumulator
this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
// Compression settings
this.compression = configureCompression(config);
this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
int deliveryTimeoutMs = configureDeliveryTimeout(config, log);
this.apiVersions = new ApiVersions();
this.transactionManager = configureTransactionState(config, logContext);
// There is no need to do work required for adaptive partitioning, if we use a custom partitioner.
boolean enableAdaptivePartitioning = partitioner == null &&
config.getBoolean(ProducerConfig.PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_CONFIG);
RecordAccumulator.PartitionerConfig partitionerConfig = new RecordAccumulator.PartitionerConfig(
enableAdaptivePartitioning,
config.getLong(ProducerConfig.PARTITIONER_AVAILABILITY_TIMEOUT_MS_CONFIG)
);
// 4. RecordAccumulator; the configured batch size is raised to at least 1 before use
int batchSize = Math.max(1, config.getInt(ProducerConfig.BATCH_SIZE_CONFIG));
this.accumulator = new RecordAccumulator(logContext,
batchSize,
compression,
lingerMs(config),
retryBackoffMs,
retryBackoffMaxMs,
deliveryTimeoutMs,
partitionerConfig,
metrics,
PRODUCER_METRIC_GROUP_NAME,
time,
apiVersions,
transactionManager,
new BufferPool(this.totalMemorySize, batchSize, metrics, time, PRODUCER_METRIC_GROUP_NAME));
// 5. Metadata: reuse the supplied instance, otherwise create one from config
List<InetSocketAddress> addresses = ClientUtils.parseAndValidateAddresses(config);
if (metadata != null) {
this.metadata = metadata;
} else {
this.metadata = new ProducerMetadata(retryBackoffMs,
retryBackoffMaxMs,
config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG),
config.getLong(ProducerConfig.METADATA_MAX_IDLE_CONFIG),
logContext,
clusterResourceListeners,
Time.SYSTEM);
// Seed the newly created metadata with the parsed bootstrap server addresses
this.metadata.bootstrap(addresses);
}
this.errors = this.metrics.sensor("errors");
// 6. Sender: built by newSender(...) from the KafkaClient and metadata, and run on the I/O thread created below
this.sender = newSender(logContext, kafkaClient, this.metadata);
String ioThreadName = NETWORK_THREAD_PREFIX + " | " + clientId;
// Wrap the sender in a KafkaThread and start it (the third argument presumably marks it as a daemon thread; confirm)
this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
this.ioThread.start();
config.logUnused();
AppInfoParser.registerAppInfo(JMX_PREFIX, clientId, metrics, time.milliseconds());
log.debug("Kafka producer started");
} catch (Throwable t) {
// call close methods if internal objects are already constructed this is to prevent resource leak. see KAFKA-2121
close(Duration.ofMillis(0), true);
// now propagate the exception
throw new KafkaException("Failed to construct kafka producer", t);
}
}
3. send 发送消息
示例中的 Producer 提供了 asyncSend() 异步发送和 syncSend() 同步发送两种方式,二者底层都调用 KafkaProducer.send()。其中异步发送带有一个回调函数 callback,不会阻塞当前线程等待响应,性能较好;同步发送则在 send() 返回的 Future 上调用 get(),阻塞当前线程直到收到 broker 的响应。
/**
 * Sends one record without waiting for the result; a {@code ProducerCallback} built from the
 * same key and value is passed along with it.
 *
 * @param producer producer used to send the record
 * @param key      record key
 * @param value    record value
 */
private void asyncSend(KafkaProducer<Integer, String> producer, int key, String value) {
    // Asynchronous send with a callback to be notified of the result. Even with a small
    // batch.size and linger.ms=0, send() can still block while buffer.memory is full or
    // metadata is not available.
    ProducerRecord<Integer, String> outgoing = new ProducerRecord<>(topic, key, value);
    ProducerCallback onResult = new ProducerCallback(key, value);
    producer.send(outgoing, onResult);
}
/**
 * Sends one record and blocks on the returned future until the result is available.
 *
 * @param producer producer used to send the record
 * @param key      record key
 * @param value    record value
 * @return the metadata of the sent record, or {@code null} when one of the exceptions
 *         caught below occurred
 * @throws ExecutionException   propagated from the blocking {@code get()} call
 * @throws InterruptedException propagated from the blocking {@code get()} call
 */
private RecordMetadata syncSend(KafkaProducer<Integer, String> producer, int key, String value)
        throws ExecutionException, InterruptedException {
    try {
        // send() returns a future; get() blocks waiting for the ack from the broker
        ProducerRecord<Integer, String> outgoing = new ProducerRecord<>(topic, key, value);
        RecordMetadata ack = producer.send(outgoing).get();
        Utils.maybePrintRecord(numRecords, key, value, ack);
        return ack;
    } catch (AuthorizationException | UnsupportedVersionException | ProducerFencedException
             | FencedInstanceIdException | OutOfOrderSequenceException | SerializationException e) {
        // These exceptions cannot be recovered from: report and shut down
        Utils.printErr(e.getMessage());
        shutdown();
        return null;
    } catch (KafkaException e) {
        // Any other Kafka exception is only reported
        Utils.printErr(e.getMessage());
        return null;
    }
}
4. Kafka 生产者发送消息流程
Kafka 中 Producer 生产者发送消息主要基于 send() 方法,而 send() 方法底层调用了 doSend() 方法,其流程主要包括以下几步:
(1)注册拦截器 ProducerInterceptors,然后通过 waitOnMetadata() 获取元数据
(2)Serializer 对消息的键值进行序列化
(3)Partitioner 分区器计算消息分区
(4)将消息放入 RecordAccumulator 进行缓存并根据消息分区进行分组
(5)sender.wakeup() 唤醒 Sender 线程,由 Sender 线程发送消息
此时 doSend() 已经返回,但消息只是被追加到了 RecordAccumulator 中,还没有真正通过网络发送出去。
接下来由 Sender 线程来实际进行网络数据发送,主要包括以下几步:
(6)Sender 线程将一个 batch 的数据封装成 ClientRequest
(7)将消息请求放入 InFlightRequests 等待发送
(8)Sender 内部的 NetworkClient 调用 Selector 对消息进行发送
(9)消息发送至 Kafka 集群
(10)Kafka 集群返回响应结果
(11)NetworkClient 收到响应,将请求从 InFlightRequests 移除,代表已完成消息发送
(12)将 ProducerBatch 从 RecordAccumulator 中移除,代表这个 batch 的消息已完成发送
(13)遍历调用这个 batch 所有消息的回调函数 callback
以下为 doSend() 方法的源代码:
/**
 * Implementation of asynchronously sending a record to a topic.
 *
 * <p>Steps: wait for topic metadata, serialize key and value, compute the partition, validate
 * the estimated record size, append the record to the accumulator, and wake up the sender when
 * the batch is full or a new batch was created.
 *
 * @param record   the record to send
 * @param callback user callback; may be {@code null}
 * @return the future from the accumulator append, or a {@code FutureFailure} wrapping an
 *         {@code ApiException} raised during the send
 * @throws InterruptException if the thread is interrupted during the send
 * @throws KafkaException     for Kafka errors other than {@code ApiException}
 */
private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
    // Bundles the user callback, the interceptors and the record. It is passed to
    // RecordAccumulator.append(), and afterwards exposes the partition the record was assigned to.
    AppendCallbacks appendCallbacks = new AppendCallbacks(callback, this.interceptors, record);

    try {
        throwIfProducerClosed();
        // first make sure the metadata for the topic is available
        long nowMs = time.milliseconds();
        ClusterAndWaitTime clusterAndWaitTime;
        try {
            // Blocks (up to maxBlockTimeMs) until metadata is available
            clusterAndWaitTime = waitOnMetadata(record.topic(), record.partition(), nowMs, maxBlockTimeMs);
        } catch (KafkaException e) {
            if (metadata.isClosed())
                throw new KafkaException("Producer closed while send in progress", e);
            throw e;
        }
        nowMs += clusterAndWaitTime.waitedOnMetadataMs;
        // Time left from the max block budget after the metadata wait; passed to the append below
        long remainingWaitMs = Math.max(0, maxBlockTimeMs - clusterAndWaitTime.waitedOnMetadataMs);
        // Cluster metadata
        Cluster cluster = clusterAndWaitTime.cluster;

        // Serialize key and value
        byte[] serializedKey;
        try {
            serializedKey = keySerializer.serialize(record.topic(), record.headers(), record.key());
        } catch (ClassCastException cce) {
            throw new SerializationException("Can't convert key of class " + record.key().getClass().getName() +
                    " to class " + producerConfig.getClass(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).getName() +
                    " specified in key.serializer", cce);
        }
        byte[] serializedValue;
        try {
            serializedValue = valueSerializer.serialize(record.topic(), record.headers(), record.value());
        } catch (ClassCastException cce) {
            throw new SerializationException("Can't convert value of class " + record.value().getClass().getName() +
                    " to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
                    " specified in value.serializer", cce);
        }

        // Try to compute the partition. The result may be RecordMetadata.UNKNOWN_PARTITION; the
        // accumulator is then expected to choose the partition during append (the assert below
        // checks that one was set).
        int partition = partition(record, serializedKey, serializedValue, cluster);

        setReadOnly(record.headers());
        Header[] headers = record.headers().toArray();

        int serializedSize = AbstractRecords.estimateSizeInBytesUpperBound(apiVersions.maxUsableProduceMagic(),
                compression.type(), serializedKey, serializedValue, headers);
        ensureValidRecordSize(serializedSize);
        long timestamp = record.timestamp() == null ? nowMs : record.timestamp();

        // A custom partitioner may take advantage on the onNewBatch callback.
        boolean abortOnNewBatch = partitioner != null;

        // Append the record to the accumulator. appendCallbacks is passed in so the partition
        // actually used can be read back from it afterwards.
        RecordAccumulator.RecordAppendResult result = accumulator.append(record.topic(), partition, timestamp, serializedKey,
                serializedValue, headers, appendCallbacks, remainingWaitMs, abortOnNewBatch, nowMs, cluster);
        assert appendCallbacks.getPartition() != RecordMetadata.UNKNOWN_PARTITION;

        if (result.abortForNewBatch) {
            // The first append was aborted because a new batch is needed: notify via onNewBatch,
            // recompute the partition and append again, this time without aborting.
            int prevPartition = partition;
            onNewBatch(record.topic(), cluster, prevPartition);
            partition = partition(record, serializedKey, serializedValue, cluster);
            if (log.isTraceEnabled()) {
                log.trace("Retrying append due to new batch creation for topic {} partition {}. The old partition was {}", record.topic(), partition, prevPartition);
            }
            result = accumulator.append(record.topic(), partition, timestamp, serializedKey,
                    serializedValue, headers, appendCallbacks, remainingWaitMs, false, nowMs, cluster);
        }

        if (transactionManager != null) {
            transactionManager.maybeAddPartition(appendCallbacks.topicPartition());
        }

        // Wake up the sender so it can send the batch
        if (result.batchIsFull || result.newBatchCreated) {
            log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), appendCallbacks.getPartition());
            this.sender.wakeup();
        }
        return result.future;
        // handling exceptions and record the errors;
        // for API exceptions return them in the future,
        // for other exceptions throw directly
    } catch (ApiException e) {
        log.debug("Exception occurred during message send:", e);
        if (callback != null) {
            TopicPartition tp = appendCallbacks.topicPartition();
            RecordMetadata nullMetadata = new RecordMetadata(tp, -1, -1, RecordBatch.NO_TIMESTAMP, -1, -1);
            callback.onCompletion(nullMetadata, e);
        }
        this.errors.record();
        this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
        if (transactionManager != null) {
            transactionManager.maybeTransitionToErrorState(e);
        }
        return new FutureFailure(e);
    } catch (InterruptedException e) {
        this.errors.record();
        this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
        throw new InterruptException(e);
    } catch (KafkaException e) {
        this.errors.record();
        this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
        throw e;
    } catch (Exception e) {
        // we notify interceptor about all exceptions, since onSend is called before anything else in this method
        this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
        throw e;
    }
    // The original snippet repeated the KafkaException and Exception catch clauses here, which
    // javac rejects because those exceptions have already been caught; they were removed.
}