Kafka生产者
代码
同步
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
 * Minimal synchronous Kafka producer example: builds a producer from an explicit
 * configuration map, sends one record and blocks until the broker acknowledges it.
 */
public class MyProducer1 {
    /**
     * Sends a single record to partition 0 of {@code topic_1} and waits up to three
     * seconds for the acknowledgement.
     *
     * @param args unused
     * @throws InterruptedException if the thread is interrupted while waiting for the result
     * @throws ExecutionException   if the send failed
     * @throws TimeoutException     if no acknowledgement arrived within the wait time
     */
    public static void main(String[] args) throws InterruptedException, ExecutionException, TimeoutException {
        Map<String, Object> configs = new HashMap<>();
        // Broker address list used for the initial connection to the cluster; comma-separated.
        configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "ip:port");
        // Serializers for the record key and value.
        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.IntegerSerializer");
        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        /*
         * Number of replicas that must have received the record before the write counts as successful:
         *   1  - the leader replica has written it
         *   0  - no acknowledgement required
         *   -1 - all in-sync replicas have written it
         */
        configs.put(ProducerConfig.ACKS_CONFIG, "1");
        // Maximum size of a request the producer may send, in bytes (1 MB here).
        configs.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, 1048576);
        // Number of retries on a failed send.
        configs.put(ProducerConfig.RETRIES_CONFIG, 1);
        // Pause between retries, in milliseconds.
        configs.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, 100);
        // Compression type: none, gzip, snappy or lz4.
        configs.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "none");
        // Close idle connections after this many milliseconds (540000 ms = 9 minutes).
        configs.put(ProducerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG, 540000);
        // How long to wait for more records to join a batch before sending it, in milliseconds.
        configs.put(ProducerConfig.LINGER_MS_CONFIG, 0);
        // Socket receive buffer size in bytes (32 KB).
        configs.put(ProducerConfig.RECEIVE_BUFFER_CONFIG, 32768);
        // Socket send buffer size in bytes (128 KB).
        configs.put(ProducerConfig.SEND_BUFFER_CONFIG, 131072);
        // Maximum time to wait for the response to a request, in milliseconds.
        configs.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 30000);
        // Total memory available to the record accumulator, in bytes.
        configs.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 32 * 1024 * 1024);
        // Maximum time the calling thread may be blocked by the producer, in milliseconds.
        configs.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 60 * 1000);
        // Size of a single record batch, in bytes.
        configs.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
        // transactional.id is deliberately NOT set for this plain synchronous send. It is a
        // String-typed setting (an Integer value is rejected at construction time), it needs
        // acks=all, and the producer must call initTransactions() before the first send.

        // KafkaProducer is thread safe. try-with-resources guarantees close() runs even when the
        // send fails; close() blocks until previously sent requests have completed.
        try (KafkaProducer<Integer, String> producer = new KafkaProducer<>(configs)) {
            ProducerRecord<Integer, String> record = new ProducerRecord<>(
                    "topic_1",
                    0,
                    0,
                    "message 0"
            );
            // Block on the returned future to make the send synchronous.
            producer.send(record).get(3_000, TimeUnit.MILLISECONDS);
        }
    }
}
异步
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
/**
 * Minimal asynchronous Kafka producer example: sends one record and reports the
 * outcome through a {@link Callback} instead of blocking on the returned future.
 */
public class MyProducer2 {
    /**
     * Sends a single record to partition 0 of {@code topic_1} and prints either the
     * record metadata (on success) or the failure message.
     *
     * @param args unused
     */
    public static void main(String[] args) throws InterruptedException, ExecutionException, TimeoutException {
        Map<String, Object> configs = new HashMap<>();
        configs.put("bootstrap.servers", "47.97.73.187:9092");
        configs.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
        configs.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        configs.put("acks", "1");

        // try-with-resources closes the producer even if send() throws; close() waits for
        // in-flight sends, so the callback has run before main returns.
        try (KafkaProducer<Integer, String> producer = new KafkaProducer<>(configs)) {
            ProducerRecord<Integer, String> record = new ProducerRecord<>(
                    "topic_1",
                    0,
                    0,
                    "message 0"
            );
            producer.send(record, new Callback() {
                @Override
                public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                    // A null exception means the send succeeded; only then is it safe to
                    // read the metadata, and only otherwise is it safe to read the exception.
                    if (e == null) {
                        System.out.println("主题:" + recordMetadata.topic() + "\n"
                                + "分区:" + recordMetadata.partition() + "\n"
                                + "偏移量: " + recordMetadata.offset() + "\n"
                                + "序列化的key字节:" + recordMetadata.serializedKeySize() + "\n"
                                + "序列化的value字节:" + recordMetadata.serializedValueSize() + "\n"
                                + "时间戳:" + recordMetadata.timestamp());
                    } else {
                        System.out.println("有异常" + e.getMessage());
                    }
                }
            });
        }
    }
}
ProducerRecord
// Excerpt showing only the fields of ProducerRecord; constructors and accessors are not shown here.
public class ProducerRecord<K, V> {
// Topic
private final String topic;
// Partition
private final Integer partition;
// Record headers
private final Headers headers;
// Key
private final K key;
// Value
private final V value;
// Timestamp of the record
private final Long timestamp;
}
核心组件
序列化器
org.apache.kafka.common.serialization.Serializer
分区器
org.apache.kafka.clients.producer.Partitioner
当消息ProducerRecord中没有指定partition字段,那么就需要依赖分区器,根据Key这个字段来计算partition的值。
- 默认的分区器
org.apache.kafka.clients.producer.internals.DefaultPartitioner
- 配置分区器的属性key
org.apache.kafka.clients.producer.ProducerConfig#PARTITIONER_CLASS_CONFIG
拦截器
org.apache.kafka.clients.producer.ProducerInterceptor
生产者在将消息序列化和计算分区之前,会调用生产者拦截器的onSend()方法来对消息进行相应的定制化操作。
- 配置拦截器的属性key
org.apache.kafka.clients.producer.ProducerConfig#INTERCEPTOR_CLASSES_CONFIG
当有多个拦截器的时候,类名用逗号分隔
原理
在整个流程中,生产者客户端由主线程产生消息,消息依次经过拦截器、序列化器、分区器后进入消息累加器缓存;然后发送线程(Sender)从消息累加器中获取消息,并将消息发送到Broker。
主线程代码流转
- 调用拦截器处理
// Entry point of an asynchronous send: passes the record through the interceptors,
// then delegates the intercepted record and the callback to doSend.
public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
// intercept the record, which can be potentially modified; this method does not throw exceptions
// Interceptor processing
ProducerRecord<K, V> interceptedRecord = this.interceptors.onSend(record);
return doSend(interceptedRecord, callback);
}
- 经过序列化器-分区器-放入累加器
// Serializes the record's key and value, computes the target partition, checks the
// estimated record size and appends the record to the accumulator; wakes the sender
// when the append filled a batch or created a new one, and returns the append's future.
// NOTE(review): this is an abridged excerpt - cluster, timestamp, interceptCallback and
// remainingWaitMs are used below but not declared here; presumably elided for brevity.
private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
TopicPartition tp = null;
try {
// Serializers
byte[] serializedKey;
try {
serializedKey = keySerializer.serialize(record.topic(), record.headers(), record.key());
} catch (ClassCastException cce) {
// A key of the wrong type is reported as a SerializationException naming the configured serializer
throw new SerializationException("Can't convert key of class " + record.key().getClass().getName() +
" to class " + producerConfig.getClass(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).getName() +
" specified in key.serializer", cce);
}
byte[] serializedValue;
try {
serializedValue = valueSerializer.serialize(record.topic(), record.headers(), record.value());
} catch (ClassCastException cce) {
// Same handling for a value of the wrong type
throw new SerializationException("Can't convert value of class " + record.value().getClass().getName() +
" to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
" specified in value.serializer", cce);
}
// Partitioner
int partition = partition(record, serializedKey, serializedValue, cluster);
tp = new TopicPartition(record.topic(), partition);
setReadOnly(record.headers());
Header[] headers = record.headers().toArray();
// Estimate an upper bound for the serialized record size and validate it before buffering
int serializedSize = AbstractRecords.estimateSizeInBytesUpperBound(apiVersions.maxUsableProduceMagic(),
compressionType, serializedKey, serializedValue, headers);
ensureValidRecordSize(serializedSize);
// Append the record to the accumulator buffer
RecordAccumulator.RecordAppendResult result = accumulator.append(tp, timestamp, serializedKey,
serializedValue, headers, interceptCallback, remainingWaitMs);
if (result.batchIsFull || result.newBatchCreated) {
log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition);
this.sender.wakeup();
}
return result.future;
} catch (Exception e) {
// NOTE(review): the catch body is empty in this excerpt; the exception handling appears to have been elided
}
}
Sender线程代码流转
- org.apache.kafka.clients.producer.internals.Sender#run()
- org.apache.kafka.clients.producer.internals.Sender#run(long)
- org.apache.kafka.clients.producer.internals.Sender#sendProducerData

5万+

被折叠的 条评论
为什么被折叠?



