An in-depth analysis of Kafka producer configuration and principles:
I. The basic flow of sending a message from the producer:
A ProducerRecord first passes through the configured key/value serializers and then through the partitioner, which decides which partition the record belongs to. The record is then buffered into a batch in the producer's memory buffer, and a background sender thread ships the batches to the brokers, returning the resulting metadata (or an error) for each record.
II. The three send modes in Kafka:
1. Fire-and-forget:
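The original section stops at the heading, so here is a minimal sketch of the fire-and-forget style, written against the same KafkaConst and BusiConst helpers the later examples use (the key and value chosen here are only illustrative). We call send() and ignore the returned Future, which gives the highest throughput but means a failed send goes completely unnoticed:

private static KafkaProducer<String, String> producer = null;
public static void main(String[] args) {
    /* the message producer */
    producer = new KafkaProducer<String, String>(
            KafkaConst.producerConfig(StringSerializer.class, StringSerializer.class));
    try {
        /* the record to send */
        ProducerRecord<String, String> record = new ProducerRecord<String, String>(
                BusiConst.HELLO_TOPIC, "teacher01", "lison");
        /* fire-and-forget: ignore the returned Future, so we never learn whether the send succeeded */
        producer.send(record);
    } finally {
        producer.close();
    }
}

Because nothing ever inspects the result, this mode is only appropriate when losing the occasional message is acceptable.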
2. Synchronous send:
private static KafkaProducer<String, String> producer = null;
public static void main(String[] args) {
    /* the message producer */
    producer = new KafkaProducer<String, String>(
            KafkaConst.producerConfig(StringSerializer.class, StringSerializer.class));
    try {
        /* the record to send */
        ProducerRecord<String, String> record;
        try {
            record = new ProducerRecord<String, String>(
                    BusiConst.HELLO_TOPIC, "teacher10", "james");
            Future<RecordMetadata> future = producer.send(record);
            System.out.println("do other things");
            RecordMetadata recordMetadata = future.get(); // blocks here until the broker has responded
            if (null != recordMetadata) {
                System.out.println("offset:" + recordMetadata.offset() + "-"
                        + "partition:" + recordMetadata.partition());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } finally {
        producer.close();
    }
}
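Because future.get() blocks until the broker has replied, the synchronous style surfaces send failures as exceptions on the calling thread, at the price of a full round trip per record; it is the simplest way to get reliable per-record error handling.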
3. Asynchronous send:
private static KafkaProducer<String, String> producer = null;
public static void main(String[] args) {
    /* the message producer */
    producer = new KafkaProducer<String, String>(
            KafkaConst.producerConfig(StringSerializer.class, StringSerializer.class));
    /* the record to send */
    ProducerRecord<String, String> record;
    try {
        record = new ProducerRecord<String, String>(
                BusiConst.HELLO_TOPIC, "teacher14", "deer");
        /* hand send() a callback; it is invoked once the send completes, successfully or not */
        producer.send(record, new Callback() {
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (null != exception) {
                    exception.printStackTrace();
                }
                if (null != metadata) {
                    System.out.println("offset:" + metadata.offset() + "-"
                            + "partition:" + metadata.partition());
                }
            }
        });
    } finally {
        producer.close();
    }
}
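One caveat worth knowing: the Callback executes on the producer's background I/O thread, so it should finish quickly; a slow callback delays the sending of subsequent messages.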
4. The producer under multiple threads:
// number of messages to send
private static final int MSG_SIZE = 1000;
// the thread pool responsible for sending messages
private static ExecutorService executorService
        = Executors.newFixedThreadPool(
                Runtime.getRuntime().availableProcessors());
private static CountDownLatch countDownLatch
        = new CountDownLatch(MSG_SIZE);

private static DemoUser makeUser(int id) {
    DemoUser demoUser = new DemoUser(id);
    String userName = "xiangxue_" + id;
    demoUser.setName(userName);
    return demoUser;
}

/* the task that sends a message */
private static class ProduceWorker implements Runnable {
    private ProducerRecord<String, String> record;
    private KafkaProducer<String, String> producer;

    public ProduceWorker(ProducerRecord<String, String> record,
                         KafkaProducer<String, String> producer) {
        this.record = record;
        this.producer = producer;
    }

    public void run() {
        final String id = Thread.currentThread().getId()
                + "-" + System.identityHashCode(producer);
        try {
            producer.send(record, new Callback() {
                public void onCompletion(RecordMetadata metadata,
                                         Exception exception) {
                    if (null != exception) {
                        exception.printStackTrace();
                    }
                    if (null != metadata) {
                        System.out.println(id + "|"
                                + String.format("offset:%s,partition:%s",
                                        metadata.offset(), metadata.partition()));
                    }
                }
            });
            System.out.println(id + ": record [" + record + "] has been submitted.");
            countDownLatch.countDown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

public static void main(String[] args) {
    KafkaProducer<String, String> producer
            = new KafkaProducer<String, String>(
                    KafkaConst.producerConfig(StringSerializer.class,
                            StringSerializer.class));
    try {
        // send in a loop, going through the thread pool
        for (int i = 0; i < MSG_SIZE; i++) {
            DemoUser demoUser = makeUser(i);
            ProducerRecord<String, String> record
                    = new ProducerRecord<String, String>(
                            BusiConst.CONCURRENT_USER_INFO_TOPIC, null,
                            System.currentTimeMillis(),
                            demoUser.getId() + "", demoUser.toString());
            executorService.submit(new ProduceWorker(record, producer));
        }
        countDownLatch.await();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        producer.close();
        executorService.shutdown();
    }
}
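Two things are worth noting in this example. KafkaProducer is thread-safe, which is why every ProduceWorker can share the single producer instance (KafkaConsumer, as the next example notes, is not). Also, countDown() runs as soon as send() returns, so the latch counts submissions rather than acknowledged writes; moving countDown() into the callback would make main() wait for actual completion.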
The consumer:
private static ExecutorService executorService
        = Executors.newFixedThreadPool(
                BusiConst.CONCURRENT_PARTITIONS_COUNT);

private static class ConsumerWorker implements Runnable {
    private KafkaConsumer<String, String> consumer;

    // TODO be careful with KafkaConsumer instances: KafkaConsumer is not thread-safe, so each consuming thread must own its own instance
    public ConsumerWorker(Map<String, Object> config, String topic) {
        Properties properties = new Properties();
        properties.putAll(config);
        this.consumer = new KafkaConsumer<String, String>(properties);
        consumer.subscribe(Collections.singletonList(topic));
    }

    public void run() {
        final String id = Thread.currentThread().getId()
                + "-" + System.identityHashCode(consumer);
        try {
            while (true) {
                ConsumerRecords<String, String> records
                        = consumer.poll(Duration.ofMillis(500));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(id + "|" + String.format(
                            "topic:%s,partition:%d,offset:%d," +
                                    "key:%s,value:%s",
                            record.topic(), record.partition(),
                            record.offset(), record.key(), record.value()));
                    // do our work
                }
            }
        } finally {
            consumer.close();
        }
    }
}

public static void main(String[] args) {
    /* the consumer configuration */
    Map<String, Object> config
            = KafkaConst.consumerConfigMap("concurrent",
                    StringDeserializer.class,
                    StringDeserializer.class);
    for (int i = 0; i < BusiConst.CONCURRENT_PARTITIONS_COUNT; i++) {
        executorService.submit(new ConsumerWorker(config,
                BusiConst.CONCURRENT_USER_INFO_TOPIC));
    }
}
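Since a partition is consumed by at most one consumer within the same group, starting more ConsumerWorker threads than the topic has partitions buys nothing; the surplus threads would simply sit idle, which is why the pool is sized to BusiConst.CONCURRENT_PARTITIONS_COUNT.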
5. Send configuration in more detail:
public static void main(String[] args) {
    // TODO three producer properties are mandatory (the broker address list plus the key and value serializers)
    Properties properties = new Properties();
    properties.put("bootstrap.servers", "127.0.0.1:9092");
    properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    // TODO more send configuration (the important settings)
    properties.put("acks", "1"); // acks: 0, 1, all
    properties.put("batch.size", 16384); // memory one batch may use, default 16384 (16 KB)
    properties.put("linger.ms", 0L); // how long the producer waits for more messages to join a batch before sending, default 0
    properties.put("max.request.size", 1 * 1024 * 1024); // upper bound on a produce request, default 1 MB (interacts with the broker's message.max.bytes)
    // TODO more send configuration (the less important settings)
    properties.put("buffer.memory", 32 * 1024 * 1024L); // size of the producer's memory buffer
    properties.put("retries", 0); // number of times to retry a failed send
    properties.put("request.timeout.ms", 30 * 1000); // maximum time the client waits for a response to a request, default 30 seconds
    properties.put("max.block.ms", 60 * 1000); // maximum time to block before throwing an exception, default 60000 ms
    properties.put("compression.type", "none"); // compression type for the data, default is no compression; options: none, gzip, snappy
    KafkaProducer<String, String> producer = new KafkaProducer<String, String>(properties);
    try {
        ProducerRecord<String, String> record;
        try {
            // TODO send 4 messages
            for (int i = 0; i < 4; i++) {
                record = new ProducerRecord<String, String>(BusiConst.HELLO_TOPIC, String.valueOf(i), "lison");
                producer.send(record);
                System.out.println(i + ",message is sent");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } finally {
        producer.close();
    }
}
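batch.size and linger.ms work as a pair: a batch is shipped as soon as it grows to batch.size or has waited linger.ms, whichever happens first. With the defaults above (16 KB, 0 ms) the producer sends batches as soon as it can; raising linger.ms slightly trades latency for better batching and compression.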
acks:
The acks setting controls how many acknowledgments the producer requires before it considers a write successful: 0 means don't wait at all, 1 means wait for the partition leader, and all means wait for all in-sync replicas.
Retries interact with ordering. If retries is set to a non-zero value while max.in.flight.requests.per.connection is greater than 1, then when the first batch fails to be written and the second succeeds, the broker will retry the first batch; if that retry then succeeds, the two batches end up in reversed order. In scenarios that require ordered messages, whether a write succeeds also matters, so setting retries to 0 is not recommended (without retries, messages can be lost, for example when a connection closes). The usual approach is therefore to keep retries enabled and set max.in.flight.requests.per.connection to 1, so that while the producer is still retrying the first batch, no other messages are sent to the broker. This severely limits producer throughput, so only do it when strict message ordering is required.
public static void main(String[] args) {
    // TODO three producer properties are mandatory (the broker address list plus the key and value serializers)
    Properties properties = new Properties();
    properties.put("bootstrap.servers", "127.0.0.1:9092");
    properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    // TODO guaranteeing message order (use a single partition, keep retries enabled, limit in-flight requests)
    //properties.put("retries", 0); // number of retries (setting it to 0 risks losing messages; see above)
    // maximum number of unacknowledged requests the client may send on a single connection before blocking
    // with max.in.flight.requests.per.connection set to 1, no other message is sent to the broker
    // while the producer is still retrying the first batch; the default is 5
    properties.put("max.in.flight.requests.per.connection", 1);
    KafkaProducer<String, String> producer = new KafkaProducer<String, String>(properties);
    try {
        ProducerRecord<String, String> record;
        try {
            // TODO send 4 messages
            for (int i = 0; i < 4; i++) {
                record = new ProducerRecord<String, String>(BusiConst.HELLO_TOPIC, String.valueOf(i), "lison");
                producer.send(record);
                System.out.println(i + ",message is sent");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } finally {
        producer.close();
    }
}
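On newer clients and brokers there is a less costly alternative worth considering: setting enable.idempotence to true lets the producer retry safely without duplicating or reordering records even with max.in.flight.requests.per.connection as high as 5, so strict ordering no longer has to cost all of the pipelining. This depends on your Kafka version, so treat it as a pointer rather than a drop-in replacement for the configuration above.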
III. Serialization:
1. What to consider with custom serialization:
In general, custom serializers are only worth adopting when the set of teams you need to coordinate with around Kafka is fairly large, such as agile development teams sharing evolving schemas; ordinary teams don't need them. For the typical case, JSON is sufficient.
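For reference, a custom serializer only needs to implement Kafka's org.apache.kafka.common.serialization.Serializer interface. The sketch below is a hypothetical DemoUserSerializer for the DemoUser class from the multi-threading example (it assumes DemoUser.getId() returns an int); it writes a fixed binary layout by hand, which also illustrates why hand-rolled formats are more brittle than JSON:

public class DemoUserSerializer implements Serializer<DemoUser> {
    public void configure(Map<String, ?> configs, boolean isKey) {
        // no configuration needed
    }
    public byte[] serialize(String topic, DemoUser data) {
        if (data == null) {
            return null;
        }
        byte[] name = data.getName() == null
                ? new byte[0]
                : data.getName().getBytes(StandardCharsets.UTF_8);
        // layout: 4-byte id | 4-byte name length | name bytes
        ByteBuffer buffer = ByteBuffer.allocate(4 + 4 + name.length);
        buffer.putInt(data.getId());
        buffer.putInt(name.length);
        buffer.put(name);
        return buffer.array();
    }
    public void close() {
        // nothing to release
    }
}

Any change to the layout (say, adding a field) silently breaks every existing consumer, which is exactly the versioning problem that schema-aware formats exist to solve.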
IV. Partitioning:
1. A custom partitioner:
public class SelfPartitioner implements Partitioner {
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        // fetch the partition metadata for this topic
        List<PartitionInfo> partitionInfos = cluster.partitionsForTopic(topic);
        // TODO number of partitions
        int num = partitionInfos.size();
        // TODO derive the partition id from the value's hash modulo the partition count
        // (mask off the sign bit so a negative hashCode cannot produce a negative partition id)
        int parId = (((String) value).hashCode() & Integer.MAX_VALUE) % num;
        return parId;
    }

    public void close() {
        // do nothing
    }

    public void configure(Map<String, ?> configs) {
        // do nothing
    }
}
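Compared with the default partitioner, which picks a partition by hashing the record key (murmur2 in the Java client), this one hashes the value instead, so every record carrying the same value lands in the same partition regardless of its key. Whether that is the right routing rule depends entirely on the use case.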
Usage:
private static KafkaProducer<String, String> producer = null;
public static void main(String[] args) {
    /* the message producer */
    Properties properties
            = KafkaConst.producerConfig(StringSerializer.class,
                    StringSerializer.class);
    // TODO use the custom partitioner
    properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "cn.enjoyedu.selfpartition.SelfPartitioner");
    producer = new KafkaProducer<String, String>(properties);
    try {
        /* the record to send */
        ProducerRecord<String, String> record;
        try {
            record = new ProducerRecord<String, String>(
                    BusiConst.SELF_PARTITION_TOPIC, "teacher01",
                    "mark");
            Future<RecordMetadata> future = producer.send(record);
            System.out.println("do other things");
            RecordMetadata recordMetadata = future.get();
            if (null != recordMetadata) {
                System.out.println(String.format("offset:%s,partition:%s",
                        recordMetadata.offset(),
                        recordMetadata.partition()));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    } finally {
        producer.close();
    }
}
That wraps up the producer analysis; the next post takes a deep dive into the consumer. Stay tuned!