I. Environment Setup
1. ZooKeeper and Kafka must be installed on your machine or server.
For installation steps, see my earlier blog post on that.
2. The project needs the following Maven dependency:
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.7.0</version>
</dependency>
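If your build uses Gradle rather than Maven, the equivalent declaration for the same artifact and version would be:

    // build.gradle -- same kafka-clients artifact, declared for Gradle
    implementation 'org.apache.kafka:kafka-clients:2.7.0'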
II. Producers
1. A simple producer:
package com.maoyan.kafka.producer;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class MyProducer { // plain producer
    public static void main(String[] args) {
        // 1. Create the producer configuration
        Properties properties = new Properties();
        // Kafka cluster to connect to
        properties.put("bootstrap.servers", "localhost:9092");
        // ack level
        // properties.put("acks", "all"); // "all" is equivalent to -1; the other options are 0 and 1
        // number of retries
        properties.put("retries", 1);
        // batch size
        properties.put("batch.size", 16384); // 16 KB
        // linger time
        properties.put("linger.ms", 1);
        // RecordAccumulator buffer size
        properties.put("buffer.memory", 33554432); // 32 MB
        // serializer classes for key and value
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // create the producer
        KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
        // send data
        for (int i = 0; i < 10; i++) {
            producer.send(new ProducerRecord<String, String>("study", "luzelong" + i));
        }
        // release resources
        producer.close();
    }
}
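To check that the messages actually arrived, you can watch the topic with the console consumer that ships with Kafka (adjust the script path to your installation):

    bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic study --from-beginning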
2. A producer with a send callback
package com.maoyan.kafka.producer;

import org.apache.kafka.clients.producer.*;

import java.util.Properties;

public class CallBackProducer { // producer with a send callback
    public static void main(String[] args) {
        // create the configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        // create the producer
        KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
        // send data
        for (int i = 0; i < 10; i++) {
            // the callback runs asynchronously, once the producer receives the ack;
            // a fixed key would pin the partition, so the key here varies (Integer.toString(i))
            producer.send(new ProducerRecord<String, String>("study", Integer.toString(i), Integer.toString(i)),
                    (metadata, exception) -> {
                        if (exception == null) {
                            System.out.println("success -> partition = " + metadata.partition()
                                    + " ~~~~~~~ offset = " + metadata.offset());
                        } else {
                            exception.printStackTrace();
                        }
                    });
        }
        producer.close();
    }
}
Note: if you call get() right after send(), i.e. send(xxx).get(), sending becomes a synchronous, ordered operation: each message is sent and acknowledged before the next one goes out.
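A minimal sketch of that synchronous variant, reusing the loop of the example above (get() blocks until the broker acks and returns the RecordMetadata):

    // inside the for loop above; needs import org.apache.kafka.clients.producer.RecordMetadata,
    // and get() throws InterruptedException/ExecutionException, so declare or handle them
    RecordMetadata metadata = producer.send(
            new ProducerRecord<>("study", Integer.toString(i), Integer.toString(i))).get();
    System.out.println("synchronously sent to partition " + metadata.partition()
            + ", offset " + metadata.offset());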
3. Plugging in a custom partitioning strategy
First, what happens without a custom partitioner:
The partition assignment is essentially determined by the ProducerRecord arguments (each case is sketched in code below):
(1) If a partition is specified, that value is used as the partition directly.
(2) If no partition is specified but a key is present, the partition is the key's hash modulo the topic's partition count.
(3) If neither a partition nor a key is given, a random integer is generated on the first call (and incremented on every subsequent call), and that value modulo the number of available partitions gives the partition: the well-known round-robin algorithm.
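The three cases correspond to the three ProducerRecord constructor overloads (keys and values here are just placeholders):

    // (1) explicit partition: goes straight to partition 0
    new ProducerRecord<String, String>("study", 0, "key", "value");
    // (2) key but no partition: partition = hash(key) % partitionCount
    new ProducerRecord<String, String>("study", "key", "value");
    // (3) neither partition nor key: round-robin over available partitions
    new ProducerRecord<String, String>("study", "value");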
Sometimes this built-in logic does not match real-world needs, so the Kafka client API also lets you supply a custom partitioner. First, implement the partitioning strategy:
package com.maoyan.kafka.partititioner;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

public class MyPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Integer num = cluster.partitionCountForTopic(topic);
        // return key.toString().hashCode() % num;
        return 1; // always write to partition 1
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Then register it in the producer's Properties; everything else looks like the two producer examples above.
public class PartitionProducer {
    public static void main(String[] args) {
        .....
        properties.put("partitioner.class", "com.maoyan.kafka.partititioner.MyPartitioner");
        KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
        ......
    }
}
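If you prefer the typed constant over the raw string, the same setting can also be written with the ProducerConfig constant from the client API:

    properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, MyPartitioner.class.getName());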
III. Consumers
Reliability on the consumer side is easy to guarantee, because the data is persisted in Kafka, so there is no need to worry about losing it. But a consumer may fail mid-consumption (power loss, crash, etc.), and after recovery it must resume from where it left off, so the consumer has to keep track in real time of which offset it has consumed up to.
Maintaining offsets is therefore an unavoidable concern when consuming data.
1. A consumer that commits offsets automatically
package com.maoyan.kafka.consumer;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class AutoCommitConsumer {
    public static void main(String[] args) { // automatic commit
        // 1. Create the consumer configuration
        Properties properties = new Properties();
        // cluster to connect to
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // enable automatic commit
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
        // automatic commit interval
        properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
        // deserializer classes for key and value
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        // consumer group
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "test-consumer-group1");
        // resets the consumer offset (to re-consume from the beginning); this only takes effect in two cases:
        // 1. the group above has never consumed yet (change the group name to trigger it), or 2. the committed offset has expired
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // create the consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.subscribe(Collections.singletonList("study")); // or Arrays.asList(...)
        while (true) {
            // fetch data; poll(long) is deprecated in 2.7, so use the Duration overload
            ConsumerRecords<String, String> consumerRecords = consumer.poll(Duration.ofMillis(100));
            // iterate over and print the records
            for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
                System.out.println(consumerRecord.key() + "----" + consumerRecord.value());
            }
        }
        // no close() needed: the loop above never exits
    }
}
2. A consumer that commits offsets manually
package com.maoyan.kafka.consumer;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class NoAutoCommitConsumer { // manual commit
    public static void main(String[] args) {
        // 1. Create the consumer configuration
        Properties properties = new Properties();
        // cluster to connect to
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // disable automatic commit -- this is the key setting!
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        // deserializer classes for key and value
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        // consumer group
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "group1");
        // resets the consumer offset (to re-consume from the beginning); see the note in the previous example
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // create the consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.subscribe(Collections.singletonList("study")); // or Arrays.asList(...)
        while (true) {
            // fetch data
            ConsumerRecords<String, String> consumerRecords = consumer.poll(Duration.ofMillis(100));
            // iterate over and print the records
            for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
                System.out.println(consumerRecord.key() + "----" + consumerRecord.value());
            }
            // 1. synchronous commit: blocks the current thread until the commit succeeds
            consumer.commitSync();
            // 2. asynchronous commit
            // consumer.commitAsync();
        }
    }
}
There are two ways to commit offsets manually: commitSync (synchronous) and commitAsync (asynchronous). Both commit the highest offset of the batch returned by the latest poll. The difference: commitSync blocks the current thread until the commit succeeds and retries automatically on failure (though uncontrollable factors can still make it fail in the end), whereas commitAsync has no retry mechanism, so a commit may be lost.
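Because commitAsync does not retry, it is worth at least logging failures. A minimal sketch using the callback overload of commitAsync (the OffsetCommitCallback is part of the standard consumer API):

    // replace the plain consumer.commitAsync() above with the callback variant
    consumer.commitAsync((offsets, exception) -> {
        if (exception != null) {
            // the commit was lost; log it (or fall back to commitSync on shutdown)
            System.err.println("async offset commit failed for " + offsets + ": " + exception);
        }
    });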
3. A custom commit strategy (offsets can be stored in MySQL or Redis)
package com.maoyan.kafka.consumer;

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.*;

public class MyConsumer {
    private static Map<TopicPartition, Long> currentOffset = new HashMap<>();

    public static void main(String[] args) {
        // create the configuration
        Properties props = new Properties();
        // Kafka cluster
        props.put("bootstrap.servers", "hadoop102:9092");
        // consumers with the same group.id belong to the same consumer group
        props.put("group.id", "test");
        // disable automatic offset commit
        props.put("enable.auto.commit", "false");
        // deserializer classes for key and value
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // create a consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        // subscribe to the topic
        consumer.subscribe(Arrays.asList("first"), new ConsumerRebalanceListener() {
            // called before a rebalance
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                commitOffset(currentOffset);
            }

            // called after a rebalance
            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                currentOffset.clear();
                for (TopicPartition partition : partitions) {
                    // seek to the last committed offset and resume consumption there
                    consumer.seek(partition, getOffset(partition));
                }
            }
        });
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100)); // fetch data
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("offset = %d, key = %s, value = %s%n", record.offset(), record.key(), record.value());
                // remember, per partition, the next offset to consume (hence the +1)
                currentOffset.put(new TopicPartition(record.topic(), record.partition()), record.offset() + 1);
            }
            commitOffset(currentOffset); // commit via the custom strategy
        }
    }

    // fetch the latest committed offset for a partition
    private static long getOffset(TopicPartition partition) {
        return 0;
    }

    // commit the offsets of all partitions owned by this consumer
    private static void commitOffset(Map<TopicPartition, Long> currentOffset) {
    }
}
You have to write the logic of getOffset() and commitOffset() yourself (for example, against MySQL or Redis)...
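As one possible implementation, here is a minimal JDBC sketch against a hypothetical MySQL table consumer_offsets(topic VARCHAR, part INT, committed_offset BIGINT, PRIMARY KEY(topic, part)); the table name, columns, and connection details are all assumptions, not something the Kafka API prescribes:

    import org.apache.kafka.common.TopicPartition;

    import java.sql.*;
    import java.util.Map;

    public class MysqlOffsetStore {
        // hypothetical connection details -- adjust to your environment
        private static final String URL = "jdbc:mysql://localhost:3306/kafka_offsets";
        private static final String USER = "root";
        private static final String PASSWORD = "123456";

        // read the stored offset for one partition; 0 if it was never committed
        public static long getOffset(TopicPartition partition) {
            String sql = "SELECT committed_offset FROM consumer_offsets WHERE topic = ? AND part = ?";
            try (Connection conn = DriverManager.getConnection(URL, USER, PASSWORD);
                 PreparedStatement ps = conn.prepareStatement(sql)) {
                ps.setString(1, partition.topic());
                ps.setInt(2, partition.partition());
                try (ResultSet rs = ps.executeQuery()) {
                    return rs.next() ? rs.getLong(1) : 0L;
                }
            } catch (SQLException e) {
                throw new RuntimeException("failed to read offset", e);
            }
        }

        // upsert the offsets of all partitions in one transaction
        public static void commitOffset(Map<TopicPartition, Long> currentOffset) {
            String sql = "INSERT INTO consumer_offsets (topic, part, committed_offset) VALUES (?, ?, ?) "
                    + "ON DUPLICATE KEY UPDATE committed_offset = VALUES(committed_offset)";
            try (Connection conn = DriverManager.getConnection(URL, USER, PASSWORD)) {
                conn.setAutoCommit(false);
                try (PreparedStatement ps = conn.prepareStatement(sql)) {
                    for (Map.Entry<TopicPartition, Long> entry : currentOffset.entrySet()) {
                        ps.setString(1, entry.getKey().topic());
                        ps.setInt(2, entry.getKey().partition());
                        ps.setLong(3, entry.getValue());
                        ps.addBatch();
                    }
                    ps.executeBatch();
                    conn.commit();
                } catch (SQLException e) {
                    conn.rollback();
                    throw e;
                }
            } catch (SQLException e) {
                throw new RuntimeException("failed to commit offsets", e);
            }
        }
    }

To wire it into the MyConsumer above, simply delegate its getOffset()/commitOffset() stubs to this class.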