Consumer notes:
- A single partition's messages can be consumed by only one Consumer within a ConsumerGroup. In other words, one consumer may consume one or several partitions, but multiple consumers in the same group cannot consume the same partition.
- A Consumer reads messages from a partition in order; by default it starts from the beginning.
- A single ConsumerGroup collectively consumes the messages of all partitions.
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.*;
public class ConsumerSample {
private final static String TOPIC_NAME="jiangzh-topic";
public static void main(String[] args) {
// helloworld();
// Manual offset commit
// commitedOffset();
// Manual offset commit, handled per partition
// commitedOffsetWithPartition();
// Manually assign one or more specific partitions, and commit offsets
// commitedOffsetWithPartition2();
// Manually set the starting offset, with manual offset commits
// controlOffset();
// Flow control
controlPause();
}
/*
This usage does show up in real projects, but it is not recommended.
*/
private static void helloworld(){
Properties props = new Properties();
props.setProperty("bootstrap.servers", "192.168.220.128:9092");
props.setProperty("group.id", "test");
props.setProperty("enable.auto.commit", "true");
props.setProperty("auto.commit.interval.ms", "1000");
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// Subscribe to one or more topics
consumer.subscribe(Arrays.asList(TOPIC_NAME));
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
for (ConsumerRecord<String, String> record : records)
System.out.printf("patition = %d , offset = %d, key = %s, value = %s%n",
record.partition(),record.offset(), record.key(), record.value());
}
}
/*
Manual offset commit
*/
private static void commitedOffset() {
Properties props = new Properties();
props.setProperty("bootstrap.servers", "192.168.220.128:9092");
props.setProperty("group.id", "test");
props.setProperty("enable.auto.commit", "false");
props.setProperty("auto.commit.interval.ms", "1000");
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// Subscribe to one or more topics
consumer.subscribe(Arrays.asList(TOPIC_NAME));
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
for (ConsumerRecord<String, String> record : records) {
// Suppose we want to save the record to a database; the write may succeed or fail...
// TODO record 2 db
System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
// On failure, roll back and do not commit the offset
}
// On success, manually commit the offset
consumer.commitAsync();
}
}
/*
Manual offset commit, with per-partition control
*/
private static void commitedOffsetWithPartition() {
Properties props = new Properties();
props.setProperty("bootstrap.servers", "192.168.220.128:9092");
props.setProperty("group.id", "test");
props.setProperty("enable.auto.commit", "false");
props.setProperty("auto.commit.interval.ms", "1000");
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// Subscribe to one or more topics
consumer.subscribe(Arrays.asList(TOPIC_NAME));
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
// Handle each partition separately
for(TopicPartition partition : records.partitions()){
List<ConsumerRecord<String, String>> pRecord = records.records(partition);
for (ConsumerRecord<String, String> record : pRecord) {
System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
}
long lastOffset = pRecord.get(pRecord.size() - 1).offset();
// Build the commit offset for this single partition (note the +1: the next offset to consume)
Map<TopicPartition, OffsetAndMetadata> offset = new HashMap<>();
offset.put(partition, new OffsetAndMetadata(lastOffset + 1));
// Commit the offset synchronously
consumer.commitSync(offset);
System.out.println("=============partition - "+ partition +" end================");
}
}
}
/*
Manual offset commit with per-partition control, one step further:
manually assigning specific partitions
*/
private static void commitedOffsetWithPartition2() {
Properties props = new Properties();
props.setProperty("bootstrap.servers", "192.168.220.128:9092");
props.setProperty("group.id", "test");
props.setProperty("enable.auto.commit", "false");
props.setProperty("auto.commit.interval.ms", "1000");
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// jiangzh-topic has two partitions: 0 and 1
TopicPartition p0 = new TopicPartition(TOPIC_NAME, 0);
TopicPartition p1 = new TopicPartition(TOPIC_NAME, 1);
// Subscribe to one or more topics...
// consumer.subscribe(Arrays.asList(TOPIC_NAME));
// ...or assign specific partitions of a topic (only p0 here; p1 is declared for illustration)
consumer.assign(Arrays.asList(p0));
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
// Handle each partition separately
for(TopicPartition partition : records.partitions()){
List<ConsumerRecord<String, String>> pRecord = records.records(partition);
for (ConsumerRecord<String, String> record : pRecord) {
System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
}
long lastOffset = pRecord.get(pRecord.size() - 1).offset();
// Build the commit offset for this single partition
Map<TopicPartition, OffsetAndMetadata> offset = new HashMap<>();
offset.put(partition, new OffsetAndMetadata(lastOffset + 1));
// Commit the offset synchronously
consumer.commitSync(offset);
System.out.println("=============partition - "+ partition +" end================");
}
}
}
/*
Manually set the starting offset, with manual offset commits
*/
private static void controlOffset() {
Properties props = new Properties();
props.setProperty("bootstrap.servers", "192.168.220.128:9092");
props.setProperty("group.id", "test");
props.setProperty("enable.auto.commit", "false");
props.setProperty("auto.commit.interval.ms", "1000");
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// jiangzh-topic has two partitions: 0 and 1
TopicPartition p0 = new TopicPartition(TOPIC_NAME, 0);
// Assign a specific partition of the topic
consumer.assign(Arrays.asList(p0));
while (true) {
// Manually set the starting offset
/*
1. The starting offset is controlled by the application itself
2. If the program fails, the same batch is consumed once more (at-least-once semantics)
*/
/*
A typical production pattern (see the helper sketched after this method):
1. The first run consumes from offset 0 (the usual case)
2. After consuming, say, 100 records, store offset 101 in Redis
3. Before every poll, read the latest offset from Redis
4. Consume starting from that position
*/
consumer.seek(p0, 700);
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
// Handle each partition separately
for(TopicPartition partition : records.partitions()){
List<ConsumerRecord<String, String>> pRecord = records.records(partition);
for (ConsumerRecord<String, String> record : pRecord) {
System.err.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
}
long lastOffset = pRecord.get(pRecord.size() - 1).offset();
// Build the commit offset for this single partition
Map<TopicPartition, OffsetAndMetadata> offset = new HashMap<>();
offset.put(partition, new OffsetAndMetadata(lastOffset + 1));
// Commit the offset synchronously
consumer.commitSync(offset);
System.out.println("=============partition - "+ partition +" end================");
}
}
}
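/*
The Redis-backed offset pattern described in the comments above is only sketched
in prose. Below is a minimal illustrative helper, assuming the Jedis client
library (an assumption, not part of the original sample) and a made-up key
naming scheme. controlOffset() would then call
consumer.seek(p0, fetchOffsetFromRedis(p0)) before each poll instead of
hard-coding offset 700, and would write lastOffset + 1 back after committing.
*/
private static long fetchOffsetFromRedis(TopicPartition tp) {
    // Key scheme "kafka:offset:<topic>:<partition>" is hypothetical
    try (redis.clients.jedis.Jedis jedis = new redis.clients.jedis.Jedis("127.0.0.1", 6379)) {
        String value = jedis.get("kafka:offset:" + tp.topic() + ":" + tp.partition());
        return value == null ? 0L : Long.parseLong(value); // first run: start from offset 0
    }
}
private static void saveOffsetToRedis(TopicPartition tp, long nextOffset) {
    try (redis.clients.jedis.Jedis jedis = new redis.clients.jedis.Jedis("127.0.0.1", 6379)) {
        jedis.set("kafka:offset:" + tp.topic() + ":" + tp.partition(), String.valueOf(nextOffset));
    }
}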
/*
Flow control - rate limiting
*/
private static void controlPause() {
Properties props = new Properties();
props.setProperty("bootstrap.servers", "192.168.220.128:9092");
props.setProperty("group.id", "test");
props.setProperty("enable.auto.commit", "false");
props.setProperty("auto.commit.interval.ms", "1000");
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
// jiangzh-topic has two partitions: 0 and 1
TopicPartition p0 = new TopicPartition(TOPIC_NAME, 0);
TopicPartition p1 = new TopicPartition(TOPIC_NAME, 1);
// Assign specific partitions of the topic
consumer.assign(Arrays.asList(p0,p1));
long totalNum = 40;
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
// Handle each partition separately
for(TopicPartition partition : records.partitions()){
List<ConsumerRecord<String, String>> pRecord = records.records(partition);
long num = 0;
for (ConsumerRecord<String, String> record : pRecord) {
System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
/*
The token-bucket idea (a sketch follows after this method):
1. After receiving a record, try to take a token from the bucket
2. If a token is obtained, continue with business processing
3. If no token is available, pause() and wait for tokens
4. Once the bucket has enough tokens again, resume() the consumer
*/
num++;
// Demo stand-in for the token bucket: after 40 records from partition 0, pause p0...
if(record.partition() == 0){
if(num >= totalNum){
consumer.pause(Arrays.asList(p0));
}
}
// ...and once partition 1 has also delivered 40 records, resume p0
if(record.partition() == 1){
if(num == totalNum){
consumer.resume(Arrays.asList(p0));
}
}
}
long lastOffset = pRecord.get(pRecord.size() - 1).offset();
// Build the commit offset for this single partition
Map<TopicPartition, OffsetAndMetadata> offset = new HashMap<>();
offset.put(partition, new OffsetAndMetadata(lastOffset + 1));
// Commit the offset synchronously
consumer.commitSync(offset);
System.out.println("=============partition - "+ partition +" end================");
}
}
}
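/*
A minimal sketch of the token-bucket idea from the comments in controlPause().
Guava's RateLimiter stands in for the token bucket here (an assumption; the
original text does not name a library). When no token is available the
partition is paused; polling continues so heartbeats stay alive, and the
partition is resumed as soon as a token can be taken again.
*/
private static void throttle(KafkaConsumer<String, String> consumer,
                             TopicPartition tp,
                             com.google.common.util.concurrent.RateLimiter limiter) {
    if (limiter.tryAcquire()) {
        // Token obtained: make sure the partition is being fetched
        consumer.resume(Arrays.asList(tp));
    } else {
        // No token: stop fetching from this partition until tokens refill
        consumer.pause(Arrays.asList(tp));
    }
}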
}
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
public class ConsumerThreadSample {
private final static String TOPIC_NAME="jiangzh-topic";
/*
The classic pattern: every thread creates its own KafkaConsumer, since KafkaConsumer is not thread-safe
*/
public static void main(String[] args) throws InterruptedException {
KafkaConsumerRunner r1 = new KafkaConsumerRunner();
Thread t1 = new Thread(r1);
t1.start();
Thread.sleep(15000);
r1.shutdown();
}
public static class KafkaConsumerRunner implements Runnable{
private final AtomicBoolean closed = new AtomicBoolean(false);
private final KafkaConsumer<String, String> consumer;
public KafkaConsumerRunner() {
Properties props = new Properties();
props.put("bootstrap.servers", "192.168.220.128:9092");
props.put("group.id", "test");
props.put("enable.auto.commit", "false");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
consumer = new KafkaConsumer<>(props);
TopicPartition p0 = new TopicPartition(TOPIC_NAME, 0);
TopicPartition p1 = new TopicPartition(TOPIC_NAME, 1);
consumer.assign(Arrays.asList(p0,p1));
}
public void run() {
try {
while(!closed.get()) {
// Process messages
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
for (TopicPartition partition : records.partitions()) {
List<ConsumerRecord<String, String>> pRecord = records.records(partition);
// Process each partition's records
for (ConsumerRecord<String, String> record : pRecord) {
System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
}
// Report the new offset back to Kafka
long lastOffset = pRecord.get(pRecord.size() - 1).offset();
// Note the +1: commit the offset of the next record to be consumed
consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
}
}
}catch(WakeupException e) {
if(!closed.get()) {
throw e;
}
}finally {
consumer.close();
}
}
public void shutdown() {
closed.set(true);
consumer.wakeup();
}
}
}
// A shared consumer that hands each record to a separate handler (worker thread)
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
public class ConsumerRecordThreadSample {
private final static String TOPIC_NAME = "jiangzh-topic";
public static void main(String[] args) throws InterruptedException {
String brokerList = "192.168.220.128:9092";
String groupId = "test";
int workerNum = 5;
ConsumerExecutor consumers = new ConsumerExecutor(brokerList, groupId, TOPIC_NAME);
consumers.execute(workerNum);
Thread.sleep(1000000);
consumers.shutdown();
}
// The consumer side: polls records and dispatches them to a worker pool
public static class ConsumerExecutor {
private final KafkaConsumer<String, String> consumer;
private ExecutorService executors;
private volatile boolean stopped = false;
public ConsumerExecutor(String brokerList, String groupId, String topic) {
Properties props = new Properties();
props.put("bootstrap.servers", brokerList);
props.put("group.id", groupId);
props.put("enable.auto.commit", "true");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(topic));
}
public void execute(int workerNum) {
executors = new ThreadPoolExecutor(workerNum, workerNum, 0L, TimeUnit.MILLISECONDS,
new ArrayBlockingQueue<>(1000), new ThreadPoolExecutor.CallerRunsPolicy());
// Run the poll loop on its own thread so that execute() returns and shutdown() can be reached
new Thread(() -> {
while (!stopped) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(200));
for (ConsumerRecord<String, String> record : records) {
executors.submit(new ConsumerRecordWorker(record));
}
}
// KafkaConsumer is not thread-safe, so close it on the thread that uses it
consumer.close();
}).start();
}
}
public void shutdown() {
// Signal the poll thread to stop; it closes the consumer on its own thread
stopped = true;
if (executors != null) {
executors.shutdown();
}
try {
if (executors != null && !executors.awaitTermination(10, TimeUnit.SECONDS)) {
System.out.println("Timeout.... Ignore for this case");
}
} catch (InterruptedException ignored) {
System.out.println("Other thread interrupted this shutdown, ignore for this case.");
Thread.currentThread().interrupt();
}
}
}
// Per-record worker: one Runnable handles one record
public static class ConsumerRecordWorker implements Runnable {
private final ConsumerRecord<String, String> record;
public ConsumerRecordWorker(ConsumerRecord<String, String> record) {
this.record = record;
}
@Override
public void run() {
// Pretend this is a database insert or similar business processing
System.out.println("Thread - " + Thread.currentThread().getName());
System.err.printf("partition = %d , offset = %d, key = %s, value = %s%n",
record.partition(), record.offset(), record.key(), record.value());
}
}
}
The JoinGroup process
Before a rebalance can take place, the coordinator must already have been determined. The rebalance itself consists of two steps: Join and Sync.
Join:
This step means joining the consumer group: every member sends a JoinGroup request to the coordinator. Once all members have done so, the coordinator picks one consumer to act as group leader and sends it the member list and subscription information. The leader election algorithm is simple: if the group has no leader yet, the first consumer to join becomes the leader; if that leader later leaves the group, a new one is elected more or less arbitrarily, essentially at random.
- protocol_metadata: the consumer's serialized subscription information
- leader_id: the member the coordinator selects as leader, i.e. that member's member_id
- member_metadata: the subscription information of the corresponding consumer
- members: the subscription information of every consumer in the consumer group
- generation_id: a generation marker, analogous to the epoch discussed earlier for ZooKeeper; it is incremented on every rebalance. Its main purpose is to protect the consumer group by fencing off stale offset commits: members of a previous generation cannot commit offsets into the new generation of the group.
Synchronizing Group State
The main logic here is to send a SyncGroupRequest to the GroupCoordinator and process the SyncGroupResponse. In short, the leader distributes the partition assignment plan to every consumer in the group.
Every consumer sends a SyncGroup request to the coordinator, but only the leader's request carries the assignment plan; the other members' requests are essentially empty. Once the leader has handed the plan to the coordinator, the coordinator puts the result into each SyncGroupResponse, so every member learns exactly which partitions it should consume. Note that the consumer group's partition assignment is computed on the client side, not on the broker.
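The Join/Sync handshake happens inside the client library, but its outcome is visible to application code. Below is a small sketch (not from the original text) that subscribes with a ConsumerRebalanceListener: onPartitionsRevoked fires before a rebalance, and onPartitionsAssigned fires after Join and Sync complete, once the coordinator has returned this member's share of the leader-computed assignment.
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collection;
import java.util.Properties;
public class RebalanceObserver {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.220.128:9092");
        props.put("group.id", "test");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("jiangzh-topic"), new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                // Fired before the rebalance: a good place to commit offsets for the old assignment
                System.out.println("Revoked: " + partitions);
            }
            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                // Fired after Join/Sync: the assignment for the new generation
                System.out.println("Assigned: " + partitions);
            }
        });
        // The listener callbacks are invoked from inside poll(), on the polling thread
        while (true) {
            consumer.poll(Duration.ofMillis(1000));
        }
    }
}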
每个消费者都会向coordinator发送syncgroup请求,不过只有leader节点会发送分配方案,其他消费者 只是打打酱油而已。当leader把方案发给coordinator以后,coordinator会把结果设置到 SyncGroupResponse中。这样所有成员都知道自己应该消费哪个分区。 consumer group的分区分配方案是在客户端执行的