A consumer must keep track of which offset it has consumed up to, so that it can resume from the right position after a failure or restart. (Since Kafka 0.9, committed offsets are stored in the internal topic __consumer_offsets.) Offset management is therefore a core concern when designing a consumer.
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.*;

public static void consumerMsg() {
    Properties props = new Properties();
    // Connect to the Kafka cluster
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop102:9092");
    // Consumer group: all consumers with the same group.id belong to the same group
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
    // auto.offset.reset only takes effect when the group has no committed offset
    // (e.g. a brand-new group) or the committed offset no longer exists (data deleted):
    //   earliest: reset to the earliest available offset
    //   latest:   reset to the latest offset (the default)
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Disable auto-commit; offsets must then be committed manually via
    // consumer.commitSync() or consumer.commitAsync(). With enable.auto.commit=true
    // the client commits periodically on its own and no manual call is needed.
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    // Deserializers for the record key and value
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
            StringDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
            StringDeserializer.class.getName());
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    // Subscribe to the topic(s) to consume
    consumer.subscribe(Arrays.asList("firstTopic"));
    while (true) {
        // poll() blocks up to the given timeout and may return multiple records
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
        for (ConsumerRecord<String, String> record : records) {
            System.out.println("offset:" + record.offset()
                    + " key:" + record.key() + " value:" + record.value());
        }
        /*
         * Note: both synchronous and asynchronous commits can lead to missed or
         * duplicated consumption. Committing the offset before processing risks
         * missing records (at-most-once); processing before committing risks
         * re-processing them (at-least-once).
         */
        // Option 1: synchronous commit -- blocks the current thread until the
        // commit succeeds
        // consumer.commitSync();
        // Option 2: asynchronous commit -- does not block; failures are
        // reported via the callback
        consumer.commitAsync(new OffsetCommitCallback() {
            @Override
            public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets,
                                   Exception exception) {
                if (exception != null) {
                    System.err.println("async offset commit failed for: " + offsets);
                }
            }
        });
    }
}
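If exact delivery guarantees are not required, the manual commit logic above can be dropped entirely by enabling auto-commit. A minimal sketch of the relevant settings (the 5-second interval is an illustrative value, not from the original):

    // Let the client commit offsets periodically in the background.
    // Simpler than manual commits, but a crash between two commits can
    // lead to duplicated (or, with careless processing, missed) records.
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
    // Commit interval in milliseconds (illustrative value)
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "5000");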
Consumer that stores offsets in custom storage
// In-memory view of the offsets consumed so far, keyed by partition;
// this is what gets persisted to the custom offset store
private static Map<TopicPartition, Long> currentOffset = new HashMap<>();

// Consumer that manages its offsets in custom storage instead of Kafka
public static void consumerMsgSaveOffset() {
    Properties props = new Properties();
    // Connect to the Kafka cluster
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop102:9092");
    // Consumer group: all consumers with the same group.id belong to the same group
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
    // auto.offset.reset only takes effect when the group has no committed offset
    // (e.g. a brand-new group) or the committed offset no longer exists (data deleted):
    //   earliest: reset to the earliest available offset
    //   latest:   reset to the latest offset (the default)
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Disable auto-commit; offsets are managed entirely by this application
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    // Deserializers for the record key and value
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
            StringDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
            StringDeserializer.class.getName());
    final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    ConsumerRebalanceListener listener = new ConsumerRebalanceListener() {
        // Called before a rebalance: persist what has been consumed so far
        @Override
        public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
            commitOffset(currentOffset);
        }

        // Called after a rebalance: resume each newly assigned partition
        // from its last saved offset in the custom store
        @Override
        public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
            currentOffset.clear();
            for (TopicPartition partition : partitions) {
                consumer.seek(partition, getOffset(partition));
            }
        }
    };
    // Subscribe with the rebalance listener attached
    consumer.subscribe(Arrays.asList("firstTopic"), listener);
    while (true) {
        // poll() blocks up to the given timeout and may return multiple records
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
        for (ConsumerRecord<String, String> record : records) {
            System.out.println("offset:" + record.offset()
                    + " key:" + record.key() + " value:" + record.value());
            // Track the next offset to consume for this partition, so that
            // seek() after a rebalance does not re-read the last record
            currentOffset.put(new TopicPartition(record.topic(), record.partition()),
                    record.offset() + 1);
        }
        // Persist all tracked offsets after processing the batch
        commitOffset(currentOffset);
    }
}
// Fetch a partition's last saved offset from the custom store.
// Placeholder: a real implementation would read from external storage;
// returning 0 restarts the partition from the beginning.
private static long getOffset(TopicPartition partition) {
    return 0;
}

// Persist the offsets of all partitions owned by this consumer to the custom store.
// Placeholder: storing offsets atomically together with the processing results
// is what makes exactly-once consumption possible.
private static void commitOffset(Map<TopicPartition, Long> currentOffset) {
}
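To make the example concrete, here is one minimal sketch of the two placeholders that persists offsets to a local properties file. Everything in it (the file name, the "topic-partition" key format, the loadStore helper) is an illustrative assumption, not part of the original; a production system would normally use a transactional store such as a relational database, so that offsets and processing results commit together.

    // Illustrative file-backed offset store (requires: import java.io.*;)
    private static final File OFFSET_FILE = new File("consumer-offsets.properties");

    // Load the properties file, or return an empty store if it does not exist yet
    private static Properties loadStore() {
        Properties stored = new Properties();
        if (OFFSET_FILE.exists()) {
            try (InputStream in = new FileInputStream(OFFSET_FILE)) {
                stored.load(in);
            } catch (IOException e) {
                throw new RuntimeException("failed to read offset store", e);
            }
        }
        return stored;
    }

    private static long getOffset(TopicPartition partition) {
        // The "topic-partition" key format is an assumption of this sketch
        String value = loadStore().getProperty(
                partition.topic() + "-" + partition.partition());
        return value == null ? 0L : Long.parseLong(value);
    }

    private static void commitOffset(Map<TopicPartition, Long> currentOffset) {
        Properties stored = loadStore(); // merge with offsets already on disk
        for (Map.Entry<TopicPartition, Long> entry : currentOffset.entrySet()) {
            TopicPartition tp = entry.getKey();
            stored.setProperty(tp.topic() + "-" + tp.partition(),
                    String.valueOf(entry.getValue()));
        }
        try (OutputStream out = new FileOutputStream(OFFSET_FILE)) {
            stored.store(out, "custom consumer offsets");
        } catch (IOException e) {
            throw new RuntimeException("failed to persist offsets", e);
        }
    }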