1. 背景
对于千万级数据量的 Kafka topic，借助现成工具很难精准查询到某条数据，此时需要编写代码来解决。
2. 代码如下
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.time.Duration;
import java.util.*;
/**
 * Command-line utility that scans a Kafka topic from a given point in time and prints
 * every record whose value contains a fixed marker string.
 *
 * <p>The tool resolves, for each partition of the topic, the earliest offset whose record
 * timestamp is greater than or equal to {@link #FETCH_DATA_TIME}, manually assigns those
 * partitions to a single consumer, seeks to the resolved offsets and then polls forever,
 * printing matching records to standard output. Stop it by terminating the process.
 */
public class KafkaDataConsumer {

    /** Kafka bootstrap server address. */
    private static final String BOOTSTRAP_SERVERS = "bigdata027.dmp.XXX.com:9092";

    /** Consumer group id passed to the client configuration. */
    private static final String GROUP_ID = "test-group2";

    /** Name of the topic to scan. */
    private static final String TOPIC_NAME = "ODS_TOPIC_ERP_TB_GOS_SALE_SALEORDERDET";

    /** Start of the scan, as an epoch timestamp in milliseconds. */
    private static final long FETCH_DATA_TIME = 1717467300000L;

    /** Substring a record value must contain in order to be printed. */
    private static final String MATCH_TEXT = "\"op_type\":\"D\"";

    /**
     * Entry point: seeks every partition of {@link #TOPIC_NAME} to the offset matching
     * {@link #FETCH_DATA_TIME} and prints all subsequent records containing
     * {@link #MATCH_TEXT}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Configure the consumer.
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, BOOTSTRAP_SERVERS);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

        // try-with-resources guarantees the consumer is closed even if setup
        // (partition lookup, offset lookup, seek) fails before polling starts.
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // Look up the partitions of the topic.
            List<PartitionInfo> topicPartitions = consumer.partitionsFor(TOPIC_NAME);
            if (topicPartitions == null || topicPartitions.isEmpty()) {
                System.err.println("No partitions found for topic " + TOPIC_NAME);
                return;
            }

            // Map every partition to the timestamp we want to start consuming from.
            Map<TopicPartition, Long> timestampsToSearch = new HashMap<>();
            for (PartitionInfo par : topicPartitions) {
                timestampsToSearch.put(new TopicPartition(TOPIC_NAME, par.partition()), FETCH_DATA_TIME);
            }

            // Resolve the timestamp to a concrete offset per partition.
            Map<TopicPartition, OffsetAndTimestamp> parMap = consumer.offsetsForTimes(timestampsToSearch);

            // Collect the start offset of every partition that has one. A null entry
            // means the lookup returned no offset for that partition, so it is skipped.
            Map<TopicPartition, Long> startOffsets = new LinkedHashMap<>();
            for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : parMap.entrySet()) {
                TopicPartition partition = entry.getKey();
                OffsetAndTimestamp found = entry.getValue();
                if (partition == null || found == null) {
                    continue;
                }
                long offset = found.offset();
                System.out.println("partition-" + partition.partition() + "|offset-" + offset);
                System.out.println();
                startOffsets.put(partition, offset);
            }
            if (startOffsets.isEmpty()) {
                System.err.println("No offsets found at or after timestamp " + FETCH_DATA_TIME
                        + " for topic " + TOPIC_NAME);
                return;
            }

            // assign() replaces any previous assignment, so it must be called once
            // with ALL partitions; calling it per partition would leave only the last
            // partition assigned and drop the earlier seeks.
            consumer.assign(new ArrayList<>(startOffsets.keySet()));
            for (Map.Entry<TopicPartition, Long> start : startOffsets.entrySet()) {
                consumer.seek(start.getKey(), start.getValue());
            }

            // Consume until the process is terminated.
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    String value = record.value();
                    // Records with a null value (tombstones) cannot match and must not NPE.
                    if (value != null && value.contains(MATCH_TEXT)) {
                        System.out.printf("offset = %d, key = %s, value = %s%n",
                                record.offset(), record.key(), value);
                    }
                }
            }
        }
    }
}