java api 实现指定某个时间点查询kafka topic数据

1.背景
对于千万级的kafka topic,想要精准查询某条数据使用工具是难以实现的,此时需要写代码来解决

2. 代码如下


import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.time.Duration;
import java.util.*;

/**
 * Seeks every partition of a Kafka topic to the earliest offset at or after a
 * given epoch-millis timestamp, then polls and prints matching records.
 *
 * Useful when a topic holds tens of millions of records and a GUI tool cannot
 * locate one record precisely: offsetsForTimes() lets us jump straight to the
 * time window of interest instead of scanning from the beginning.
 */
public class KafkaDataConsumer {

    public static void main(String[] args) {
        // Consumer configuration.
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "bigdata027.dmp.XXX.com:9092"); // Kafka broker address
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "test-group2"); // consumer group id
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

        // Create the consumer instance.
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        // Topic to read from.
        String TOPIC_NAME = "ODS_TOPIC_ERP_TB_GOS_SALE_SALEORDERDET";

        List<PartitionInfo> topicPartitions = consumer.partitionsFor(TOPIC_NAME);

        // Epoch-millis timestamp to start consuming from (uppercase 'L' suffix
        // to avoid the easily-misread lowercase 'l').
        long fetchDataTime = 1717467300000L;

        // Map every partition of the topic to the target timestamp.
        Map<TopicPartition, Long> timestampsToSearch = new HashMap<>();
        for (PartitionInfo par : topicPartitions) {
            timestampsToSearch.put(new TopicPartition(TOPIC_NAME, par.partition()), fetchDataTime);
        }

        // Resolve, for each partition, the earliest offset whose record
        // timestamp is >= fetchDataTime. The value is null for a partition
        // that has no such record.
        Map<TopicPartition, OffsetAndTimestamp> parMap = consumer.offsetsForTimes(timestampsToSearch);

        // BUG FIX: the original code called consumer.assign(Arrays.asList(key))
        // INSIDE this loop. assign() replaces the whole assignment each time,
        // so only the last partition survived and all earlier seek() calls were
        // discarded — effectively consuming a single partition. Collect all
        // partitions first, assign them once, then seek each one.
        List<TopicPartition> partitionsToAssign = new ArrayList<>();
        for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : parMap.entrySet()) {
            if (entry.getKey() == null || entry.getValue() == null) continue;
            partitionsToAssign.add(entry.getKey());
        }
        consumer.assign(partitionsToAssign);

        for (TopicPartition partition : partitionsToAssign) {
            long offset = parMap.get(partition).offset();
            System.out.println("partition-" + partition.partition() +
                    "|offset-" + offset);
            System.out.println();
            // Position the consumer at the offset resolved for the timestamp.
            consumer.seek(partition, offset);
        }

        // Poll forever, printing only delete-operation records (op_type "D").
        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100)); // per-poll wait
                for (ConsumerRecord<String, String> record : records) {
                    if (record.value().contains("\"op_type\":\"D\"")) {
                        System.out.printf("offset = %d, key = %s, value = %s%n", record.offset(), record.key(), record.value());
                    }
                }
            }
        } finally {
            consumer.close(); // release network/file resources on shutdown
        }

    }
}

您好,我明白您的要求。针对您的问题,您可以使用 Kafka Consumer APIJava 来消费指定时间范围内的 Kafka Topic 数据。您可以先通过指定时间范围设置 Kafka Consumer 的 offset,然后再读取 Topic 中的每个消息。以下是一个简单的 Java 代码示例: ```java import java.util.*; import org.apache.kafka.clients.consumer.*; import org.apache.kafka.common.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class kafkaConsumer { public static void main(String[] args) throws Exception { Logger logger = LoggerFactory.getLogger(kafkaConsumer.class.getName()); String topicName = "your-topic-name"; String groupId = "your-group-id"; Properties props = new Properties(); props.put("bootstrap.servers", "localhost:9092"); props.put("group.id", groupId); props.put("enable.auto.commit", "false"); props.put("auto.offset.reset", "earliest"); props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props); consumer.subscribe(Arrays.asList(topicName)); // 获取指定时间范围内的 offset long timestamp = System.currentTimeMillis() - 24 * 60 * 60 * 1000; // 24 hours ago Map<TopicPartition, Long> timestampsToSearch = new HashMap<>(); List<PartitionInfo> partitions = consumer.partitionsFor(topicName); for (PartitionInfo partition : partitions) { timestampsToSearch.put(new TopicPartition(topicName, partition.partition()), timestamp); } Map<TopicPartition, OffsetAndTimestamp> offsetList = consumer.offsetsForTimes(timestampsToSearch); // 打印每个分区的 offset for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : offsetList.entrySet()) { TopicPartition topicPartition = entry.getKey(); OffsetAndTimestamp offsetAndTimestamp = entry.getValue(); if (offsetAndTimestamp != null) { long offset = offsetAndTimestamp.offset(); consumer.seek(topicPartition, offset); logger.info("Consumer is starting from offset {} for partition {}", offset, topicPartition); } } // 读取消息 try { while (true) { ConsumerRecords<String, 
String> records = consumer.poll(100); for (ConsumerRecord<String, String> record : records) { logger.info("Key: " + record.key() + ", Value: " + record.value() + ", Partition: " + record.partition() + ", Offset: " + record.offset()); } consumer.commitSync(); } } catch(Exception e) { logger.error("Error: " + e); } finally { consumer.close(); } } } ``` 以上代码会订阅指定Topic,并获取指定时间范围内的 offset,然后从这些 offset 开始读取消息。您可以根据需要修改代码中的时间范围和其他配置参数。希望这个回答对您有帮助!
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值