Consuming a Kafka topic by timestamp

  • Kafka consumer: consume a topic starting from a timestamp
    This is effectively the command-line consumer with an explicit seek to the resolved offsets, so it starts reading almost immediately; recommended. The Spark Streaming variant (second approach below) is much slower to start.
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;

public class ConsumerKafka {
    public static void main(String[] args) {

        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost");
        props.put("group.id", "test");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("deserializer.encoding", "utf-8");
        props.put("auto.offset.reset", "latest");


        // SASL credentials; here assumed to arrive on the command line as:
        // <auth mode: 1=PLAIN, 2=SCRAM> <username> <password>
        String username = args.length > 1 ? args[1] : "";
        String pwd = args.length > 2 ? args[2] : "";

        if (args[0].equals("1")) { // SASL/PLAIN authentication
            props.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"" + username + "\" password=\"" + pwd + "\";");
            props.put("security.protocol", "SASL_PLAINTEXT");
            props.put("sasl.mechanism", "PLAIN");
        } else if (args[0].equals("2")) { // SASL/SCRAM authentication
            props.put("sasl.jaas.config", "org.apache.kafka.common.security.scram.ScramLoginModule required username=\"" + username + "\" password=\"" + pwd + "\";");
            props.put("security.protocol", "SASL_PLAINTEXT");
            props.put("sasl.mechanism", "SCRAM-SHA-256");
        }


        KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(props);
        String topics = "msg";
        kafkaConsumer.subscribe(Arrays.asList(topics.split(",")));

        // poll() until the group coordinator has actually assigned partitions;
        // seek() only works on assigned partitions
        Set<TopicPartition> assignment = new HashSet<>();
        while (assignment.isEmpty()) {
            kafkaConsumer.poll(100L);
            assignment = kafkaConsumer.assignment();
        }
        // target instant: each partition will be rewound to the first offset whose
        // message timestamp is >= this time
        SimpleDateFormat sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date date = new Date();
        try {
            date = sf.parse("2021-06-16 12:38:34");
        } catch (ParseException e) {
            e.printStackTrace();
        }

        Map<TopicPartition, Long> map = new HashMap<>();
        for (TopicPartition tp : assignment) {
            map.put(tp, date.getTime());
        }
        // resolve, per partition, the earliest offset with timestamp >= the target;
        // the entry value is null for partitions that have no such message
        Map<TopicPartition, OffsetAndTimestamp> offsets = kafkaConsumer.offsetsForTimes(map);
        for (TopicPartition topicPartition : offsets.keySet()) {
            OffsetAndTimestamp offsetAndTimestamp = offsets.get(topicPartition);
            if (offsetAndTimestamp != null) {
                kafkaConsumer.seek(topicPartition, offsetAndTimestamp.offset());
            }
        }


        // consume from the seeked positions onwards
        while (true) {
            ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(1000L);
            for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
                String line = consumerRecord.value();
                System.out.println(line);
            }
        }

    }
}
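One behavior worth spelling out: offsetsForTimes() maps a partition to null when no message in it carries a timestamp at or after the requested one, which is why the seek loop above checks for null. Below is a minimal sketch of an explicit fallback, assuming you would rather have such partitions start from the log end than from wherever auto.offset.reset leaves them; the class and method names are illustrative, not part of the original code.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.TopicPartition;

public class SeekFallback {
    // Partitions whose offsetsForTimes() result is null have no message at or after
    // the target timestamp; seek them to the log end so the start position is explicit.
    static void seekUnmatchedToEnd(KafkaConsumer<String, String> consumer,
                                   Map<TopicPartition, OffsetAndTimestamp> offsets) {
        List<TopicPartition> unmatched = new ArrayList<>();
        for (Map.Entry<TopicPartition, OffsetAndTimestamp> e : offsets.entrySet()) {
            if (e.getValue() == null) {
                unmatched.add(e.getKey());
            }
        }
        if (!unmatched.isEmpty()) {
            consumer.seekToEnd(unmatched);
        }
    }
}

Calling seekUnmatchedToEnd(kafkaConsumer, offsets) right after the seek loop would cover the partitions that loop skipped.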
  • Spark Streaming: consume a Kafka topic starting from a specified timestamp

import scala.collection.JavaConverters._
import scala.collection.mutable
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.joda.time.format.DateTimeFormat

  def getOffsetByTimestamp(kafkaParams: collection.Map[String, Object], time: String, topic: String): mutable.HashMap[TopicPartition, Long] = {
    val consumer = new KafkaConsumer[String, String](new java.util.HashMap[String, Object](kafkaParams.asJava))
    // parse the target time (joda-time) into epoch millis
    val fetchTime = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").parseMillis(time)

    // map each (topic, partition) to the target timestamp
    val timestampToSearch: java.util.Map[TopicPartition, java.lang.Long] = new java.util.HashMap[TopicPartition, java.lang.Long]()
    // collects each partition's resolved starting offset
    val partitionOffset = new mutable.HashMap[TopicPartition, Long]

    // partitionsFor() returns the topic's partition metadata as a java.util.List[PartitionInfo]
    val partitionInfos = consumer.partitionsFor(topic)
    for (partitionInfo <- partitionInfos.asScala) {
      val tp = new TopicPartition(partitionInfo.topic(), partitionInfo.partition())
      timestampToSearch.put(tp, fetchTime)
    }

    val topicPartitionToOffsetAndTimestamp = consumer.offsetsForTimes(timestampToSearch)
    for ((tp, offsetAndTimestamp) <- topicPartitionToOffsetAndTimestamp.asScala) {
      // the lookup yields null for partitions with no message at or after the timestamp
      if (offsetAndTimestamp != null) {
        partitionOffset += tp -> offsetAndTimestamp.offset()
      }
    }
    consumer.close()
    partitionOffset
  }


    val messages: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      // Subscribe takes a starting-offsets map; partitions missing from the map
      // start from the committed offset or the auto.offset.reset position
      Subscribe[String, String](topics, kafkaParams, getOffsetByTimestamp(kafkaParams, startTime, topic)))
    messages
  }
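For readers following the Java example above, the same starting-offsets map can also be passed through the Java API of spark-streaming-kafka-0-10. A minimal sketch, assuming a JavaStreamingContext and the kafkaParams map are already set up (the class name is illustrative):

import java.util.Arrays;
import java.util.Map;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;

public class TimestampStream {
    // Build a direct stream whose partitions start at the offsets resolved from a timestamp.
    static JavaInputDStream<ConsumerRecord<String, String>> fromTimestampOffsets(
            JavaStreamingContext jssc,
            Map<String, Object> kafkaParams,
            Map<TopicPartition, Long> fromOffsets) {
        return KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(
                        Arrays.asList("msg"), kafkaParams, fromOffsets));
    }
}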