使用 Python 消费 Kafka 时,有些场景下希望从指定的时间戳开始消费
# -*- coding: utf-8 -*-
# @Time : 2021/10/27
# @Author : baizizai
# pip install kafka-python -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
import time
from kafka import KafkaConsumer
# Connection settings. The "----" hosts look like redacted placeholders;
# replace them with real broker addresses before running.
kafkaServers = [
    "----:9092",
    "----:9092",
    "----:9092",
]
groupName = "test"
topic = "profile_topic"

# Keyword options handed to KafkaConsumer, kept together for readability.
consumerSettings = {
    "bootstrap_servers": kafkaServers,
    "group_id": groupName,
    "auto_offset_reset": 'earliest',
    "consumer_timeout_ms": 1000,
    # Maximum number of records returned by a single poll.
    "max_poll_records": 100,
    # Periodically auto-commit the offsets of consumed messages
    # (the commit happens while iterating over the consumer).
    "enable_auto_commit": True,
    "auto_commit_interval_ms": 5000,
}
consumer = KafkaConsumer(topic, **consumerSettings)
# Position every assigned partition at the first offset whose message
# timestamp is at or after a fixed wall-clock start time.

# Partitions are only assigned once the consumer has joined its group, which
# happens inside poll(). A single short poll may return before the join has
# finished, leaving the assignment empty, so keep polling until partitions
# show up or the deadline passes. Records returned by these polls are
# discarded; the seek below repositions the partitions that get an offset.
assignmentDeadline = time.time() + 30
consumer.poll(timeout_ms=100, max_records=100, update_offsets=True)
assignment = consumer.assignment()
while not assignment and time.time() < assignmentDeadline:
    consumer.poll(timeout_ms=100, max_records=100, update_offsets=True)
    assignment = consumer.assignment()
if not assignment:
    # Without an assignment nothing could be sought, and consumption would
    # silently start from the wrong position; fail loudly instead.
    raise RuntimeError(
        "no partitions assigned for topic %r within 30 seconds" % topic)

timeStr = "2021-10-26 18:00:00"
formatTime = time.strptime(timeStr, '%Y-%m-%d %H:%M:%S')
# mktime() interprets the parsed time in the machine's local timezone and
# returns seconds; the offset lookup below is given milliseconds.
startTime = int(time.mktime(formatTime)) * 1000

partitionList = list(assignment)
timestampToSearch = {tp: startTime for tp in partitionList}

offsets = consumer.offsets_for_times(timestampToSearch)
for tp in partitionList:
    offsetAndTimestamp = offsets.get(tp)
    if offsetAndTimestamp is None:
        # No message at or after startTime in this partition, so there is no
        # offset to seek to; the partition keeps its current position.
        # NOTE(review): consider consumer.seek_to_end(tp) here if messages
        # older than startTime must never be delivered -- confirm intent.
        continue
    print(offsetAndTimestamp.offset)
    consumer.seek(tp, offsetAndTimestamp.offset)
if __name__ == '__main__':
    # Consume forever and print every record. The try/finally makes sure the
    # consumer is closed when the loop is interrupted (Ctrl-C) or fails, so
    # its connections are released instead of being left for process
    # teardown.
    try:
        while True:
            # Iterating the consumer can end when no message arrives for a
            # while (see consumer_timeout_ms where the consumer is created);
            # the outer loop simply starts iterating again.
            for msg in consumer:
                message = "%s:%d:%d: key=%s value=%s" % (
                    msg.topic, msg.partition, msg.offset, msg.key, msg.value)
                print(message)
    finally:
        consumer.close()