kafka-python使用手册
1. 生产者同步发送数据
# Producer: send data synchronously.
from kafka import KafkaProducer
from kafka.errors import KafkaError

producer = KafkaProducer(bootstrap_servers=["192.168.1.6:9092"])
try:
    # .get(timeout=10) blocks until the broker acknowledges the record
    # (or raises after 10 s) — this is what makes the send synchronous.
    future = producer.send("predict_task_log", b"202312301505 predict res: success")
    record_metadata = future.get(timeout=10)
    print(record_metadata.topic)
    print(record_metadata.partition)
    print(record_metadata.offset)
except KafkaError as exc:
    # Surface the concrete error instead of swallowing it.
    print(f"write data to kafka failed: {exc}")
finally:
    # close() flushes any buffered records and releases network resources.
    producer.close()
2. 生产者异步发送数据
# Producer: send data asynchronously.
from kafka import KafkaProducer
from kafka.errors import KafkaError

producer = KafkaProducer(bootstrap_servers=["192.168.1.6:9092"])


def on_send_success(record_metadata):
    """Callback invoked (on the producer's I/O thread) after a successful send."""
    print(record_metadata.topic)
    print(record_metadata.partition)
    print(record_metadata.offset)


def on_send_error(excp):
    """Callback invoked when the send ultimately fails."""
    print(f"write data to kafka error: {excp}")


try:
    # 1. send() only appends the record to an in-memory buffer; a background
    #    I/O thread transmits it, so the main thread is never blocked.
    future = producer.send("predict_task_log", b"202312301505 predict res: success")
    # 2. The background thread reports the outcome through these callbacks.
    future.add_callback(on_send_success).add_errback(on_send_error)
except KafkaError as exc:
    print(f"write data to kafka failed: {exc}")
finally:
    # close() flushes the buffer first, so the pending record is still delivered
    # and the callbacks still fire before the producer shuts down.
    producer.close()
3. 消费者自动提交offset
# Consumer: offsets committed automatically.
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    "predict_task_log",
    bootstrap_servers=["192.168.1.6:9092"],
    group_id="predict_group",
    enable_auto_commit=True,        # commit offsets automatically ...
    auto_commit_interval_ms=1000,   # ... once per second
)

# Iterating the consumer blocks and yields messages as they arrive.
for msg in consumer:
    topic, partition, offset = msg.topic, msg.partition, msg.offset
    # msg.value is raw bytes; decode it for display. msg.key may be None.
    key, value = msg.key, msg.value.decode("utf-8")
    print(f"从topic为{topic}的{partition}分区上,获取偏移量为{offset}的消息为{key}: {value}")
4. 消费者手动提交offset
# Consumer: offsets committed manually by the application.
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    "predict_task_log",
    bootstrap_servers=["192.168.1.6:9092"],
    group_id="predict_group",
    enable_auto_commit=False,  # the application commits offsets itself
)

for msg in consumer:
    topic, partition, offset = msg.topic, msg.partition, msg.offset
    key, value = msg.key, msg.value.decode("utf-8")
    print(f"从topic为{topic}的{partition}分区上,获取偏移量为{offset}的消息为{key}: {value}")
    # Commit the offset by hand. The two calls below are ALTERNATIVES —
    # in real code pick exactly one; both are shown here for the tutorial.
    consumer.commit()        # synchronous: blocks until the broker confirms
    consumer.commit_async()  # asynchronous: non-blocking, recommended
5. 消费者其他API
5.1 查询当前topic下的所有分区
consumer.partitions_for_topic(topic="predict_task_log")
5.2 消费者订阅某个topic
# 通过分区协调器自动分配要消费的分区,与assign()是不兼容的!!!
consumer.subscribe(topics=["predict_task_log"])
# 手动将某个分区指定给某消费者使用,不能和subscribe()同时使用!!!
from kafka import TopicPartition
tp = TopicPartition(topic="predict_task_log", partition=1)
consumer.assign([tp])
5.3 消费者消费消息
consumer.poll() 或者 next(consumer)
循环消费:
while True:
msg = consumer.poll()
msg...
或者
for msg in consumer:
print(msg)
msg...
5.4 查询某个分区当前消费者已经消费到的offset
consumer.position(tp)
5.5 查询某个分区当前最新的offset
consumer.assign([tp])
consumer.seek_to_end(tp)  # 先把消费位置移动到分区末尾
consumer.position(tp)     # 再读取当前位置,即该分区最新的offset
5.6 从某个offset起继续消费
restart_offset = 100 # 这个数据可以保存在redis中,可以结合业务进行指定位置重复消费。
consumer.seek(tp, offset=restart_offset)