pykafka生产消费常用api

最新推荐文章于 2024-04-07 09:59:59 发布

songhao8080

最新推荐文章于 2024-04-07 09:59:59 发布

阅读量203

点赞数

本文链接：https://blog.csdn.net/songhao8080/article/details/103670126

版权

pykafka生产消费常用api

pykafka基本生产消费常用api
生产者
案例

Python

#coding=utf-8
import time

from pykafka import KafkaClient


class KafkaTest(object):
    """Demonstrate commonly used pykafka producer APIs.

    Every method is a self-contained demo that prints what it observes so
    the effect of producing a message on the topic offsets can be followed
    on the console.
    """

    def __init__(self, host="192.168.237.129:9092"):
        """Create the Kafka client.

        :param host: broker address in ``host:port`` form, passed to
            ``KafkaClient`` as ``hosts``.
        """
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def producer_partition(self):
        """Show how one synchronous produce changes the topic offsets.

        Prints the partitions, the earliest available offsets, and the
        latest available offsets before and after producing one message.

        :return: None
        """
        topic = self.client.topics["test_topic".encode()]
        partitions = topic.partitions
        print(u"查看所有分区 {}".format(partitions))

        earliest_offset = topic.earliest_available_offsets()
        print(u"获取最早可用的offset {}".format(earliest_offset))

        # Look at the offsets before producing anything.
        last_offset = topic.latest_available_offsets()
        print(u"最近可用offset {}".format(last_offset))

        # Produce synchronously. Using the producer as a context manager
        # stops it on exit instead of leaving it running.
        with topic.get_producer(sync=True) as p:
            p.produce(str(time.time()).encode())

        # Look at the offsets again to see the change.
        last_offset = topic.latest_available_offsets()
        print(u"最近可用offset {}".format(last_offset))

    def producer_designated_partition(self):
        """Write a message to a chosen partition.

        To control which partition a message goes to, a partitioner
        function is supplied when the producer is created and a partition
        key is passed along when the message is produced.

        :return: None
        """

        def assign_partition(pid, key):
            """Pick the first partition of the list for every message.

            :param pid: list of candidate partitions
            :param key: partition key that was given to ``produce``
            :return: ``pid[0]``
            """
            print("为消息分配partition {} {}".format(pid, key))
            return pid[0]

        topic = self.client.topics["test_topic".encode()]
        with topic.get_producer(sync=True, partitioner=assign_partition) as p:
            p.produce(str(time.time()).encode(), partition_key=b"partition_key_0")

    def async_produce_message(self):
        """Produce asynchronously and time how long delivery takes.

        Author's note: with an asynchronous producer the message is pushed
        onto a queue, and another thread sends the queued messages together
        once the queue reaches a threshold (``min_queued_messages``) or a
        period of time (``linger_ms``, 5s by default) has passed.

        The method polls the latest offset once per second until it differs
        from the offset recorded before producing.

        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

        # Remember the starting offset so a change can be detected.
        old_offset = last_offset[0].offset[0]

        producer = topic.get_producer(
            sync=False,
            partitioner=lambda pid, key: pid[0],
        )
        with producer as p:
            p.produce(str(time.time()).encode())
            s_time = time.time()
            while True:
                last_offset = topic.latest_available_offsets()
                print("最近可用offset {}".format(last_offset))
                if last_offset[0].offset[0] != old_offset:
                    e_time = time.time()
                    print('cost time {}'.format(e_time - s_time))
                    break
                time.sleep(1)

    def get_produce_message_report(self):
        """Produce asynchronously and wait for the delivery report.

        Author's note: by default the report only becomes available after
        waiting about 5s.

        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

        producer = topic.get_producer(
            sync=False,
            delivery_reports=True,
            partitioner=lambda pid, key: pid[0],
        )
        with producer as p:
            p.produce(str(time.time()).encode())
            s_time = time.time()
            delivery_report = p.get_delivery_report()
            e_time = time.time()
        print('等待{}s, 递交报告{}'.format(e_time - s_time, delivery_report))

        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))


if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # Uncomment one of the following to run a different demo:
    # kafka_ins.producer_partition()
    # kafka_ins.producer_designated_partition()
    # kafka_ins.async_produce_message()
    kafka_ins.get_produce_message_report()

100

101

102

103

104

105

106

107

108

109

110

#coding=utf-8

import time

from pykafka import KafkaClient

class KafkaTest(object):
    """Demonstrate commonly used pykafka producer APIs.

    Every method is a self-contained demo that prints what it observes so
    the effect of producing a message on the topic offsets can be followed
    on the console.
    """

    def __init__(self, host="192.168.237.129:9092"):
        """Create the Kafka client.

        :param host: broker address in ``host:port`` form, passed to
            ``KafkaClient`` as ``hosts``.
        """
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def producer_partition(self):
        """Show how one synchronous produce changes the topic offsets.

        Prints the partitions, the earliest available offsets, and the
        latest available offsets before and after producing one message.

        :return: None
        """
        topic = self.client.topics["test_topic".encode()]
        partitions = topic.partitions
        print(u"查看所有分区 {}".format(partitions))

        earliest_offset = topic.earliest_available_offsets()
        print(u"获取最早可用的offset {}".format(earliest_offset))

        # Look at the offsets before producing anything.
        last_offset = topic.latest_available_offsets()
        print(u"最近可用offset {}".format(last_offset))

        # Produce synchronously. Using the producer as a context manager
        # stops it on exit instead of leaving it running.
        with topic.get_producer(sync=True) as p:
            p.produce(str(time.time()).encode())

        # Look at the offsets again to see the change.
        last_offset = topic.latest_available_offsets()
        print(u"最近可用offset {}".format(last_offset))

    def producer_designated_partition(self):
        """Write a message to a chosen partition.

        To control which partition a message goes to, a partitioner
        function is supplied when the producer is created and a partition
        key is passed along when the message is produced.

        :return: None
        """

        def assign_partition(pid, key):
            """Pick the first partition of the list for every message.

            :param pid: list of candidate partitions
            :param key: partition key that was given to ``produce``
            :return: ``pid[0]``
            """
            print("为消息分配partition {} {}".format(pid, key))
            return pid[0]

        topic = self.client.topics["test_topic".encode()]
        with topic.get_producer(sync=True, partitioner=assign_partition) as p:
            p.produce(str(time.time()).encode(), partition_key=b"partition_key_0")

    def async_produce_message(self):
        """Produce asynchronously and time how long delivery takes.

        Author's note: with an asynchronous producer the message is pushed
        onto a queue, and another thread sends the queued messages together
        once the queue reaches a threshold (``min_queued_messages``) or a
        period of time (``linger_ms``, 5s by default) has passed.

        The method polls the latest offset once per second until it differs
        from the offset recorded before producing.

        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

        # Remember the starting offset so a change can be detected.
        old_offset = last_offset[0].offset[0]

        producer = topic.get_producer(
            sync=False,
            partitioner=lambda pid, key: pid[0],
        )
        with producer as p:
            p.produce(str(time.time()).encode())
            s_time = time.time()
            while True:
                last_offset = topic.latest_available_offsets()
                print("最近可用offset {}".format(last_offset))
                if last_offset[0].offset[0] != old_offset:
                    e_time = time.time()
                    print('cost time {}'.format(e_time - s_time))
                    break
                time.sleep(1)

    def get_produce_message_report(self):
        """Produce asynchronously and wait for the delivery report.

        Author's note: by default the report only becomes available after
        waiting about 5s.

        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

        producer = topic.get_producer(
            sync=False,
            delivery_reports=True,
            partitioner=lambda pid, key: pid[0],
        )
        with producer as p:
            p.produce(str(time.time()).encode())
            s_time = time.time()
            delivery_report = p.get_delivery_report()
            e_time = time.time()
        print('等待{}s, 递交报告{}'.format(e_time - s_time, delivery_report))

        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # Uncomment one of the following to run a different demo:
    # kafka_ins.producer_partition()
    # kafka_ins.producer_designated_partition()
    # kafka_ins.async_produce_message()
    kafka_ins.get_produce_message_report()

注意要点: 多进程使用pykafka共享一个client时，会造成只有一个进程能够正常写入数据；如果使用了delivery_reports（包括同步生产），会导致子进程彻底阻塞、不可用。
消费者
pykafka消费者分为simple和balanced两种
simple适用于需要消费指定分区、且不需要自动重分配（由使用者自行指定分区）的场景
需要自动分配分区时则选择balanced
案例

Python

#coding=utf-8
from pykafka import KafkaClient


class KafkaTest(object):
    """Demonstrate pykafka's simple and balanced consumers."""

    def __init__(self, host="192.168.237.129:9092"):
        """Create the Kafka client.

        :param host: broker address in ``host:port`` form, passed to
            ``KafkaClient`` as ``hosts``.
        """
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def simple_consumer(self, offset=0):
        """Consume three messages from one partition with a simple consumer.

        :param offset: offset the consumed partition is reset to before
            consuming
        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        partitions = topic.partitions
        last_offset = topic.latest_available_offsets()
        print("最近可用offset {}".format(last_offset))

        # Consume from a single chosen partition only.
        consumer = topic.get_simple_consumer(
            b"simple_consumer_group",
            partitions=[partitions[0]],
        )
        try:
            # Offsets this consumer currently holds for its partitions.
            offset_list = consumer.held_offsets
            print("当前消费者分区offset情况{}".format(offset_list))

            # Move the consumer to the requested offset.
            consumer.reset_offsets([(partitions[0], offset)])

            for _ in range(3):
                msg = consumer.consume()
                print("消费 :{}".format(msg.value.decode()))

            held = consumer.held_offsets
            print("当前消费者分区offset情况{}".format(held))
        finally:
            # Always release the consumer, even if consuming fails.
            consumer.stop()

    def balance_consumer(self, offset=0):
        """Consume from Kafka forever with a balanced consumer.

        :param offset: not used by this demo; kept so existing callers that
            pass it keep working
        :return: None (loops until interrupted)
        """
        topic = self.client.topics["kafka_test".encode()]
        # Author's note: managed=True uses the new-style rebalancing, which
        # does not need ZooKeeper; managed=False rebalances through
        # ZooKeeper and therefore requires it.
        consumer = topic.get_balanced_consumer(b"consumer_group_balanced2", managed=True)
        try:
            partitions = topic.partitions
            print("分区 {}".format(partitions))

            earliest_offsets = topic.earliest_available_offsets()
            print("最早可用offset {}".format(earliest_offsets))

            last_offsets = topic.latest_available_offsets()
            print("最近可用offset {}".format(last_offsets))

            held = consumer.held_offsets
            print("当前消费者分区offset情况{}".format(held))

            while True:
                msg = consumer.consume()
                held = consumer.held_offsets
                print("{}, 当前消费者分区offset情况{}".format(msg.value.decode(), held))
        finally:
            # Runs on Ctrl-C as well, so the consumer is stopped when the
            # endless loop is interrupted.
            consumer.stop()


if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # Uncomment to run the simple-consumer demo instead:
    # kafka_ins.simple_consumer()
    kafka_ins.balance_consumer()

#coding=utf-8

from pykafka import KafkaClient

class KafkaTest(object):
    """Demonstrate pykafka's simple and balanced consumers."""

    def __init__(self, host="192.168.237.129:9092"):
        """Create the Kafka client.

        :param host: broker address in ``host:port`` form, passed to
            ``KafkaClient`` as ``hosts``.
        """
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def simple_consumer(self, offset=0):
        """Consume three messages from one partition with a simple consumer.

        :param offset: offset the consumed partition is reset to before
            consuming
        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        partitions = topic.partitions
        last_offset = topic.latest_available_offsets()
        print("最近可用offset {}".format(last_offset))

        # Consume from a single chosen partition only.
        consumer = topic.get_simple_consumer(
            b"simple_consumer_group",
            partitions=[partitions[0]],
        )
        try:
            # Offsets this consumer currently holds for its partitions.
            offset_list = consumer.held_offsets
            print("当前消费者分区offset情况{}".format(offset_list))

            # Move the consumer to the requested offset.
            consumer.reset_offsets([(partitions[0], offset)])

            for _ in range(3):
                msg = consumer.consume()
                print("消费 :{}".format(msg.value.decode()))

            held = consumer.held_offsets
            print("当前消费者分区offset情况{}".format(held))
        finally:
            # Always release the consumer, even if consuming fails.
            consumer.stop()

    def balance_consumer(self, offset=0):
        """Consume from Kafka forever with a balanced consumer.

        :param offset: not used by this demo; kept so existing callers that
            pass it keep working
        :return: None (loops until interrupted)
        """
        topic = self.client.topics["kafka_test".encode()]
        # Author's note: managed=True uses the new-style rebalancing, which
        # does not need ZooKeeper; managed=False rebalances through
        # ZooKeeper and therefore requires it.
        consumer = topic.get_balanced_consumer(b"consumer_group_balanced2", managed=True)
        try:
            partitions = topic.partitions
            print("分区 {}".format(partitions))

            earliest_offsets = topic.earliest_available_offsets()
            print("最早可用offset {}".format(earliest_offsets))

            last_offsets = topic.latest_available_offsets()
            print("最近可用offset {}".format(last_offsets))

            held = consumer.held_offsets
            print("当前消费者分区offset情况{}".format(held))

            while True:
                msg = consumer.consume()
                held = consumer.held_offsets
                print("{}, 当前消费者分区offset情况{}".format(msg.value.decode(), held))
        finally:
            # Runs on Ctrl-C as well, so the consumer is stopped when the
            # endless loop is interrupted.
            consumer.stop()

if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # Uncomment to run the simple-consumer demo instead:
    # kafka_ins.simple_consumer()
    kafka_ins.balance_consumer()

zeropython 微信公众号 5868037 QQ号 5868037@qq.com QQ邮箱

songhao8080

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫