Common pykafka producer and consumer APIs

Producer
Example

Python
# coding=utf-8

import time
from pykafka import KafkaClient


class KafkaTest(object):
    """
    Exercise the commonly used kafka producer APIs.
    """
    def __init__(self, host="192.168.237.129:9092"):
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def producer_partition(self):
        """
        Inspect the topic's partitions and watch how the offsets change
        when a message is produced.
        :return:
        """
        topic = self.client.topics["test_topic".encode()]
        partitions = topic.partitions
        print("All partitions {}".format(partitions))

        earliest_offset = topic.earliest_available_offsets()
        print("Earliest available offsets {}".format(earliest_offset))

        # Check the offsets before producing a message
        last_offset = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offset))

        # Produce a message synchronously
        p = topic.get_producer(sync=True)
        p.produce(str(time.time()).encode())

        # Check how the offsets changed
        last_offset = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offset))

    def producer_designated_partition(self):
        """
        Write messages to a specific partition. To control which partition a
        message lands on, pass a partitioner function when creating the
        producer and supply a partition_key when producing.
        :return:
        """

        def assign_partition(pid, key):
            """
            Always pick the first partition (id=0) for this test.
            :param pid: list of the topic's partitions
            :param key: the partition key passed to produce()
            :return:
            """
            print("Assigning a partition for the message {} {}".format(pid, key))
            return pid[0]

        topic = self.client.topics["test_topic".encode()]
        p = topic.get_producer(sync=True, partitioner=assign_partition)
        p.produce(str(time.time()).encode(), partition_key=b"partition_key_0")

    def async_produce_message(self):
        """
        Produce a message asynchronously. Messages are pushed onto a queue,
        and a background thread sends them in a batch once the queue holds
        enough messages (min_queued_messages) or enough time has passed
        (linger_ms, 5 s by default).
        :return:
        """
        topic = self.client.topics["kafka_test".encode()]
        last_offset = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offset))

        # Remember the offset before producing
        old_offset = last_offset[0].offset[0]
        p = topic.get_producer(sync=False, partitioner=lambda pid, key: pid[0])
        p.produce(str(time.time()).encode())
        s_time = time.time()
        while True:
            last_offset = topic.latest_available_offsets()
            print("Latest available offsets {}".format(last_offset))
            if last_offset[0].offset[0] != old_offset:
                e_time = time.time()
                print('cost time {}'.format(e_time - s_time))
                break
            time.sleep(1)

    def get_produce_message_report(self):
        """
        Fetch the delivery report for an asynchronously produced message.
        By default the report only becomes available after about 5 s.
        """
        topic = self.client.topics["kafka_test".encode()]
        last_offset = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offset))
        p = topic.get_producer(sync=False, delivery_reports=True,
                               partitioner=lambda pid, key: pid[0])
        p.produce(str(time.time()).encode())
        s_time = time.time()
        delivery_report = p.get_delivery_report()
        e_time = time.time()
        print('waited {}s, delivery report {}'.format(e_time - s_time, delivery_report))
        last_offset = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offset))


if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # kafka_ins.producer_partition()
    # kafka_ins.producer_designated_partition()
    # kafka_ins.async_produce_message()
    kafka_ins.get_produce_message_report()
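
The batching thresholds mentioned in async_produce_message can also be set explicitly when the producer is created. Below is a minimal sketch, assuming the same broker address and kafka_test topic as the example above; the linger_ms and min_queued_messages values are only illustrative, not recommendations.

Python
# coding=utf-8
# Minimal sketch: tuning the async producer's batching thresholds.
import time
from pykafka import KafkaClient

client = KafkaClient(hosts="192.168.237.129:9092")
topic = client.topics[b"kafka_test"]

# linger_ms: how long the background thread waits before flushing a batch
# min_queued_messages: flush as soon as this many messages are queued
producer = topic.get_producer(
    sync=False,
    linger_ms=1000,           # flush roughly every second instead of the 5 s default
    min_queued_messages=10,   # ...or as soon as 10 messages are waiting
)
for i in range(20):
    producer.produce("message-{} {}".format(i, time.time()).encode())
producer.stop()  # flush anything still queued before exiting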
 

Note: when multiple processes share a single pykafka client, only one of the processes can write data normally; and if delivery reports are used (including sync mode), the child processes block completely and become unusable.
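
One common way to avoid this is to build the KafkaClient and producer inside each worker process, after the fork, so no connection state is shared with the parent. A minimal sketch, assuming the same broker address and kafka_test topic as above; the worker function and process count are illustrative.

Python
# coding=utf-8
# Sketch: give each process its own KafkaClient instead of sharing the parent's.
import time
from multiprocessing import Process
from pykafka import KafkaClient


def worker(n):
    # Each child creates its own client/producer after the fork,
    # so none of the parent's connections are reused.
    client = KafkaClient(hosts="192.168.237.129:9092")
    topic = client.topics[b"kafka_test"]
    with topic.get_producer(sync=True) as producer:
        producer.produce("from process {} at {}".format(n, time.time()).encode())


if __name__ == '__main__':
    procs = [Process(target=worker, args=(i,)) for i in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()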
Consumer
pykafka consumers come in two flavours: simple and balanced.
The simple consumer suits consuming specific, self-chosen partitions with no automatic rebalancing.
The balanced consumer is the choice when partitions should be assigned and rebalanced automatically.
Example

Python
# coding=utf-8

from pykafka import KafkaClient


class KafkaTest(object):
    def __init__(self, host="192.168.237.129:9092"):
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def simple_consumer(self, offset=0):
        """
        Consume from an explicitly chosen partition, starting at a given offset.
        :param offset:
        :return:
        """
        topic = self.client.topics["kafka_test".encode()]
        partitions = topic.partitions
        last_offset = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offset))  # offsets for all partitions
        consumer = topic.get_simple_consumer(b"simple_consumer_group",
                                             partitions=[partitions[0]])  # consume a single partition
        offset_list = consumer.held_offsets
        print("Offsets currently held by this consumer {}".format(offset_list))
        consumer.reset_offsets([(partitions[0], offset)])  # set the starting offset
        msg = consumer.consume()
        print("Consumed: {}".format(msg.value.decode()))
        msg = consumer.consume()
        print("Consumed: {}".format(msg.value.decode()))
        msg = consumer.consume()
        print("Consumed: {}".format(msg.value.decode()))
        offset = consumer.held_offsets
        print("Offsets currently held by this consumer {}".format(offset))

    def balance_consumer(self, offset=0):
        """
        Consume kafka with a balanced consumer.
        :return:
        """
        topic = self.client.topics["kafka_test".encode()]
        # With managed=True, the new-style group management protocol handles
        # rebalancing and ZooKeeper is not needed; with False, rebalancing is
        # coordinated through ZooKeeper instead.
        consumer = topic.get_balanced_consumer(b"consumer_group_balanced2", managed=True)
        partitions = topic.partitions
        print("Partitions {}".format(partitions))
        earliest_offsets = topic.earliest_available_offsets()
        print("Earliest available offsets {}".format(earliest_offsets))
        last_offsets = topic.latest_available_offsets()
        print("Latest available offsets {}".format(last_offsets))
        offset = consumer.held_offsets
        print("Offsets currently held by this consumer {}".format(offset))
        while True:
            msg = consumer.consume()
            offset = consumer.held_offsets
            print("{}, offsets currently held by this consumer {}".format(msg.value.decode(), offset))


if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # kafka_ins.simple_consumer()
    kafka_ins.balance_consumer()
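
With the default settings, the examples above do not commit consumer offsets back to Kafka, so a restarted consumer group does not remember where it left off. If committed offsets are wanted, the balanced consumer can auto-commit them. A minimal sketch, reusing the same group name and topic; the commit interval is illustrative.

Python
# coding=utf-8
# Sketch: balanced consumer that periodically commits its held offsets.
from pykafka import KafkaClient

client = KafkaClient(hosts="192.168.237.129:9092")
topic = client.topics[b"kafka_test"]
consumer = topic.get_balanced_consumer(
    b"consumer_group_balanced2",
    managed=True,
    auto_commit_enable=True,       # commit held offsets in the background
    auto_commit_interval_ms=5000,  # every 5 seconds (illustrative)
)
for msg in consumer:               # consumers are iterable
    print("{} @ offset {}".format(msg.value.decode(), msg.offset))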
 


