![68d49ec05892e6bf2c5a00433e1de1d4.png](https://i-blog.csdnimg.cn/blog_migrate/107cf26c1d8d6c0440864644121d734e.jpeg)
技术博客: https://github.com/yongxinz/tech-blog
同时,也欢迎关注我的微信公众号 AlwaysBeta,更多精彩内容等你来。
![bdd34dec0821c9931b9eb0ed1be43dfd.png](https://i-blog.csdnimg.cn/blog_migrate/740b897b5dab776b069506fb830dc524.jpeg)
所用 Python 依赖包:kafka-python 1.3.3
生产者:
# -*- coding:utf-8 -*-
# Minimal producer: send three messages to the 'test' topic.
from kafka import KafkaProducer

# bootstrap_servers may list several brokers, e.g.
# ['0.0.0.1:9092', '0.0.0.2:9092', '0.0.0.3:9092']
producer = KafkaProducer(bootstrap_servers=['localhost:9092'])
try:
    for i in range(3):
        # No value_serializer is configured, so the payload must already be
        # bytes; a text str is rejected by send() on Python 3.
        msg = ("msg%d" % i).encode('utf-8')
        producer.send('test', msg)
finally:
    # Close the producer even if send() raised.
    producer.close()
生产者-压缩消息发送
# -*- coding:utf-8 -*-
# Producer that gzip-compresses the messages it sends to the 'test' topic.
from kafka import KafkaProducer

# bootstrap_servers may list several brokers, e.g.
# ['0.0.0.1:9092', '0.0.0.2:9092', '0.0.0.3:9092']
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    compression_type='gzip',
)
try:
    for i in range(3):
        # No value_serializer is configured, so the payload must already be
        # bytes; a text str is rejected by send() on Python 3.
        msg = ("msg%d" % i).encode('utf-8')
        producer.send('test', msg)
finally:
    # Close the producer even if send() raised.
    producer.close()
若消息过大,可通过 compression_type 参数压缩后再发送,可选值为 gzip、snappy、lz4。
生产者-json 数据
# -*- coding:utf-8 -*-
# Producer that serializes every value as JSON before sending it.
import json

from kafka import KafkaProducer


def _to_json_bytes(value):
    """Serialize *value* to ASCII-encoded JSON bytes."""
    return json.dumps(value).encode('ascii')


# bootstrap_servers may list several brokers, e.g.
# ['0.0.0.1:9092', '0.0.0.2:9092', '0.0.0.3:9092']
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=_to_json_bytes,
)
for index in range(3):
    text = "msg%d" % index
    # The same string is used as both the dict key and its value.
    producer.send('test', {text: text})
producer.close()
消费者:
# -*- coding:utf-8 -*-
# Basic consumer: print every message received on the 'test' topic.
from kafka import KafkaConsumer

LINE_FORMAT = "%s:%d:%d: key=%s value=%s"

consumer = KafkaConsumer('test', bootstrap_servers=['localhost:9092'])
for record in consumer:
    fields = (
        record.topic,
        record.partition,
        record.offset,
        record.key,
        record.value,
    )
    print(LINE_FORMAT % fields)
先启动消费者,再启动生产者,可以看到消费者程序可以正常消费消息。
消费者-json 数据
# -*- coding:utf-8 -*-
# Consumer that decodes every message value from JSON before printing it.
import json

from kafka import KafkaConsumer

LINE_FORMAT = "%s:%d:%d: key=%s value=%s"


def _from_json_bytes(raw):
    """Decode ASCII-encoded JSON bytes into a Python object."""
    return json.loads(raw.decode('ascii'))


consumer = KafkaConsumer(
    'test',
    bootstrap_servers=['localhost:9092'],
    value_deserializer=_from_json_bytes,
)
for record in consumer:
    fields = (
        record.topic,
        record.partition,
        record.offset,
        record.key,
        record.value,
    )
    print(LINE_FORMAT % fields)
key 同样支持 json 格式生产和消费,只需指定 key_serializer 和 key_deserializer。
消费者-读取最早可读消息
# -*- coding:utf-8 -*-
# Consumer configured with auto_offset_reset='earliest'.
from kafka import KafkaConsumer

LINE_FORMAT = "%s:%d:%d: key=%s value=%s"

consumer = KafkaConsumer(
    'test',
    bootstrap_servers=['localhost:9092'],
    auto_offset_reset='earliest',
)
for record in consumer:
    fields = (
        record.topic,
        record.partition,
        record.offset,
        record.key,
        record.value,
    )
    print(LINE_FORMAT % fields)
auto_offset_reset 的取值:earliest 表示从最早的可用消息开始消费,latest 表示从最新的消息开始消费。
消费者-手动设置偏移量
# -*- coding:utf-8 -*-
# Consumer that prints topic/partition metadata, then seeks partition 0 of
# 'test' to offset 1 before consuming.
from kafka import KafkaConsumer
from kafka.structs import TopicPartition

consumer = KafkaConsumer('test', bootstrap_servers=['localhost:9092'])
# Partition 0 of 'test' is used by both position() and seek() below.
partition = TopicPartition(topic='test', partition=0)

# print() with a single argument behaves the same on Python 2 and Python 3;
# the bare `print x` statement form is a SyntaxError on Python 3.

# Partition information for the 'test' topic
print(consumer.partitions_for_topic('test'))
# List of topics
print(consumer.topics())
# Topics this consumer is subscribed to
print(consumer.subscription())
# Topic/partition pairs currently assigned to this consumer
print(consumer.assignment())
# Current offset of the consumer on partition 0
print(consumer.position(partition))
# Reset the offset: consume starting from offset 1
consumer.seek(partition, 1)

for message in consumer:
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))
消费者-订阅多个主题
# -*- coding:utf-8 -*-
# Consumer subscribed to several topics at once.
from kafka import KafkaConsumer

LINE_FORMAT = "%s:%d:%d: key=%s value=%s"

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
# Subscribe to the topics to consume
consumer.subscribe(topics=['test', 'test0'])
for record in consumer:
    fields = (
        record.topic,
        record.partition,
        record.offset,
        record.key,
        record.value,
    )
    print(LINE_FORMAT % fields)
消费者-手动拉取消息
# -*- coding:utf-8 -*-
# Consumer that pulls messages manually with poll() once per second.
import time

from kafka import KafkaConsumer

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
# Subscribe to the topics to consume
consumer.subscribe(topics=['test', 'test0'])

while True:
    msg = consumer.poll(timeout_ms=5)
    # print() with a single argument works on both Python 2 and Python 3;
    # the bare `print msg` statement is a SyntaxError on Python 3.
    print(msg)
    time.sleep(1)
消费者-消息挂起与恢复
# -*- coding:utf-8 -*-
# Consumer that pauses partition 0 of 'test' and resumes it after ten
# polling iterations.
import time

from kafka import KafkaConsumer
from kafka.structs import TopicPartition

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
# Subscribe to the topic to consume
consumer.subscribe(topics=['test'])
# This call is required; without it pause() raises
# KeyError: TopicPartition(topic='test', partition=0)
consumer.topics()

partition = TopicPartition(topic='test', partition=0)
consumer.pause(partition)

# print() with a single argument works on both Python 2 and Python 3;
# the bare `print x` statement is a SyntaxError on Python 3.
num = 0
while True:
    print(num)
    # Partitions that are currently paused
    print(consumer.paused())
    msg = consumer.poll(timeout_ms=5)
    print(msg)
    time.sleep(1)
    num = num + 1
    if num == 10:
        consumer.resume(partition)
        print("resume...... ")
消费者组:
# -*- coding:utf-8 -*-
# Consumer that joins the 'my-group' consumer group on the 'test' topic.
from kafka import KafkaConsumer

LINE_FORMAT = "%s:%d:%d: key=%s value=%s"

consumer = KafkaConsumer(
    'test',
    group_id='my-group',
    bootstrap_servers=['localhost:9092'],
)
for record in consumer:
    fields = (
        record.topic,
        record.partition,
        record.offset,
        record.key,
        record.value,
    )
    print(LINE_FORMAT % fields)
启动多个消费者,消费组可以横向扩展提高处理能力。
启动程序之后,执行下面命令,即可看到消费者组列表:
/usr/local/kafka/bin/kafka-consumer-groups.sh --bootstrap-server localhost:9092 --list