概述
代码环境依赖:Debian 9,Python 3.8.5,kafka-python。其中 kafka-python 需要使用 pip 安装(pip3 install kafka-python)。
性能上,在单进程单线程下,KafkaConsumer 的读取速度与 Java 客户端相当。
例程
#!/usr/bin/python3
import json
from kafka import KafkaConsumer
import time
def init_kafka(kafka_brokers, kafka_topics):
    """Create a KafkaConsumer and subscribe it to the given topics.

    Args:
        kafka_brokers: Bootstrap servers, passed through unchanged as
            ``bootstrap_servers`` (the caller in this file supplies one
            comma-separated ``host:port`` string).
        kafka_topics: A single topic name or an iterable of topic names.

    Returns:
        The subscribed consumer. Each record's ``value`` is the parsed JSON
        payload, or ``None`` when the record carried no value.
    """
    def decode_json(raw):
        # A record may have a null value (e.g. a tombstone); hand it back
        # as None instead of failing on ``None.decode``.
        if raw is None:
            return None
        # UTF-8 is a superset of ASCII, so pure-ASCII payloads decode exactly
        # as before while non-ASCII JSON text no longer raises
        # UnicodeDecodeError.
        return json.loads(raw.decode('utf-8'))

    # Wrap a bare topic name so subscribe() always receives a list of names.
    if isinstance(kafka_topics, str):
        kafka_topics = [kafka_topics]

    consumer = KafkaConsumer(
        bootstrap_servers=kafka_brokers,
        # The timestamp suffix gives every call a fresh consumer group id.
        # (The prefix spelling is kept as-is: it is a runtime identifier.)
        group_id='comsumer' + str(time.time()),
        value_deserializer=decode_json,
        # Optional tuning knobs, left disabled:
        # consumer_timeout_ms=1000,
        # max_poll_interval_ms=2000,
        # request_timeout_ms=1200500,
        # connections_max_idle_ms=1440000,
        max_poll_records=128,
    )
    consumer.subscribe(kafka_topics)
    return consumer
def read_kafka(kafka_brokers, kafka_topics):
    """Poll Kafka in an endless loop and print per-poll timing statistics.

    After every ``poll`` call one line is printed with the elapsed time of
    that iteration and the number of records it returned (0 when the poll
    timed out without data).

    Args:
        kafka_brokers: Bootstrap servers, forwarded to ``init_kafka``.
        kafka_topics: Topic name(s), forwarded to ``init_kafka``.

    This function does not return normally; it runs until an exception
    (for example ``KeyboardInterrupt``) propagates out of the loop. The
    consumer is closed on the way out.
    """
    print('read_kafka process beginning ...')
    consumer = init_kafka(kafka_brokers, kafka_topics)
    try:
        while True:
            # perf_counter is monotonic, so the measured interval cannot be
            # skewed by wall-clock adjustments.
            start = time.perf_counter()
            count = 0
            # poll() yields a mapping whose values are batches of records;
            # an empty mapping means nothing arrived within the timeout.
            records = consumer.poll(timeout_ms=500)
            for batch in records.values():
                for message in batch:
                    count += 1
                    # Deserialized payload; hook per-message handling here.
                    data = message.value
                    # Uncomment to dump every payload:
                    # print(data)
            print('read kafka cost time: {} count: {}'.format(
                time.perf_counter() - start, count))
    finally:
        # Release sockets and leave the group even when the loop is
        # interrupted.
        consumer.close()
if __name__ == '__main__':
    # Script entry point: consume the sample topic from the broker list
    # below. Both values are example settings; adjust them to your cluster.
    brokers = '1.10.0.1:9092,1.10.0.1:9093,1.10.0.2:9092,1.10.0.2:9093,1.10.0.3:9092,1.10.0.3:9093'
    topics = 'test'
    read_kafka(kafka_brokers=brokers, kafka_topics=topics)