一、python中安装python-kafka模块包
系统环境:centos7
1. 在线安装
pip install kafka-python
2. 离线安装
下载安装包 kafka-python-2.0.2.tar.gz (适合python2和python3)
解压后执行:
python setup.py install
二、python向kafka中发送日志
kafka服务器:192.168.149.153:9092,将日志发送到 test1 主题中
import json
from kafka import KafkaProducer
from kafka import KafkaConsumer
import time
# Build the sample payloads: 99 single-key dicts {"num1": 1} ... {"num99": 99}.
data_list = [{"num{}".format(i): i} for i in range(1, 100)]
# Fire-and-forget: send without inspecting per-message results.
def send_data(topic='test1', bootstrap_servers='192.168.149.153:9092'):
    """Send every record in ``data_list`` to Kafka without checking results.

    Each dict is JSON-serialized and sent as bytes. A single ``flush()``
    after the loop guarantees all buffered records are transmitted before
    the producer is closed; flushing per message would defeat batching.

    Args:
        topic: Kafka topic to publish to (defaults preserve the original
            hard-coded behavior).
        bootstrap_servers: Kafka broker address.
    """
    start_time = time.time()
    producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
    try:
        for data in data_list:
            data_str = json.dumps(data)  # dict -> JSON string
            producer.send(topic, data_str.encode())  # Python 3 requires bytes
        producer.flush()  # once, after the loop, so batching still works
    finally:
        producer.close()  # always release the connection, even on error
    span_time = time.time() - start_time
    print("time cost:{}".format(span_time))
# Synchronous send: wait for Kafka's response via get() to confirm delivery.
def sync_send(topic='test1', bootstrap_servers='192.168.149.153:9092'):
    """Send every record in ``data_list`` and block until each is acknowledged.

    ``future.get(timeout=10)`` blocks until the broker confirms the write
    (or the timeout expires), which also keeps the sends ordered. On
    success the returned metadata exposes ``.partition`` and ``.offset``
    of the stored record; failures are printed and the loop continues.

    Args:
        topic: Kafka topic to publish to (defaults preserve the original
            hard-coded behavior).
        bootstrap_servers: Kafka broker address.
    """
    start_time = time.time()
    producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
    try:
        for data in data_list:
            data_str = json.dumps(data)  # dict -> JSON string
            future = producer.send(topic, data_str.encode())  # Python 3 requires bytes
            try:
                record_metadata = future.get(timeout=10)  # synchronous confirmation
            except Exception as e:
                # Best-effort: report the failed record and keep sending.
                print("Error:{}".format(e))
    finally:
        producer.close()  # always release the connection, even on error
    span_time = time.time() - start_time
    print("time cost:{}".format(span_time))
# send_data()
# sync_send()
三、python消费kafka
# Consume messages from topic 'test1'. With no committed offset the
# consumer starts from the earliest available message; every value is
# JSON-decoded by the deserializer before it reaches the loop.
consumer = KafkaConsumer(
    'test1',
    bootstrap_servers='192.168.149.153:9092',
    auto_offset_reset='earliest',
    value_deserializer=json.loads,
)
for message in consumer:
    print(message.value)