producer
安装 kafka-python 库,之后构建生产者(producer):
# Part 1: Produce data into Kafka (optional)
# $ pip3 install kafka-python
import kafka

# Connect to the local Kafka broker.
producer = kafka.KafkaProducer(bootstrap_servers=['localhost:9092'])
# NOTE: Data from https://www.kaggle.com/mlg-ulb/creditcardfraud/data
count = 0
try:
    with open('sensor_data-Copy1.csv', 'r') as f:
        # next(f)  # skip header line
        for line in f:
            # Each CSV row is sent as one UTF-8 encoded message.
            producer.send('creditcard-test-0', line.rstrip().encode())
            count += 1
    # Flush BEFORE reporting success: send() is asynchronous, so the
    # records are only guaranteed delivered after flush() returns.
    producer.flush()
    print(count, "records have been produced in 'creditcard-test-0'")
finally:
    # Release the network connection even if sending fails part-way.
    producer.close()
服务器为localhost:9092;
读取sensor_data-Copy1.csv文件;
发送至creditcard-test-0主题
consumer
# Part 2: Consume the records back from Kafka and dump them to a text file.
from kafka import KafkaConsumer
from kafka.structs import TopicPartition

consumer = KafkaConsumer('creditcard-test-0',
                         bootstrap_servers='localhost:9092',
                         group_id='creditcard',
                         auto_offset_reset='earliest')
print('consumer start to consuming...')
consumer.subscribe(('creditcard-test-0', ))
# Rewind partition 0 to offset 0 so reading always starts from the beginning.
consumer.seek(TopicPartition(topic='creditcard-test-0', partition=0), 0)

filename = 'data-sensor.txt'
# BUG FIX: `count` was incremented below without ever being initialized,
# which raised NameError on the first consumed message.
count = 0
# Context manager ensures the output file is closed even if consuming fails.
with open(filename, 'w') as output:
    for message in consumer:
        # print(message.topic, message.offset, message.key, message.value, message.partition)
        print(bytes.decode(message.value))
        output.write(bytes.decode(message.value))
        output.write("\n")
        count += 1
        # Stop after a fixed number of messages; the consumer loop
        # would otherwise block forever waiting for new records.
        if count == 10001:
            break
print("================================")
print("message: %d" %(count))
将消费到的消息写入 data-sensor.txt 文件中
参考文档:
python读取Kafka
kafka实战教程(python操作kafka),kafka配置文件详解
txt => csv
由于后续需要使用数据来构建模型进行预测,因此这一步将读取到的文件转换成csv,便于后续使用
# Convert the consumed text dump into a CSV file so the data can later be
# loaded for model building and prediction.
import csv

with open('data-sensor.csv', 'w+', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Read the txt dump produced by the consumer; fields on each line
    # are separated by commas.
    with open('data-sensor.txt', 'r') as filein:
        for idx, row in enumerate(filein):
            # The first line is a quoted string header, so the '"'
            # characters must be stripped before splitting.
            if idx == 0:
                row = row.replace('"', '')
            writer.writerow(row.strip('\n').split(','))
参考文档:
pandas写入csv文件