1 安装包 Kafka-Python
从官网下载最新release版本的Kafka-Python压缩包: https://github.com/mumrah/kafka-python/releases
编译安装
- $tar -xvf kafka-python-0.9.3.tar.gz
- $cd kafka-python-0.9.3
- $python setup.py install
如果报下面的异常说明没有安装setuptools,请参考下文进行安装:
[root@hadoop01 kafka-python-0.9.3]# python setup.py install
Traceback (most recent call last):
File "setup.py", line 3, in <module>
from setuptools import setup, Command
ImportError: No module named setuptools
2 安装依赖模块setuptools和six
从 https://pypi.python.org/pypi/setuptools/14.3.1#downloads 下载setuptools包
编译安装
- $tar zxvf setuptools-14.3.1.tar.gz
- $cd setuptools-14.3.1
- $python setup.py build
- $python setup.py install
import time
__author__ = 'aihua.sun'
import logging
import random,string
from kafka.producer import SimpleProducer
from kafka.client import KafkaClient
LOG = logging.getLogger('kafka_producer')
class TrueCloudDataPointProducer:
    """Small wrapper that publishes messages to one fixed Kafka topic.

    It owns a ``KafkaClient`` and a ``SimpleProducer`` built on top of it,
    and remembers the topic so callers only have to supply the payload.
    """

    def __init__(self, hosts, batch_send=False, batch_send_every_n=20,
                 topic="true_cloud_datapoint_topic"):
        """Connect to Kafka and prepare the producer.

        :param hosts: broker address(es), forwarded unchanged to ``KafkaClient``.
        :param batch_send: forwarded to ``SimpleProducer`` as ``batch_send``.
        :param batch_send_every_n: forwarded to ``SimpleProducer`` as
            ``batch_send_every_n``.
        :param topic: topic every message from this instance is sent to.
        """
        # Plain configuration first; none of these depend on each other.
        self.hosts = hosts
        self.batch_send = batch_send
        self.batch_send_every_n = batch_send_every_n
        self.topic = topic

        # The client must exist before the producer that wraps it.
        self.client = KafkaClient(self.hosts)
        self.producer = SimpleProducer(
            self.client,
            batch_send=self.batch_send,
            batch_send_every_n=self.batch_send_every_n
        )

    def send_messages(self, msg):
        """Send a single message ``msg`` to this instance's topic."""
        target_topic = self.topic
        self.producer.send_messages(target_topic, msg)
def get_instance():
    """Create a ``TrueCloudDataPointProducer`` for the default broker list.

    :return: a producer constructed with the default topic and batching
        settings.
    """
    # The brokers are listed as explicit "host:port" strings. A dict keyed by
    # host name cannot represent this: repeated keys overwrite one another,
    # so most of the (host, port) pairs were silently lost.
    # NOTE(review): 'hadoop101' is kept exactly as it appeared in the original
    # mapping; it may be a typo for 'hadoop01' -- confirm against the cluster.
    hosts = [
        'hadoop01:9092',
        'hadoop01:9093',
        'hadoop01:9094',
        'hadoop101:9095',
        'hadoop02:9092',
        'hadoop02:9093',
        'hadoop02:9094',
    ]
    return TrueCloudDataPointProducer(hosts)
if __name__ == "__main__":
    # Simple throughput check: time how long it takes to create the producer
    # and push 10000 messages through it.
    begin = time.time()
    producer = get_instance()
    for i in range(0, 10000):
        # 64 random lowercase ASCII letters as filler. ``ascii_lowercase`` is
        # used because ``string.lowercase`` is locale-dependent on Python 2
        # and does not exist on Python 3. The inner loop variable is ``_`` so
        # it no longer reuses the message counter's name.
        filler = ''.join(random.choice(string.ascii_lowercase) for _ in range(64))
        msg = 'Message' + str(i) + ' ' + filler + '\n'
        producer.send_messages(msg)
    end = time.time()
    print("use time:" + str((end - begin)))
从https://pypi.python.org/pypi/six/1.9.0 下载six-1.9.0.tar.gz
编译安装
- $tar zxvf six-1.9.0.tar.gz
- $cd six-1.9.0
- $python setup.py build
- $python setup.py install
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from kafka import KafkaProducer
from kafka import KafkaConsumer
from kafka.errors import KafkaError
import json
class Kafka_producer():
    """Producer wrapper built on kafka-python's ``KafkaProducer``."""

    def __init__(self, kafkahost, kafkaport, kafkatopic):
        """Create a producer connected to ``kafkahost:kafkaport``.

        :param kafkahost: broker host name or address.
        :param kafkaport: broker port.
        :param kafkatopic: topic that ``sendjsondata`` publishes to.
        """
        self.kafkaHost = kafkahost
        self.kafkaPort = kafkaport
        self.kafkatopic = kafkatopic
        self.producer = KafkaProducer(
            bootstrap_servers='{kafka_host}:{kafka_port}'.format(
                kafka_host=self.kafkaHost,
                kafka_port=self.kafkaPort
            ))

    def sendjsondata(self, params):
        """Serialize ``params`` to JSON and publish it to the configured topic.

        The JSON text is UTF-8 encoded before sending, and the producer is
        flushed after every message. A ``KafkaError`` raised while sending or
        flushing is printed rather than propagated.

        :param params: any object accepted by ``json.dumps``.
        """
        try:
            json_text = json.dumps(params)
            producer = self.producer
            producer.send(self.kafkatopic, json_text.encode('utf-8'))
            producer.flush()
        except KafkaError as e:
            # print(...) with a single argument behaves the same on Python 2
            # and Python 3; the bare ``print e`` statement is Python 2 only.
            print(e)
class Kafka_consumer():
    """Consumer wrapper built on kafka-python's ``KafkaConsumer``."""

    def __init__(self, kafkahost, kafkaport, kafkatopic, groupid):
        """Create a consumer subscribed to ``kafkatopic``.

        :param kafkahost: broker host name or address.
        :param kafkaport: broker port.
        :param kafkatopic: topic to consume from.
        :param groupid: value passed to ``KafkaConsumer`` as ``group_id``.
        """
        self.kafkaHost = kafkahost
        self.kafkaPort = kafkaport
        self.kafkatopic = kafkatopic
        self.groupid = groupid
        self.consumer = KafkaConsumer(
            self.kafkatopic,
            group_id=self.groupid,
            bootstrap_servers='{kafka_host}:{kafka_port}'.format(
                kafka_host=self.kafkaHost,
                kafka_port=self.kafkaPort))

    def consume_data(self):
        """Yield each message produced by iterating ``self.consumer``.

        This is a generator. A ``KeyboardInterrupt`` raised while iterating
        is printed and ends the generator instead of propagating.
        """
        try:
            for message in self.consumer:
                # Messages are yielded as-is; decoding (for example
                # json.loads(message.value)) is left to the caller.
                yield message
        except KeyboardInterrupt as e:
            # ``except X as e`` and print(...) work on both Python 2.6+ and
            # Python 3; the comma form and bare print are Python 2 only.
            print(e)
def main():
    """Exercise the consumer (and, when re-enabled, the producer).

    Connects to a broker at 127.0.0.1:9092, reads from topic ``ranktest``
    and prints the value of every message received.

    :return: None
    """
    # Producer check, left disabled. Uncomment to publish ten sample messages:
    # producer = Kafka_producer("127.0.0.1", 9092, "ranktest")
    # for i in range(10):
    #     params = '{abetst}:{null}---' + str(i)
    #     producer.sendjsondata(params)

    # Consumer check. Each item yielded by the consumer is a ConsumerRecord,
    # for example:
    #   ConsumerRecord(topic=u'ranktest', partition=0, offset=202,
    #                  timestamp=None, timestamp_type=None, key=None,
    #                  value='"{abetst}:{null}---0"', checksum=-1868164195,
    #                  serialized_key_size=-1, serialized_value_size=21)
    consumer = Kafka_consumer('127.0.0.1', 9092, "ranktest", 'test-python-ranktest')
    message = consumer.consume_data()
    for i in message:
        # print(...) with one argument is valid on both Python 2 and 3.
        print(i.value)
# Run the consumer/producer check only when executed as a script.
if __name__ == "__main__":
    main()