1.安装pykafka
pip install pykafka
2.下载安装
git clone https://github.com/Parsely/pykafka.git
然后将下载下来的 pykafka 源码目录中的 pykafka 子目录(即 pykafka 的库包目录)放到 /Library/Python/2.7/site-packages/ 路径下即可
3.假设你至少有一个 Kafka 实例正在运行,就可以使用 pykafka 连接它。
consumer.py 消费者
#!/usr/bin/python # -*- coding:utf-8 -*- from pykafka import KafkaClient #kafka默认端口为9092 client = KafkaClient(hosts='192.168.1.140:9092,192.168.1.141:9092,192.168.1.142:9092')#这里连接多个客户端 topic = client.topics['test_kafka_topic'] #从zookeeper消费,zookeeper的默认端口为2181 balanced_consumer = topic.get_balanced_consumer( consumer_group='test_kafka_group', auto_commit_enable=True, # 设置为False的时候不需要添加consumer_group,直接连接topic即可取到消息 zookeeper_connect='192.168.1.140:2181,192.168.1.141:2181,192.168.1.142:2181'#这里就是连接多个zk ) for message in balanced_consumer: # print message if message is not None: print message.offset, message.value#打印接收到的消息体的偏移个数和值
producer.py 生产者
#!/usr/bin/python # -*- coding:utf-8 -*- from pykafka import KafkaClient client = KafkaClient(hosts ="192.168.1.140:9092,192.168.1.141:9092,192.168.1.142:9092") #可接受多个client #查看所有的topic client.topics print client.topics topic = client.topics['test_kafka_topic']#选择一个topic message ="test message test message" #当有了topic之后呢,可以创建一个producer,来发消息,生产kafka数据,通过字符串形式, with topic.get_sync_producer() as producer: producer.produce(message) #The example above would produce to kafka synchronously - #the call only returns after we have confirmation that the message made it to the cluster. #以上的例子将产生kafka同步消息,这个调用仅仅在我们已经确认消息已经发送到集群之后 #但生产环境,为了达到高吞吐量,要采用异步的方式,通过delivery_reports =True来启用队列接口; with topic.get_sync_producer() as producer: producer.produce('test message',partition_key='{}'.) producer=topic.get_producer() producer.produce(message) print message