pykafka的使用问题
由于业务需要,需使用kafka;又由于底层使用的是python,所以要使用python的API。
现有的包有kafka-python和pykafka,前者需要自己手动管理消费者的offset,比较麻烦,pykafka可使用zk管理offset,相对简单
pykafka Demo
在选定好方案之后,一个简单的Demo如下:
# encoding:utf-8
"""
使用pykafka,通过zk维护消费者的offset
"""
from pykafka import KafkaClient
from pykafka.common import OffsetType
import threading
host = "127.0.0.1:9092"
client = KafkaClient(hosts=host)
print client.topics
# 消费者
topic = client.topics["dp_test".encode(encoding='utf-8')]
print "topic:", topic
# consumer = topic.get_simple_consumer(consumer_group='pygroup',
# auto_commit_enable=True,
# auto_commit_interval_ms=1,
# consumer_id='consumer01')
consumer = topic.get_balanced_consumer(consumer_group='pygroup1',
auto_commit_enable=True,
auto_commit_interval_ms=1,
zookeeper_connect='127.0.0.1:2181',
auto_offset_reset=OffsetType.LATEST
)
for msg in consumer:
print "*" * 30
if msg is not None:
# print msg
print(msg.partition,msg.offset,msg.value.decode())
但pykafka始终报错,如下:
File "/Users/qiuyueyang/PycharmProjects/PyKafkaDemo/venv/lib/python2.7/site-packages/pykafka/utils/struct_helpers.py", line 115, in _unpack_array
item, offset = _unpack(fmt, buff, offset)
File "/Users/qiuyueyang/PycharmProjects/PyKafkaDemo/venv/lib/python2.7/site-packages/pykafka/utils/struct_helpers.py", line 96, in _unpack
items.extend(struct.unpack_from('!' + ch, buff, offset))
struct.error: unpack_from requires a buffer of at least 24436 bytes
解决方法
不得不说,Stack Overflow真是个强大的社区。引起该错误的原因是版本不一致:
问题出在KafkaClient的broker_version参数上(它默认为0.9.0)。具体细节:
# Excerpt: constructor signature of pykafka's KafkaClient (body not shown).
class KafkaClient(object):
def __init__(self,
hosts='127.0.0.1:9092',
zookeeper_hosts=None,
socket_timeout_ms=30 * 1000,
offsets_channel_socket_timeout_ms=10 * 1000,
use_greenlets=False,
exclude_internal_topics=True,
source_address='',
ssl_config=None,
broker_version='0.9.0'):
# The constructor defaults broker_version to 0.9.0, while the local broker runs 0.8.2.1.
更新
# encoding:utf-8
"""
使用pykafka,通过zk维护消费者的offset
"""
from pykafka import KafkaClient
from pykafka.common import OffsetType
import threading
host = "127.0.0.1:9092"
# 版本0.8.2.1, 该参数的设定非常重要
client = KafkaClient(hosts=host, broker_version='0.8.2.1')
print client.topics
# 消费者
topic = client.topics["dp_test".encode(encoding='utf-8')]
print "topic:", topic
# consumer = topic.get_simple_consumer(consumer_group='pygroup',
# auto_commit_enable=True,
# auto_commit_interval_ms=1,
# consumer_id='consumer01')
consumer = topic.get_balanced_consumer(consumer_group='pygroup1',
auto_commit_enable=True,
auto_commit_interval_ms=1,
zookeeper_connect='127.0.0.1:2181',
auto_offset_reset=OffsetType.LATEST
)
for msg in consumer:
print "*" * 30
if msg is not None:
# print msg
print(msg.partition,msg.offset,msg.value.decode())