Monitoring Kafka offsets with Python

1. Kafka has changed where consumer offsets are stored, so this monitor supports both mechanisms: the older ZooKeeper storage and the newer storage inside Kafka itself.

2. I could not find a way to enumerate the consumer groups whose offsets are stored inside Kafka, so unlike the ZooKeeper case the script cannot discover all groups by itself; the groups to monitor have to be configured explicitly.
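As a side note, newer releases of kafka-python ship a KafkaAdminClient whose list_consumer_groups() can enumerate the groups coordinated by Kafka itself, so the manual group list could in principle be replaced. A minimal sketch, assuming one of those newer client versions (not the one the script below was written against):

```python
# Hedged sketch: requires a newer kafka-python than the monitor below assumes.
from kafka.admin import KafkaAdminClient

admin = KafkaAdminClient(bootstrap_servers='localhost:9092')
# list_consumer_groups() returns (group_id, protocol_type) tuples for groups
# whose offsets live inside Kafka; ZooKeeper-based groups do not show up here.
for group in admin.list_consumer_groups():
    print(group)
admin.close()
```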

The configuration file (monitor_constants.py, imported by the script below) looks like this:

# topics to monitor
topics = ['test_topic']

# consumer group ids to monitor
monitor_group_ids = ['consumer_1']

# broker list
servers = 'localhost:9092'

# MySQL connection arguments
dbargs = {'user': 'root', 'password': '1234', 'host': 'localhost', 'database': 'test'}

# reporting interval for the monitoring data, in seconds
time_interval = 2.5

# reporting interval for the full history snapshot, in seconds
history_time_interval = 5 * 60
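The script writes into two MySQL tables: consumer_monitor holds the latest offset/logsize per (group, topic, partition), and consumer_monitor_history receives a full snapshot every history_time_interval seconds. The article does not include their schema, so the sketch below is inferred from the INSERT statements in the script; the column types are my own assumptions, and it uses the pre-2.0 SQLAlchemy API that the rest of the code also relies on:

```python
# Hypothetical DDL for the two monitoring tables; column names come from the
# INSERT statements in the script, types and sizes are assumptions.
from sqlalchemy.engine import create_engine

DDL = """
CREATE TABLE IF NOT EXISTS {name} (
    group_id    VARCHAR(128) NOT NULL,
    topic       VARCHAR(255) NOT NULL,
    `partition` INT NOT NULL,
    `offset`    BIGINT,
    logsize     BIGINT,
    create_time DATETIME
)
"""

if __name__ == '__main__':
    # same connection settings as dbargs above
    engine = create_engine('mysql+mysqlconnector://root:1234@localhost/test?charset=utf8')
    for table in ('consumer_monitor', 'consumer_monitor_history'):
        engine.execute(DDL.format(name=table))
```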

The monitoring script:

# -*- coding:utf-8 -*-
import time
import sys

from kafka.client import KafkaClient
from kafka.protocol.commit import OffsetFetchRequest_v1, OffsetFetchResponse_v1, \
    OffsetFetchRequest_v0, OffsetFetchResponse_v0
from kafka.protocol.offset import OffsetRequest_v0, OffsetResponse_v0
from mysql import connector
from sqlalchemy.engine import create_engine
from sqlalchemy.orm import create_session

from monitor_constants import *

duration = 0
client = None
conn = None
partition_cache = {}   # topic -> list of partitions
brokers_cache = []     # broker node ids
kafka_type = []        # group ids whose offsets are stored inside Kafka
zk_type = []           # group ids whose offsets are stored in ZooKeeper

insert_sql = ("INSERT INTO consumer_monitor "
              "(group_id, topic, `partition`, `offset`, logsize, create_time) "
              "VALUES (:group_id,:topic,:partition,:offset,:logsize,:create_time)")

history_insert_sql = ("INSERT INTO consumer_monitor_history "
                      "(group_id, topic, `partition`, `offset`, logsize, create_time) "
                      "VALUES (:group_id,:topic,:partition,:offset,:logsize,:create_time)")

select_sql = ("select count(1) "
              "from consumer_monitor "
              "where group_id=:group_id and topic=:topic and `partition`=:partition")

update_sql = ("update consumer_monitor "
              "set `offset`=:offset,logsize=:logsize,create_time=:create_time "
              "where group_id=:group_id and topic=:topic and `partition`=:partition")


def get_brokers():
    if not brokers_cache:
        brokers = client.cluster.brokers()
        if brokers:
            brokers_cache.extend([x.nodeId for x in brokers])
    return brokers_cache


def get_partitions(topic):
    if not partition_cache or topic not in partition_cache:
        partitions = client.cluster.available_partitions_for_topic(topic)
        if partitions:
            partition_cache[topic] = [x for x in partitions]
        else:
            return []
    return partition_cache[topic]


def get_logsize():
    """
    Get the logsize of every partition of each monitored topic
    (summed across all brokers).
    """
    tp = {}  # topic : partition_dict
    brokers = get_brokers()
    for topic in topics:
        partitions = get_partitions(topic)
        pl = {}  # partition : logsize
        for broker in brokers:
            # Asking every broker about every partition is a wasteful cartesian
            # product, but parse_logsize() filters the responses, so the result
            # is still correct.
            for partition in partitions:
                client.send(broker, OffsetRequest_v0(replica_id=-1, topics=[(topic, [(partition, -1, 1)])]))
                responses = client.poll()
                pdict = parse_logsize(topic, partition, responses)
                append(pl, pdict)
        tp[topic] = pl
    return tp


def append(rdict, pdict):
    if rdict:
        # an entry already exists, accumulate
        for k, v in pdict.items():
            if k in rdict:
                rdict[k] = rdict[k] + v
            else:
                rdict[k] = v
    else:
        rdict.update(pdict)


def parse_logsize(t, p, responses):
    """
    Logsize of a single partition on a single broker.
    """
    for response in responses:
        if not isinstance(response, OffsetResponse_v0):
            return {}
        tps = response.topics
        topic = tps[0][0]
        partition_list = tps[0][1]
        partition = partition_list[0][0]
        # data returned by the async poll may not belong to this request
        if topic == t and partition == p and partition_list[0][1] == 0:
            logsize_list = partition_list[0][2]
            logsize = logsize_list[0]
            return {partition: logsize}
    return {}


def parse_offsets(t, responses):
    dr = {}
    for response in responses:
        if not isinstance(response, (OffsetFetchResponse_v1, OffsetFetchResponse_v0)):
            return {}
        tps = response.topics
        topic = tps[0][0]
        partition_list = tps[0][1]
        if topic == t:
            for partition_tunple in partition_list:
                if partition_tunple[3] == 0:
                    offset = partition_tunple[1]
                    dr[partition_tunple[0]] = offset
    return dr


def get_offsets():
    gd = {}  # {gid: dict}
    for gid in monitor_group_ids:
        td = {}  # {topic: dict}
        for topic in topics:
            pd = {}  # {partition: dict}
            for broker in get_brokers():
                partitions = get_partitions(topic)
                if not partitions:
                    return {}
                else:
                    responses = optionnal_send(broker, gid, topic, partitions)
                    dr = parse_offsets(topic, responses)
                    append(pd, dr)
            td[topic] = pd
        gd[gid] = td
    return gd


def optionnal_send(broker, gid, topic, partitions):
    # Try the ZooKeeper-style and Kafka-style offset fetches and remember
    # which one works for each group id.
    if gid in kafka_type:
        return kafka_send(broker, gid, topic, partitions)
    elif gid in zk_type:
        return zk_send(broker, gid, topic, partitions)
    else:
        responses = zk_send(broker, gid, topic, partitions)
        dct = parse_offsets(topic, responses)
        if is_suitable(dct):
            zk_type.append(gid)
            return responses
        responses = kafka_send(broker, gid, topic, partitions)
        dct = parse_offsets(topic, responses)
        if is_suitable(dct):
            kafka_type.append(gid)
        return responses


def is_suitable(dct):
    # a fetched offset of -1 means the group has no committed offset there
    for x in dct.values():
        if x != -1:
            return True


def kafka_send(broker, gid, topic, partitions):
    client.send(broker, OffsetFetchRequest_v1(consumer_group=gid, topics=[(topic, partitions)]))
    return client.poll()


def zk_send(broker, gid, topic, partitions):
    client.send(broker, OffsetFetchRequest_v0(consumer_group=gid, topics=[(topic, partitions)]))
    return client.poll()


def initdb():
    try:
        config_url = 'mysql+mysqlconnector://' + dbargs['user'] + ':' + dbargs['password'] + '@' + \
                     dbargs['host'] + '/' + dbargs['database'] + '?charset=utf8'
        engine = create_engine(config_url, echo=False, pool_recycle=4)
        return create_session(bind=engine, autocommit=False)
    except connector.Error as e:
        print(e)
        sys.exit(1)


def exec_sql(sql, param):
    try:
        result = conn.execute(sql, param)
        conn.commit()
        conn.close()
        return result.rowcount
    except Exception as e:
        try:
            conn.rollback()
            conn.close()
        except Exception as ex:
            print(ex)
        print(e)


def store_db(param):
    dr = {'group_id': param[0], 'topic': param[1], 'partition': param[2], 'offset': param[3], 'logsize': param[4],
          'create_time': param[5]}
    global duration
    if duration >= history_time_interval:
        exec_sql(history_insert_sql, dr)
        duration = 0
    exist = exec_sql(select_sql, {'group_id': param[0], 'topic': param[1], 'partition': param[2]})
    if exist and exist != 0:
        exec_sql(update_sql, dr)
    else:
        exec_sql(insert_sql, dr)


def do_task():
    offset_dict = get_offsets()
    logsize_dict = get_logsize()
    for gk, gv in offset_dict.items():
        for tk, tv in gv.items():
            for pk, pv in tv.items():
                if logsize_dict and tk in logsize_dict:
                    dr = logsize_dict[tk]  # partition : logsize
                    if dr and pk in dr:
                        param = (gk, tk, pk, pv, dr[pk],
                                 time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
                        store_db(param)


if __name__ == "__main__":
    conn = initdb()
    client = KafkaClient(bootstrap_servers=servers, request_timeout_ms=3000)
    while True:
        do_task()
        time.sleep(time_interval)
        duration += time_interval
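Each stored row carries both the committed offset and the logsize, so the lag of a group on a partition is simply logsize - offset. A minimal reporting sketch over the consumer_monitor table, again assuming the hypothetical schema above and the pre-2.0 SQLAlchemy API:

```python
# Print per-partition lag from the consumer_monitor table; connection settings
# mirror dbargs, and the schema is the hypothetical one sketched earlier.
from sqlalchemy.engine import create_engine

engine = create_engine('mysql+mysqlconnector://root:1234@localhost/test?charset=utf8')
rows = engine.execute(
    "SELECT group_id, topic, `partition`, `offset`, logsize, logsize - `offset` AS lag "
    "FROM consumer_monitor ORDER BY group_id, topic, `partition`")
for row in rows:
    print('%s %s partition=%s offset=%s logsize=%s lag=%s' % tuple(row))
```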

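To have some committed offsets to watch, any consumer in a monitored group will do. The sketch below uses kafka-python's KafkaConsumer with the topic and group id from the sample configuration; with enable_auto_commit switched on, its offsets are committed to Kafka, so the monitor picks this group up through the OffsetFetchRequest_v1 path:

```python
# Throwaway consumer whose committed offsets the monitor can track.
# Topic and group id match the sample configuration above.
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'test_topic',
    bootstrap_servers='localhost:9092',
    group_id='consumer_1',
    auto_offset_reset='earliest',
    enable_auto_commit=True)

for message in consumer:
    print(message.offset, message.value)
```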