排查kafka问题时需要很多历史数据,如消费offset、topic的startoffset和endoffset、消费ip等。有些数据是metrics里没有的,就需要自采集监控,以便灵活查询历史数据定位问题;同时需要按topic、消费组、集群等维度整理并展示监控数据,灵活方便,一览无余。
1.使用python结合go脚本采集
采集集群的topic、group分区的offset信息,以及ip、consumer_id、client_id等信息,并计算出topic的生产速率和消费组的消费速率。
先上go代码,python这里调用了go主要是为了获取到消费组分区的ip,consumer_id和client_id,消费状态等,使用时 go build一下放在python代码同一级目录就可以了
package main
import (
"flag"
"fmt"
"log"
"github.com/Shopify/sarama"
)
// conf holds the command-line options: which sub-command to run, the broker
// address to connect to, and the topic/group/broker-version to query.
var conf = struct {
	Cmd     string
	Host    string
	Topic   string
	Group   string
	Version string
}{}
// main parses the flags, connects a sarama ClusterAdmin to the broker, and
// either lists all topics (-cmd list_topics) or prints the state, members and
// partition assignments of the consumer group given with -group.
func main() {
	flag.StringVar(&conf.Cmd, "cmd", "", "Command")
	// FIX: usage-string typo "Common separated" -> "Comma separated".
	flag.StringVar(&conf.Host, "host", "localhost:9093", "Comma separated kafka hosts")
	flag.StringVar(&conf.Topic, "topic", "", "Kafka topic")
	flag.StringVar(&conf.Group, "group", "", "Kafka group")
	flag.StringVar(&conf.Version, "version", "", "kafka broker version")
	flag.Parse()

	// The broker protocol version must parse, otherwise sarama cannot speak
	// to the cluster. log.Fatalf replaces the log.Fatal(fmt.Errorf(...))
	// anti-idiom (which also embedded a stray trailing newline).
	ver, err := sarama.ParseKafkaVersion(conf.Version)
	if err != nil {
		log.Fatalf("ParseKafkaVersion failed, err: %+v", err)
	}
	c := sarama.NewConfig()
	c.Version = ver
	ca, err := sarama.NewClusterAdmin([]string{conf.Host}, c)
	if err != nil {
		log.Fatalf("NewClusterAdmin failed, err: %+v", err)
	}
	defer ca.Close()

	// Sub-command: just dump topic/partition/replication info and exit.
	if conf.Cmd == "list_topics" {
		mp, err := ca.ListTopics()
		if err != nil {
			log.Fatalf("ListTopics failed, err: %+v", err)
		}
		for name, topic := range mp {
			fmt.Printf("topic: %s\tpartition: %d\treplication: %d\n", name, topic.NumPartitions, topic.ReplicationFactor)
		}
		return
	}

	// Default: describe the requested consumer group.
	groups, err := ca.DescribeConsumerGroups([]string{conf.Group})
	if err != nil {
		// FIX: the message previously said "ListConsumerGroups failed"
		// although DescribeConsumerGroups is the call that failed.
		log.Fatalf("DescribeConsumerGroups failed, err: %+v", err)
	}
	for _, gd := range groups {
		fmt.Printf("groupid: %s\tstate: %s\tProtocolType: %s\tProtocol: %s\n",
			gd.GroupId, gd.State, gd.ProtocolType, gd.Protocol)
		for key, member := range gd.Members {
			fmt.Printf("\tkey: %s, \tclientid: %s\thost: %s\n", key, member.ClientId, member.ClientHost)
			cgma, err := member.GetMemberAssignment()
			if err != nil {
				log.Fatalf("GetMemberAssignment failed, err: %+v", err)
			}
			for topic, partitions := range cgma.Topics {
				fmt.Printf("\t\ttopic: %s, \tpartitions: %+v\n", topic, partitions)
			}
		}
	}
}
1.1 常规采集
#!/usr/bin/env python
# -* - coding:UTF-8 -*-
import os,re
import datetime,time,commands
from kafka.admin import KafkaAdminClient
from kafka import KafkaConsumer
from kafka.structs import TopicPartition
#start_time = datetime.datetime.now().replace(microsecond=0).strftime('%Y-%m-%d %H:%M:%S')
# Wall-clock start; the script prints the total runtime at the end.
start_time = time.time()
# DC report id and the shell script that uploads a metrics file to DC.
DC1 = 'music_dcxxx'
CMD = '/usr/local/services/music_agent-1.0/bin/dcapi_python/batch_write_log.sh'
#out = os.popen("ifconfig|grep -A1 -w eth1|awk 'NR==2{print $2}'")
# Resolve the local IP via an agent helper binary; the broker is assumed to
# listen on this host at port 9092.
out = os.popen('/usr/local/services/music_agent-1.0/bin/tools_ifconfig')
ips = out.read().rstrip('\n')
print ips
ip = ips + ':' + '9092'
curr_dir = '/usr/local/services/music_agent-1.0/bin/kafka_get_offset'
# kafka-python admin/consumer clients against the local broker.
admin = KafkaAdminClient(bootstrap_servers=ip)
consumer = KafkaConsumer(bootstrap_servers=ip)
topics = admin.list_topics()
#cluster = [i['host']+':'+str(i['port']) for i in a.describe_cluster()['brokers']]
def report_dc(cmd,dcid,file):
    # Upload one metrics file to the DC collector by shelling out to the
    # batch_write_log.sh script; prints the exit code and its output.
    _code,_d = commands.getstatusoutput("{} {} {}".format(cmd,dcid,file))
    print _code,_d
#def retention_files(path,n):
# files = os.listdir(path)
# if len(files) > n:
# oldfile = sorted(files)[0]
# old_path_file = os.path.join(path,oldfile)
# print old_path_file
# os.remove(old_path_file)
partitions = []
for topic in topics:
for i in consumer.partitions_for_topic(topic):
partitions.append(TopicPartition(topic=topic,partition=i))
begin = consumer.beginning_offsets(partitions)
end = consumer.end_offsets(partitions)
print '-------------------'
partition_offset = {}
topic_offset = {}
offsets,end_offset,topics,topic_rate = {},{},{},{}
for k in begin:
if k in end:
partition_offset[k] = [begin[k],end[k]]
topic_offset[k] = end[k] -begin[k]
if k.topic not in end_offset:
end_offset[k.topic] = 0
end_offset[k.topic] += end[k]
for k in topic_offset:
if k.topic != '__consumer_offset':
if k.topic not in offsets:
offsets[k.topic] = 0
offsets[k.topic] += topic_offset[k]
if os.path.exists('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset'):
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset') as f1:
for line in f1:
topics[line.split()[0]] = line.split()[1]
for t in end_offset:
try:
topic_rate[t] = end_offset[t] - int(topics[t])
except Exception as e:
topic_rate[t] = None
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset','w') as f2:
for tp,endoff in end_offset.items():
f2.write(tp+' ' + str(endoff) + '\n')
###### Per-partition consumed offset vs current offset
#if not os.path.exists(curr_dir + '/../data/music_agent/kafka'):
#    os.mkdir(curr_dir + '/usr/local/services/music_agent-1.0/data/music_agent/kafka/')
# Output file for this run, suffixed with -HH-MM.
file1 = curr_dir + '/../../data/music_agent/kafka/topic_partition_consumer_offset' + datetime.datetime.now().strftime('-%H-%M')
consumer_partition = {}
consumer_lag,consumer_off,consumer_rate,consumers = {},{},{},{}
for i,_ in admin.list_consumer_groups():
    # consumer_host: '<topic>;[<partition>]' -> [host, client_id, member_id],
    # parsed from the go helper's DescribeConsumerGroups output.
    consumer_host = {}
    out = os.popen("{}/go_saram_tool -host {} -version 1.1.0 -group {}|grep -B1 partitions".format(curr_dir,ip,i))
    # Normalise separators: ', ' and ',\t' become ';'; the '/' printed in
    # front of an IPv4 ClientHost also becomes ';'.
    g_host = out.read().replace(', ',';').replace(',\t',';')
    group_host = re.sub('/(?=(?:\d+.){3}\d+)',';',g_host).split('\n')
    group_host = [ s for s in group_host if '--' not in s and s]
    for j in range(len(group_host)):
        group_host[j] = re.sub('\t\w+:','',group_host[j]).replace('\t','').strip()
    # When one member line is followed by several assignment lines, duplicate
    # the member line so member/assignment lines strictly alternate.
    for r in range(2*len(group_host)):
        try:
            if re.search(r'\[\d.*?\]',group_host[r]) and re.search(r'\[\d.*?\]',group_host[r+1]):
                group_host.insert(r+1,group_host[r-1])
        except Exception as e:
            break
    # Pair every (member line, assignment line) and index by topic+partition.
    for m in range(0,len(group_host),2):
        try:
            if ' ' in group_host[m+1].split(';')[1].strip():
                # several partitions inside one '[p1 p2 ...]' list
                for p in group_host[m+1].split(';')[1].strip().replace('[','').replace(']','').split():
                    consumer_host['{};[{}]'.format(group_host[m+1].split(';')[0].strip(),p)] = group_host[m].split(';')
            else:
                consumer_host[group_host[m+1].replace(' ','')] = group_host[m].split(';')
        except Exception as e:
            print e
            print group_host
    # Committed offset per partition of this group, plus per-topic aggregated
    # consumed offset and lag.
    for k,v in admin.list_consumer_group_offsets(i).items():
        #consumer_partition.append((i,k.topic,k.partition,v.offset,partition_offset[k][1],partition_offset[k][1]-v.offset))
        #if '{};[{}]'.format(k.topic,k.partition) in consumer_host:
        try:
            if consumer_host:
                consumer_partition[k,i] = v.offset,consumer_host['{};[{}]'.format(k.topic,k.partition)]
            else:
                # group has no live members: keep empty host info
                consumer_partition[k,i] = v.offset,['','','']
        except Exception as e:
            print e
        if (k.topic,i) not in consumer_off:
            consumer_off[k.topic,i] = 0
        consumer_off[k.topic,i] += v.offset
        if (i,k.topic) not in consumer_lag:
            consumer_lag[(i,k.topic)] = 0
        consumer_lag[(i,k.topic)] += (partition_offset[k][1]-v.offset)
# Consumption rate = consumed offset now minus the value persisted by the
# previous run, keyed by (topic, group).
if os.path.isfile('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset'):
    with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset')as f3:
        for line in f3:
            consumers[(line.split()[0],line.split()[1])] = line.split()[2]
for k in consumer_off:
    try:
        consumer_rate[k] = consumer_off[k] - int(consumers[k])
    except:
        consumer_rate[k] = None   # first run / new group: no previous sample
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset','w') as f4:
    for t,off in consumer_off.items():
        f4.write(t[0] + ' ' + t[1] + ' ' + str(off) + '\n')
# Join partition start/end offsets with per-group consumed offsets.
# NOTE(review): the inner else-branch records a (partition, 'None') row for
# every consumer key that does not match, so a consumed partition can also
# appear with group 'None' whenever more than one group exists — verify this
# is intended.
topic_partitionoff ={}
if consumer_partition:
    for k,v in partition_offset.items():
        for pa,con in consumer_partition:
            if k == pa:
                topic_partitionoff[k,con] = v[0],v[1],consumer_partition[pa,con]
            else:
                topic_partitionoff[k,'None']= v[0],v[1],'None'
else:
    for k,v in partition_offset.items():
        topic_partitionoff[k,'None']= v[0],v[1],'None'
print len(topic_partitionoff)
#path = '/usr/local/services/music_agent-1.0/bin/kafka_get_offset/log'
#retention_files(path,60)
if topic_rate:
with open(file1,'w') as f:
for k,v in topic_partitionoff.items():
if k[1] != 'None':
data = "topicname={}&partition={}&groupid={}&startoffset={}&endoffset={}¤toffset={}&partition_offset={}&lag={}&totaloffst={}&total_lag={}&consumer_id={}&consumer_client_id={}&consumer_host={}&topic_rate={}&consumer_rate={}".format(k[0].topic,k[0].partition,k[1],v[0],v[1],v[2][0],v[1]-v[0],v[1]-v[2][0],offsets[k[0].topic],consumer_lag[k[1],k[0].topic],v[2][1][0],v[2][1][1],v[2][1][2],topic_rate[k[0].topic],consumer_rate[k[0].topic,k[1]])
else:
data = "topicname={}&partition={}&groupid={}&startoffset={}&endoffset={}¤toffset={}&partition_offset={}&lag={}&totaloffst={}&total_lag={}&topic_rate={}".format(k[0].topic,k[0].partition,k[1],v[0],v[1],'None',v[1]-v[0],'None',offsets[k[0].topic],'None',topic_rate[k[0].topic])
f.write(data+'\n')
#数据上报
report_dc(CMD,DC1,file1)
end_time = time.time()
print end_time -start_time
1.2 topic和分区特别多,可以用python的多线程跑
#!/usr/bin/env python
# -* - coding:UTF-8 -*-
import os,re
import datetime,time,commands
from kafka.admin import KafkaAdminClient
from kafka import KafkaConsumer
from kafka.structs import TopicPartition
from concurrent.futures import ThreadPoolExecutor,wait,ALL_COMPLETED,FIRST_COMPLETED,as_completed
#start_time = datetime.datetime.now().replace(microsecond=0).strftime('%Y-%m-%d %H:%M:%S')
# Wall-clock start; total runtime is printed at the end.
start_time = time.time()
# DC report id and the upload script.
DC1 = 'music_dc0507'
CMD = '/usr/local/services/music_agent-1.0/bin/dcapi_python/batch_write_log.sh'
#out = os.popen("ifconfig|grep -A1 -w eth1|awk 'NR==2{print $2}'")
# Resolve the local IP via the agent helper; broker assumed on port 9092.
out = os.popen('/usr/local/services/music_agent-1.0/bin/tools_ifconfig')
ips = out.read().rstrip('\n')
print ips
ip = ips + ':' + '9092'
curr_dir = '/usr/local/services/music_agent-1.0/bin/kafka_get_offset'
# kafka-python admin/consumer clients against the local broker.
admin = KafkaAdminClient(bootstrap_servers=ip)
consumer = KafkaConsumer(bootstrap_servers=ip)
topics = admin.list_topics()
#cluster = [i['host']+':'+str(i['port']) for i in a.describe_cluster()['brokers']]
#cluster = [i['host']+':'+str(i['port']) for i in a.describe_cluster()['brokers']]
def report_dc(cmd,dcid,file):
    # Upload one metrics file to the DC collector via batch_write_log.sh;
    # prints the exit code and its output.
    _code,_d = commands.getstatusoutput("{} {} {}".format(cmd,dcid,file))
    print _code,_d
#def retention_files(path,n):
# files = os.listdir(path)
# if len(files) > n:
# oldfile = sorted(files)[0]
# old_path_file = os.path.join(path,oldfile)
# print old_path_file
# os.remove(old_path_file)
#partitions = []
#for topic in topics:
# for i in consumer.partitions_for_topic(topic):
# partitions.append(TopicPartition(topic=topic,partition=i))
#begin = consumer.beginning_offsets(partitions)
#end = consumer.end_offsets(partitions)
begin = {}   # TopicPartition -> oldest offset (merged from all workers)
end = {}     # TopicPartition -> newest offset (merged from all workers)
# Progress counter shared across worker threads.
# NOTE(review): 'n += 1' is not atomic in CPython; the count is best-effort.
n=0
def work_offsets(topic):
    # Worker: fetch begin/end offsets for every partition of one topic.
    # Returns ({}, {}) on any error so the caller's update() is a no-op.
    partitions = []
    try:
        for i in consumer.partitions_for_topic(topic):
            partitions.append(TopicPartition(topic=topic,partition=i))
        begin1 = consumer.beginning_offsets(partitions)
        end1 = consumer.end_offsets(partitions)
        global n
        n+=1
        print n
    except Exception as e:
        print e
        begin1,end1={},{}
    return begin1,end1
# One task per topic; merge each worker's partial maps as they complete.
with ThreadPoolExecutor(1000) as executor:
    #executor.map(work_offsets,topics)
    all_task =[executor.submit(work_offsets,topic) for topic in topics]
    #wait(all_task,timout=return_when=ALL_COMPLETED)
    for task in as_completed(all_task):
        begin.update(task.result()[0])
        end.update(task.result()[1])
print '-------------------'
partition_offset = {}
topic_offset = {}
offsets,end_offset,topics,topic_rate = {},{},{},{}
for k in begin:
if k in end:
partition_offset[k] = [begin[k],end[k]]
topic_offset[k] = end[k] -begin[k]
if k.topic not in end_offset:
end_offset[k.topic] = 0
end_offset[k.topic] += end[k]
for k in topic_offset:
if k.topic != '__consumer_offset':
if k.topic not in offsets:
offsets[k.topic] = 0
offsets[k.topic] += topic_offset[k]
if os.path.exists('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset'):
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset') as f1:
for line in f1:
topics[line.split()[0]] = line.split()[1]
for t in end_offset:
try:
topic_rate[t] = end_offset[t] - int(topics[t])
except Exception as e:
topic_rate[t] = None
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset','w') as f2:
for tp,endoff in end_offset.items():
f2.write(tp+' ' + str(endoff) + '\n')
###### Per-partition consumed offset vs current offset
#if not os.path.exists(curr_dir + '/../data/music_agent/kafka'):
#    os.mkdir(curr_dir + '/usr/local/services/music_agent-1.0/data/music_agent/kafka/')
# Output file for this run, suffixed with -HH-MM.
file1 = curr_dir + '/../../data/music_agent/kafka/topic_partition_consumer_offset' + datetime.datetime.now().strftime('-%H-%M')
consumer_partition = {}
consumer_lag,consumer_off,consumer_rate,consumers = {},{},{},{}
def consumer_offsets(gp):
    #for i,_ in admin.list_consumer_groups():
    # Worker: for one consumer group gp, parse the go helper's member output
    # and collect committed offsets plus per-topic consumed offset and lag.
    # NOTE(review): this mutates the module-level dicts consumer_partition,
    # consumer_off and consumer_lag from many threads AND returns them, so
    # the caller's update() calls are redundant — verify thread safety.
    consumer_host = {}
    out = os.popen("{}/go_saram_tool -host {} -version 1.1.0 -group {}|grep -B1 partitions".format(curr_dir,ip,gp))
    # Normalise separators; the '/' before an IPv4 ClientHost becomes ';'.
    g_host = out.read().replace(', ',';').replace(',\t',';')
    group_host = re.sub('/(?=(?:\d+.){3}\d+)',';',g_host).split('\n')
    group_host = [ s for s in group_host if '--' not in s and s]
    for j in range(len(group_host)):
        group_host[j] = re.sub('\t\w+:','',group_host[j]).replace('\t','').strip()
    # Duplicate member lines so member/assignment lines strictly alternate.
    for r in range(2*len(group_host)):
        try:
            if re.search(r'\[\d.*?\]',group_host[r]) and re.search(r'\[\d.*?\]',group_host[r+1]):
                group_host.insert(r+1,group_host[r-1])
        except Exception as e:
            break
    # Pair (member line, assignment line) and index by '<topic>;[<partition>]'.
    for m in range(0,len(group_host),2):
        try:
            if ' ' in group_host[m+1].split(';')[1].strip():
                for p in group_host[m+1].split(';')[1].strip().replace('[','').replace(']','').split():
                    consumer_host['{};[{}]'.format(group_host[m+1].split(';')[0].strip(),p)] = group_host[m].split(';')
            else:
                consumer_host[group_host[m+1].replace(' ','')] = group_host[m].split(';')
        except Exception as e:
            print e
            print group_host
    # Committed offset per partition, plus aggregated offset/lag per topic.
    for k,v in admin.list_consumer_group_offsets(gp).items():
        try:
            if consumer_host:
                consumer_partition[k,gp] = v.offset,consumer_host['{};[{}]'.format(k.topic,k.partition)]
            else:
                # group has no live members: keep empty host info
                consumer_partition[k,gp] = v.offset,['','','']
        except Exception as e:
            print e
        try:
            if (k.topic,gp) not in consumer_off:
                consumer_off[k.topic,gp] = 0
            consumer_off[k.topic,gp] += v.offset
            if (gp,k.topic) not in consumer_lag:
                consumer_lag[(gp,k.topic)] = 0
            consumer_lag[(gp,k.topic)] += (partition_offset[k][1]-v.offset)
        except:
            pass
    return consumer_partition,consumer_off,consumer_lag
# One task per consumer group; merge each worker's partial dicts.
groups = admin.list_consumer_groups()
with ThreadPoolExecutor(max_workers=1000) as executor:
    all_tasks = [executor.submit(consumer_offsets,group[0]) for group in groups]
    for task in as_completed(all_tasks):
        consumer_partition.update(task.result()[0])
        consumer_off.update(task.result()[1])
        consumer_lag.update(task.result()[2])
# Consumption rate = consumed offset now minus the value persisted by the
# previous run, keyed by (topic, group).
if os.path.isfile('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset'):
    with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset')as f3:
        for line in f3:
            consumers[(line.split()[0],line.split()[1])] = line.split()[2]
for k in consumer_off:
    try:
        consumer_rate[k] = consumer_off[k] - int(consumers[k])
    except:
        consumer_rate[k] = None   # first run / new group: no previous sample
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset','w') as f4:
    for t,off in consumer_off.items():
        f4.write(t[0] + ' ' + t[1] + ' ' + str(off) + '\n')
# Join partition start/end offsets with per-group consumed offsets.
# NOTE(review): the inner else-branch records a (partition, 'None') row for
# every non-matching consumer key — verify this is intended.
topic_partitionoff ={}
if consumer_partition:
    for k,v in partition_offset.items():
        for pa,con in consumer_partition:
            if k == pa:
                topic_partitionoff[k,con] = v[0],v[1],consumer_partition[pa,con]
            else:
                topic_partitionoff[k,'None']= v[0],v[1],'None'
else:
    for k,v in partition_offset.items():
        topic_partitionoff[k,'None']= v[0],v[1],'None'
print len(topic_partitionoff)
#path = '/usr/local/services/music_agent-1.0/bin/kafka_get_offset/log'
#retention_files(path,60)
if topic_rate:
with open(file1,'w') as f:
for k,v in topic_partitionoff.items():
if k[1] != 'None':
data = "topicname={}&partition={}&groupid={}&startoffset={}&endoffset={}¤toffset={}&partition_offset={}&lag={}&totaloffst={}&total_lag={}&consumer_id={}&consumer_client_id={}&consumer_host={}&topic_rate={}&consumer_rate={}".format(k[0].topic,k[0].partition,k[1],v[0],v[1],v[2][0],v[1]-v[0],v[1]-v[2][0],offsets[k[0].topic],consumer_lag[k[1],k[0].topic],v[2][1][0],v[2][1][1],v[2][1][2],topic_rate[k[0].topic],consumer_rate[k[0].topic,k[1]])
else:
data = "topicname={}&partition={}&groupid={}&startoffset={}&endoffset={}¤toffset={}&partition_offset={}&lag={}&totaloffst={}&total_lag={}&topic_rate={}".format(k[0].topic,k[0].partition,k[1],v[0],v[1],'None',v[1]-v[0],'None',offsets[k[0].topic],'None',topic_rate[k[0].topic])
f.write(data+'\n')
report_dc(CMD,DC1,file1)
end_time = time.time()
print end_time -start_time
1.3 优化版的,主要是优化了go代码获取到的消费组详情的对应group topic的ip关系,原代码在有些特殊的可能会获取不到,此代码在一个消费组对应多个topic的情况没有验证,大家可以验证一下,理论上应该没问题
#!/usr/bin/env python
# -* - coding:UTF-8 -*-
import os,re
import datetime,time,commands
from kafka.admin import KafkaAdminClient
from kafka import KafkaConsumer
from kafka.structs import TopicPartition
#start_time = datetime.datetime.now().replace(microsecond=0).strftime('%Y-%m-%d %H:%M:%S')
# Wall-clock start; total runtime is printed at the end.
start_time = time.time()
# DC report id and the upload script.
DC1 = 'music_dc0507'
CMD = '/usr/local/services/music_agent-1.0/bin/dcapi_python/batch_write_log.sh'
#out = os.popen("ifconfig|grep -A1 -w eth1|awk 'NR==2{print $2}'")
# Resolve the local IP via the agent helper; broker assumed on port 9092.
out = os.popen('/usr/local/services/music_agent-1.0/bin/tools_ifconfig')
ips = out.read().rstrip('\n')
print ips
ip = ips + ':' + '9092'
curr_dir = '/usr/local/services/music_agent-1.0/bin/kafka_get_offset'
# kafka-python admin/consumer clients against the local broker.
admin = KafkaAdminClient(bootstrap_servers=ip)
consumer = KafkaConsumer(bootstrap_servers=ip)
topics = admin.list_topics()
#cluster = [i['host']+':'+str(i['port']) for i in a.describe_cluster()['brokers']]
def report_dc(cmd,dcid,file):
    # Upload one metrics file to the DC collector via batch_write_log.sh;
    # prints the exit code and its output.
    _code,_d = commands.getstatusoutput("{} {} {}".format(cmd,dcid,file))
    print _code,_d
#def retention_files(path,n):
# files = os.listdir(path)
# if len(files) > n:
# oldfile = sorted(files)[0]
# old_path_file = os.path.join(path,oldfile)
# print old_path_file
# os.remove(old_path_file)
partitions = []
for topic in topics:
for i in consumer.partitions_for_topic(topic):
partitions.append(TopicPartition(topic=topic,partition=i))
begin = consumer.beginning_offsets(partitions)
end = consumer.end_offsets(partitions)
print '-------------------'
partition_offset = {}
topic_offset = {}
offsets,end_offset,topics,topic_rate = {},{},{},{}
for k in begin:
if k in end:
partition_offset[k] = [begin[k],end[k]]
topic_offset[k] = end[k] -begin[k]
if k.topic not in end_offset:
end_offset[k.topic] = 0
end_offset[k.topic] += end[k]
for k in topic_offset:
if k.topic != '__consumer_offset':
if k.topic not in offsets:
offsets[k.topic] = 0
offsets[k.topic] += topic_offset[k]
if os.path.exists('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset'):
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset') as f1:
for line in f1:
topics[line.split()[0]] = line.split()[1]
for t in end_offset:
try:
topic_rate[t] = end_offset[t] - int(topics[t])
except Exception as e:
topic_rate[t] = None
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/topic_offset','w') as f2:
for tp,endoff in end_offset.items():
f2.write(tp+' ' + str(endoff) + '\n')
###### Per-partition consumed offset vs current offset
#if not os.path.exists(curr_dir + '/../data/music_agent/kafka'):
#    os.mkdir(curr_dir + '/usr/local/services/music_agent-1.0/data/music_agent/kafka/')
# Output file for this run, suffixed with -HH-MM.
file1 = curr_dir + '/../../data/music_agent/kafka/topic_partition_consumer_offset' + datetime.datetime.now().strftime('-%H-%M')
consumer_partition = {}
consumer_lag,consumer_off,consumer_rate,consumers = {},{},{},{}
for i,_ in admin.list_consumer_groups():
    consumer_host = {}
    out = os.popen("{}/go_saram_tool -host {} -version 1.1.0 -group {}|grep -B1 partitions".format(curr_dir,ip,i))
    # (previous regex-based parsing, kept for reference)
    # g_host = out.read().replace(', ',';').replace(',\t',';')
    # group_host = re.sub('/(?=(?:\d+.){3}\d+)',';',g_host).split('\n')
    # group_host = [ s for s in group_host if '--' not in s and s]
    # for j in range(len(group_host)):
    #     group_host[j] = re.sub('\t\w+:','',group_host[j]).replace('\t','').strip()
    # for r in range(2*len(group_host)):
    #     try:
    #         if re.search(r'\[\d.*?\]',group_host[r]) and re.search(r'\[\d.*?\]',group_host[r+1]):
    #             group_host.insert(r+1,group_host[r-1])
    #     except Exception as e:
    #         break
    # print group_host
    # for m in range(0,len(group_host),2):
    #     try:
    #         if ' ' in group_host[m+1].split(';')[1].strip():
    #             for p in group_host[m+1].split(';')[1].strip().replace('[','').replace(']','').split():
    #                 consumer_host['{};[{}]'.format(group_host[m+1].split(';')[0].strip(),p)] = group_host[m].split(';')
    #             print consumer_host
    #         else:
    #             consumer_host[group_host[m+1].replace(' ','')] = group_host[m].split(';')
    #     except Exception as e:
    #         print e
    #         print group_host
    # New parsing: split the go tool output on newline+tab boundaries so each
    # element holds one member record (member line + its assignment).
    group_host=out.read().replace('\n\t\t','\t').split('\n\t')
    for h in group_host:
        h=re.sub('\w+:','',h)
        host = h.split('\t')
        host = [ht.strip(', ').strip('\n') for ht in host if ht ]
        # sarama prints ClientHost as '/1.2.3.4' — drop the leading slash.
        # NOTE(review): assumes at least 3 fields; raises IndexError when the
        # group has no members — verify.
        host[2] = host[2].strip('/')
        #consumer_host['{};{}'.format(host[-2],host[-1])]=host[:-2]
        # NOTE(review): this rebinds the module-level 'partitions' list.
        partitions = host[-1].replace('[','').replace(']','').split(' ')
        for par in partitions:
            consumer_host['{};[{}]'.format(host[-2],par)]=host[:-2]
    # Committed offset per partition, plus aggregated offset/lag per topic.
    for k,v in admin.list_consumer_group_offsets(i).items():
        #consumer_partition.append((i,k.topic,k.partition,v.offset,partition_offset[k][1],partition_offset[k][1]-v.offset))
        #if '{};[{}]'.format(k.topic,k.partition) in consumer_host:
        try:
            if consumer_host:
                consumer_partition[k,i] = v.offset,consumer_host['{};[{}]'.format(k.topic,k.partition)]
            else:
                # group has no live members: keep empty host info
                consumer_partition[k,i] = v.offset,['','','']
        except Exception as e:
            print e
        if (k.topic,i) not in consumer_off:
            consumer_off[k.topic,i] = 0
        consumer_off[k.topic,i] += v.offset
        if (i,k.topic) not in consumer_lag:
            consumer_lag[(i,k.topic)] = 0
        consumer_lag[(i,k.topic)] += (partition_offset[k][1]-v.offset)
# Consumption rate = consumed offset now minus the value persisted by the
# previous run, keyed by (topic, group).
if os.path.isfile('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset'):
    with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset')as f3:
        for line in f3:
            consumers[(line.split()[0],line.split()[1])] = line.split()[2]
for k in consumer_off:
    try:
        consumer_rate[k] = consumer_off[k] - int(consumers[k])
    except:
        consumer_rate[k] = None   # first run / new group: no previous sample
with open('/usr/local/services/music_agent-1.0/bin/kafka_get_offset/consumer_offset','w') as f4:
    for t,off in consumer_off.items():
        f4.write(t[0] + ' ' + t[1] + ' ' + str(off) + '\n')
# Join partition start/end offsets with per-group consumed offsets.
# NOTE(review): the inner else-branch records a (partition, 'None') row for
# every non-matching consumer key — verify this is intended.
topic_partitionoff ={}
if consumer_partition:
    for k,v in partition_offset.items():
        for pa,con in consumer_partition:
            if k == pa:
                topic_partitionoff[k,con] = v[0],v[1],consumer_partition[pa,con]
            else:
                topic_partitionoff[k,'None']= v[0],v[1],'None'
else:
    for k,v in partition_offset.items():
        topic_partitionoff[k,'None']= v[0],v[1],'None'
print len(topic_partitionoff)
#path = '/usr/local/services/music_agent-1.0/bin/kafka_get_offset/log'
#retention_files(path,60)
if topic_rate:
with open(file1,'w') as f:
for k,v in topic_partitionoff.items():
if k[1] != 'None':
try:
data = "topicname={}&partition={}&groupid={}&startoffset={}&endoffset={}¤toffset={}&partition_offset={}&lag={}&totaloffst={}&total_lag={}&consumer_id={}&consumer_client_id={}&consumer_host={}&topic_rate={}&consumer_rate={}".format(k[0].topic,k[0].partition,k[1],v[0],v[1],v[2][0],v[1]-v[0],v[1]-v[2][0],offsets[k[0].topic],consumer_lag[k[1],k[0].topic],v[2][1][0],v[2][1][1],v[2][1][2],topic_rate[k[0].topic],consumer_rate[k[0].topic,k[1]])
except:
print k,v
else:
data = "topicname={}&partition={}&groupid={}&startoffset={}&endoffset={}¤toffset={}&partition_offset={}&lag={}&totaloffst={}&total_lag={}&topic_rate={}".format(k[0].topic,k[0].partition,k[1],v[0],v[1],'None',v[1]-v[0],'None',offsets[k[0].topic],'None',topic_rate[k[0].topic])
f.write(data+'\n')
report_dc(CMD,DC1,file1)
end_time = time.time()
print end_time -start_time
2.go代码获取消费和topic的offset详情以及ip
这里大家可以自行优化以及计算速率,此处并未给出计算速率等方法,后续有时间再优化。
package main
import (
	"fmt"
	"log"
	"strconv"
	"strings"

	"github.com/Shopify/sarama"
)
// main collects per-partition start/end offsets for every topic, joins them
// with consumer-group member info (host, client id, member id, committed
// offset), and prints consumed partitions followed by unconsumed ones.
func main() {
	brokers := []string{"9.235.135.8:9092"}
	admin := admin_api(brokers)
	topics := topics(admin)
	client := client_api(brokers)
	topic_offset := topic_partition_offset(topics, client)
	groups := consumer_groups(admin)
	group_off := consumers_off(admin, client, groups)
	no_group_topic_off := make(map[string][]int64)
	fmt.Println(len(topic_offset))
	m := 0
	for tpar, off := range topic_offset {
		// topic_offset keys are "topic:partition"; group_off keys are
		// "topic:group:partition".
		topic := strings.Split(tpar, ":")[0]
		partition := strings.Split(tpar, ":")[1]
		// FIX: the old strings.Contains matching was ambiguous — partition
		// "1" also matched "10", and one topic name could be a substring of
		// another. Match the topic exactly as prefix and the partition
		// exactly as suffix instead.
		matched := false
		for groupp, host := range group_off {
			if strings.HasPrefix(groupp, topic+":") && strings.HasSuffix(groupp, ":"+partition) {
				host = append(host, strconv.FormatInt(off[0], 10), strconv.FormatInt(off[1], 10))
				fmt.Println(groupp, host)
				m++
				matched = true
			}
		}
		// FIX: only record a partition as unconsumed when NO group matched;
		// the old else-branch added it whenever any single group failed to
		// match, so consumed partitions were also listed as unconsumed.
		if !matched {
			no_group_topic_off[tpar] = off
		}
	}
	n := 0
	for k, v := range no_group_topic_off {
		fmt.Println(k, "-----", v)
		n++
	}
	fmt.Println(m, n)
}
// type all_api struct {
// admin func
// client func
// }
// admin_api connects a sarama ClusterAdmin to the given brokers.
// FIX: the connection error was previously printed and ignored, so a nil
// admin was returned and dereferenced later with a misleading panic; abort
// immediately with the real error instead.
func admin_api(brokers []string) sarama.ClusterAdmin {
	admin, err := sarama.NewClusterAdmin(brokers, nil)
	if err != nil {
		log.Fatalf("connecting cluster admin to %v: %v", brokers, err)
	}
	return admin
}
// client_api connects a sarama Client to the given brokers.
// FIX: the connection error was previously printed and ignored, so a nil
// client was returned and dereferenced later; abort with the real error.
func client_api(brokers []string) sarama.Client {
	client, err := sarama.NewClient(brokers, nil)
	if err != nil {
		log.Fatalf("connecting client to %v: %v", brokers, err)
	}
	return client
}
// topics returns the topic details of every topic in the cluster.
// FIX: the error was previously printed (in Chinese, then discarded) and a
// possibly-nil map returned; abort with the underlying error instead.
func topics(admin sarama.ClusterAdmin) map[string]sarama.TopicDetail {
	topics, err := admin.ListTopics()
	if err != nil {
		log.Fatalf("listing topics: %v", err)
	}
	return topics
}
// consumer_groups returns the ids of all consumer groups in the cluster.
// FIX: the slice was created with make([]string, 100), so 100 empty group
// ids were prepended before the real ones and later passed to
// DescribeConsumerGroups; allocate capacity only.
func consumer_groups(admin sarama.ClusterAdmin) []string {
	groups, err := admin.ListConsumerGroups()
	if err != nil {
		log.Fatalf("listing consumer groups: %v", err)
	}
	gs := make([]string, 0, len(groups))
	for group := range groups {
		gs = append(gs, group)
	}
	return gs
}
// topic_partition_offset returns, for every "topic:partition" key, the
// oldest and newest offset of that partition.
func topic_partition_offset(topics map[string]sarama.TopicDetail, client sarama.Client) map[string][]int64 {
	top_off := make(map[string][]int64)
	for tp := range topics {
		partitions, err := client.Partitions(tp)
		if err != nil {
			log.Printf("fetching partitions of %s: %v", tp, err)
			continue
		}
		// FIX: `for p := range partitions` iterated slice INDICES, which only
		// happens to be correct when partition ids are contiguous from 0;
		// iterate the actual partition ids instead. (Also replaces
		// fmt.Println called with printf-style verbs, which printed the
		// verbs literally.)
		for _, p := range partitions {
			oldoffset, err := client.GetOffset(tp, p, sarama.OffsetOldest)
			if err != nil {
				log.Printf("fetching oldest offset of %s/%d: %v", tp, p, err)
			}
			newoffset, err := client.GetOffset(tp, p, sarama.OffsetNewest)
			if err != nil {
				log.Printf("fetching newest offset of %s/%d: %v", tp, p, err)
			}
			top_off[tp+":"+strconv.Itoa(int(p))] = []int64{oldoffset, newoffset}
		}
	}
	return top_off
}
// consumers_off returns, for every "topic:group:partition" key, the member
// info [client host, client id, member id] with the committed offset of that
// partition appended as the last element (all as strings).
// NOTE(review): fmt.Println is used with printf-style verbs in one error
// message below; the verb is printed literally — consider fmt.Printf.
func consumers_off(admin sarama.ClusterAdmin, client sarama.Client, groups []string) map[string][]string {
	consumer_info, err := admin.DescribeConsumerGroups(groups)
	if err != nil {
		fmt.Println("获取group详情失败")
	}
	// group_info := make(map[string]map[string][]string)
	// hosts: "topic:group:partition" -> [client host, client id, member id]
	hosts := make(map[string][]string)
	// group_topic: group id -> unique list of topics the group consumes
	group_topic := make(map[string][]string)
	for _, cn := range consumer_info {
		// state := cn.State
		group := cn.GroupId
		g_topics := []string{}
		for _, v := range cn.Members {
			// fmt.Println(v.ClientHost, v.ClientId, v.MemberId)
			gmb, err := v.GetMemberAssignment()
			if err != nil {
				fmt.Println("获取member信息失败")
			}
			for t, par := range gmb.Topics {
				// de-duplicate the topic list for this group
				flag := true
				for i := range g_topics {
					if t == g_topics[i] {
						flag = false
						break
					}
				}
				if flag {
					g_topics = append(g_topics, t)
				}
				group_topic[group] = g_topics
				// record member info for every partition assigned to this member
				for _, i := range par {
					hosts[t+":"+group+":"+strconv.Itoa(int(i))] = []string{v.ClientHost, v.ClientId, v.MemberId}
				}
			}
		}
	}
	// par_off: "topic:group:partition" -> committed offset
	par_off := make(map[string]int64)
	group_partition_off := make(map[string][]string)
	for group, v := range group_topic {
		for _, topic := range v {
			partitions, err := client.Partitions(topic)
			if err != nil {
				fmt.Println("获取topic分区失败")
			}
			top_partition := map[string][]int32{topic: partitions}
			group_offset, err := admin.ListConsumerGroupOffsets(group, top_partition)
			if err != nil {
				fmt.Println("获取group %soffset失败", group)
			}
			for k, v := range group_offset.Blocks {
				for off, r := range v {
					par_off[k+":"+group+":"+strconv.Itoa(int(off))] = r.Offset
				}
			}
		}
	}
	// Join member info with committed offsets; offset goes last in the slice.
	for k, v := range par_off {
		host_off := hosts[k]
		host_off = append(host_off, strconv.FormatInt(v, 10))
		group_partition_off[k] = host_off
	}
	return group_partition_off
}