#!/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import requests, time, re
def getcolumn(status, x):
    """Classify a usage percentage as normal or abnormal.

    :param status: usage value; anything ``float()`` accepts.
    :param x: not used by this function.
    :return: "正常" when the value is below 80, otherwise "异常".
    """
    # Guard clause: everything that is not strictly below 80 is abnormal.
    if not float(status) < 80:
        return "异常"
    return "正常"
def query(url, promql, timeout=30):
    """Run an instant PromQL query against the Prometheus HTTP API.

    The expression is sent as a ``params`` dict so that ``requests`` encodes
    it. Not recommended: passing a hand-built string such as
    params = 'query=round(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance,nodename) * 100 ),0.01)'

    :param url: base URL of the Prometheus server; '/api/v1/query' is appended.
    :param promql: PromQL expression.
    :param timeout: seconds passed to ``requests.get`` as its timeout
        (optional; without it the request could block indefinitely).
    :return: the non-empty ``data.result`` list on success; the server's
        error message when the response status is 'error'; otherwise the
        string '无法获取有效数据'.
    """
    queryurl = url + '/api/v1/query'
    response = requests.get(queryurl, params={'query': promql}, timeout=timeout)
    # Decode the body once instead of re-parsing it for every field access.
    payload = response.json()
    if payload.get('status') == 'error':
        error = payload.get('error')
        print(error)
        return error
    # "data" may be missing or null; treat that like an empty result
    # rather than failing with AttributeError.
    metrics = (payload.get('data') or {}).get('result')
    if metrics:
        return metrics
    print('无法获取有效数据')
    return '无法获取有效数据'
def queryrange(url, promql, starttime, endtime, step, timeout=30):
    """Run a PromQL range query against the Prometheus HTTP API.

    Example parameter set:
        query: 'round(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance,nodename) * 100 ),0.01)'
        start: '2023-08-21T02:10:10.000+08:00'
        end:   '2023-08-24T02:13:00.000+08:00'
        step:  '60s'

    :param url: base URL of the Prometheus server; '/api/v1/query_range' is appended.
    :param promql: PromQL expression.
    :param starttime: rfc3339 ('2023-08-21T02:10:10.000+08:00') or unix timestamp.
    :param endtime: rfc3339 ('2023-08-24T02:13:00.000+08:00') or unix timestamp.
    :param step: <duration | float> query resolution step; the expression is
        evaluated once every ``step`` within the time range.
    :param timeout: seconds passed to ``requests.get`` as its timeout
        (optional; without it the request could block indefinitely).
    :return: the non-empty ``data.result`` list, or None when the server
        reports an error or no data is available.
    """
    rangeurl = url + '/api/v1/query_range'
    rangeparams = {'query': promql, 'start': starttime, 'end': endtime, 'step': step}
    response = requests.get(rangeurl, params=rangeparams, timeout=timeout)
    # Decode the body once and reuse it.
    payload = response.json()
    if payload.get('status') == 'error':
        # Report the server-side error instead of failing on the
        # "data" lookup below.
        print(payload.get('error'))
        return None
    metrics = (payload.get('data') or {}).get('result')
    if metrics:
        return metrics
    print('无法获取有效数据')
    return None
def cpuresolv(result, nu):
    """Flatten instant-query CPU samples into row dicts.

    :param result: list of samples; each item is read as
        ``{'metric': {'instance': ..., 'nodename': ...}, 'value': [timestamp, value]}``.
        Anything that is not a list (for example a message string returned
        in place of data) yields an empty list.
    :param nu: text inserted into the usage column name, 'CPU<nu>分钟使用率(%)'.
    :return: list of dicts with the keys 'instance', '服务器名',
        'CPU<nu>分钟使用率(%)' and '时间'.
    """
    if not isinstance(result, list):
        # Nothing to iterate over; avoid AttributeError on str/None input.
        return []
    # Chinese-character run + word characters + Chinese-character run.
    # The backslash of \w is escaped so the literal has no invalid escape
    # sequence; the compiled pattern is unchanged.
    prog = re.compile(u'[\u4e00-\u9fa5]+\\w+[\u4e00-\u9fa5]+')
    usage_key = 'CPU' + str(nu) + '分钟使用率(%)'
    cpulist = []
    for sample in result:
        metric = sample.get('metric')
        value = sample.get('value')
        instance = metric.get('instance')
        matches = prog.findall(instance or '')
        cpulist.append({
            # Fall back to the raw label when it contains no Chinese name
            # (previously an IndexError).
            'instance': matches[0] if matches else instance,
            '服务器名': metric.get('nodename'),
            usage_key: "%.2f" % float(value[1]),
            '时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0])),
        })
    return cpulist
def cputosheet(url):
    """Build the CPU usage sheet from 1m, 5m and 15m idle-rate queries.

    The three result sets are joined on '服务器名', and a '状态' column is
    derived from the 15-minute usage via ``getcolumn``.

    :param url: base URL of the Prometheus server, passed on to ``query``.
    :return: DataFrame with the columns 'instance', '服务器名', the three
        usage columns, '时间' and '状态', indexed from 1.
    """
    # One template for the three windows; only the range selector differs.
    promql = ('abs(100-(avg(irate(node_cpu_seconds_total{mode="idle"}[%s]))'
              ' by (instance,nodename) * 100 ))')
    res1m = cpuresolv(query(url, promql % '1m'), '一')
    res5m = cpuresolv(query(url, promql % '5m'), '五')
    res15m = cpuresolv(query(url, promql % '15m'), '十五')
    left = pd.DataFrame(res1m, columns=['instance', '服务器名', 'CPU一分钟使用率(%)'])
    middle = pd.DataFrame(res5m, columns=['服务器名', 'CPU五分钟使用率(%)'])
    right = pd.DataFrame(res15m, columns=['服务器名', 'CPU十五分钟使用率(%)', '时间'])
    cpuuse = pd.merge(pd.merge(left, middle, on='服务器名'), right, on='服务器名')
    # Add the '状态' column from 'CPU十五分钟使用率(%)': below 80 is normal,
    # 80 and above is abnormal. Built as a plain list, one entry per row,
    # because DataFrame.apply(axis=1) on an empty frame may hand back a
    # DataFrame, which cannot be assigned to a single column.
    status_source = 'CPU十五分钟使用率(%)'
    cpuuse['状态'] = [getcolumn(row[status_source], row) for _, row in cpuuse.iterrows()]
    # Number the rows from 1 instead of 0.
    cpuuse.index = list(range(1, len(cpuuse.index) + 1))
    return cpuuse
def memoryresolv(result):
    """Flatten instant-query memory samples into row dicts.

    :param result: list of samples; each item is read as
        ``{'metric': {'instance': ..., 'nodename': ...}, 'value': [timestamp, value]}``.
        Anything that is not a list (for example a message string returned
        in place of data) yields an empty list.
    :return: list of dicts with the keys 'instance', '服务器名',
        '内存使用率(%)', '时间' and '状态' ('正常' below 80, else '异常').
    """
    if not isinstance(result, list):
        # Nothing to iterate over; avoid AttributeError on str/None input.
        return []
    # Chinese-character run + word characters + Chinese-character run.
    # The backslash of \w is escaped so the literal has no invalid escape
    # sequence; the compiled pattern is unchanged.
    prog = re.compile(u'[\u4e00-\u9fa5]+\\w+[\u4e00-\u9fa5]+')
    memorylist = []
    for sample in result:
        metric = sample.get('metric')
        value = sample.get('value')
        instance = metric.get('instance')
        matches = prog.findall(instance or '')
        usage = float(value[1])
        memorylist.append({
            # Fall back to the raw label when it contains no Chinese name
            # (previously an IndexError).
            'instance': matches[0] if matches else instance,
            '服务器名': metric.get('nodename'),
            '内存使用率(%)': "%.2f" % usage,
            '时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0])),
            '状态': '正常' if usage < 80 else '异常',
        })
    return memorylist
def memorytosheet(url):
    """Query memory usage and return it as a DataFrame indexed from 1.

    :param url: base URL of the Prometheus server, passed on to ``query``.
    :return: DataFrame with the columns 'instance', '服务器名',
        '内存使用率(%)', '时间' and '状态'.
    """
    rows = memoryresolv(query(
        url,
        '(node_memory_MemTotal_bytes-node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes*100 > 1',
    ))
    header = ['instance', '服务器名', '内存使用率(%)', '时间', '状态']
    sheet = pd.DataFrame(rows, columns=header)
    # Number the rows from 1 instead of 0.
    sheet.index = list(range(1, len(sheet.index) + 1))
    return sheet
def diskresolv(result):
    """Flatten instant-query filesystem samples into row dicts.

    :param result: list of samples; each item is read as
        ``{'metric': {'instance': ..., 'nodename': ..., 'mountpoint': ...},
        'value': [timestamp, value]}``. Anything that is not a list (for
        example a message string returned in place of data) yields an
        empty list.
    :return: list of dicts with the keys 'instance', '服务器名', '分区',
        '分区使用率(%)', '时间' and '状态' ('正常' below 80, else '异常').
    """
    if not isinstance(result, list):
        # Nothing to iterate over; avoid AttributeError on str/None input.
        return []
    # Chinese-character run + word characters + Chinese-character run.
    # The backslash of \w is escaped so the literal has no invalid escape
    # sequence; the compiled pattern is unchanged.
    prog = re.compile(u'[\u4e00-\u9fa5]+\\w+[\u4e00-\u9fa5]+')
    disklist = []
    for sample in result:
        metric = sample.get('metric')
        value = sample.get('value')
        instance = metric.get('instance')
        matches = prog.findall(instance or '')
        usage = float(value[1])
        disklist.append({
            # Fall back to the raw label when it contains no Chinese name
            # (previously an IndexError).
            'instance': matches[0] if matches else instance,
            '服务器名': metric.get('nodename'),
            '分区': metric.get('mountpoint'),
            '分区使用率(%)': "%.2f" % usage,
            '时间': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value[0])),
            '状态': '正常' if usage < 80 else '异常',
        })
    return disklist
def disktosheet(url):
    """Query filesystem usage and return it as a DataFrame indexed from 1.

    :param url: base URL of the Prometheus server, passed on to ``query``.
    :return: DataFrame with the columns 'instance', '服务器名', '分区',
        '分区使用率(%)', '时间' and '状态'.
    """
    rows = diskresolv(query(
        url,
        'round((node_filesystem_size_bytes{fstype=~"xfs|ext4",mountpoint=~"/|/app"} - node_filesystem_free_bytes) / node_filesystem_size_bytes * 100 >1,0.01)',
    ))
    header = ['instance', '服务器名', '分区', '分区使用率(%)', '时间', '状态']
    sheet = pd.DataFrame(rows, columns=header)
    # Number the rows from 1 instead of 0.
    sheet.index = list(range(1, len(sheet.index) + 1))
    return sheet
if __name__ == '__main__':
    # Date-and-hour stamp appended to the workbook file name.
    stamp = time.strftime("-%Y%m%d-%H")
    prometheus = 'http://Prometheus:30003'
    # Collect every report before the workbook is opened, in sheet order.
    sheets = [
        ('cpu', cputosheet(prometheus)),
        ('内存', memorytosheet(prometheus)),
        ('磁盘', disktosheet(prometheus)),
    ]
    with pd.ExcelWriter('服务器状态{}.xlsx'.format(stamp)) as writer:
        for title, frame in sheets:
            frame.to_excel(writer, sheet_name=title)
# References:
# https://prometheus.fuckcloudnative.io/di-san-zhang-prometheus/di-4-jie-cha-xun/operators
# https://blog.csdn.net/qq_38727995/article/details/124459704  # the pandas DataFrame apply method
# https://blog.csdn.net/weixin_47661174/article/details/124697842  # merging pandas DataFrames