关于一款zabbix巡检+钉钉预警的脚本
背景: 因每日巡检需要,且人工巡检复杂度大,所以需要一个持续化自动化的脚本替代。
功能:通过配置crontab,实现每日定时巡检,获取资源可用率低于25%(即使用率高于75%)的主机监控项,按数值排序后通过钉钉告警。需提前安装requests模块。
因为是脚本,所以写得很随意,但是基本达到改改前置参数就能用的程度。
#!/usr/bin/python3
# _*_ coding:utf-8 _*_
import json
import requests
# IP address of the Zabbix server
zabbix_ip = ""
# Zabbix user name
zabbix_user = ""
# Zabbix password
zabbix_pass = ""
# DingTalk alert webhook URL
api_url = ""
# Item keys that are inspected and alerted on
itemsKey = ["system.cpu.util[,idle]", "vm.memory.size[pavailable]", "vfs.fs.size[/,pfree]"]
# One result dict per inspected item (host name -> value); kept separate
# because each one is sorted on its own before reporting
cpuDict = {}
memDict = {}
diskDict = {}
# Position -> result dict; must be updated together with itemsKey when the
# inspected items change
itemsDict = {0: cpuDict, 1: memDict, 2: diskDict}
# Position -> alert title; must be updated together with itemsKey when the
# inspected items change
alName = {0: "CPU预警", 1: "内存预警", 2: "磁盘预警"}
# Alert threshold: a value below this number triggers an alert
almData = 25
# Zabbix JSON-RPC API endpoint
url = "http://" + zabbix_ip + "/zabbix/api_jsonrpc.php"
# HTTP headers sent with every Zabbix API request
post_header = {'Content-Type': 'application/json'}
# hostid -> {"items": [itemid, ...]}; filled in by get_item()
listJson = {}
# HTTP headers sent with the DingTalk webhook request
headers = {'Content-Type': 'application/json;charset=utf-8'}
def get_auth():
    """Log in to the Zabbix API and return the session token.

    Sends a ``user.login`` JSON-RPC request to ``url`` using the module-level
    ``zabbix_user`` / ``zabbix_pass`` settings.

    Returns:
        The ``result`` field of the response (the token that later requests
        pass as ``auth``), or ``None`` when the response has no ``result``;
        in that case ``login error`` is printed together with the ``error``
        field of the response.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    post_data = {
        "jsonrpc": "2.0",
        "method": "user.login",
        "params": {
            # NOTE(review): newer Zabbix releases appear to expect "username"
            # instead of "user" -- confirm against the server version in use.
            "user": zabbix_user,
            "password": zabbix_pass,
        },
        "id": 1,
        "auth": None,
    }
    # A timeout keeps an unreachable server from hanging the scheduled run forever.
    ret = requests.post(url, data=json.dumps(post_data), headers=post_header, timeout=30)
    zabbix_ret = ret.json()
    if 'result' not in zabbix_ret:
        print('login error', zabbix_ret.get('error'))
        return None
    return zabbix_ret['result']
def get_hostinfo(host_id):
    """Look up a host's name and status by its Zabbix host ID.

    Args:
        host_id: Host ID; converted to ``str`` for the ``host.get`` filter.

    Returns:
        A ``(name, status)`` tuple taken from the first host in the response,
        or ``(None, '1')`` when the response contains no usable host entry
        (empty result, missing ``result`` field, or missing fields).

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    hostid_get = {
        "jsonrpc": "2.0",
        "method": "host.get",
        "params": {
            "filter": {
                "hostid": str(host_id),
            },
            "output": ["name", "status"],
        },
        "auth": Token,
        "id": 1,
    }
    response = requests.post(url, data=json.dumps(hostid_get), headers=post_header, timeout=30)
    hosts = response.json().get('result')
    try:
        first_host = hosts[0]
        return first_host['name'], first_host['status']
    except (IndexError, KeyError, TypeError):
        # Only "no such host / malformed reply" is treated as a miss; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        return None, '1'
def get_item(itemKey):
    """Collect the IDs of all items whose key equals *itemKey*, grouped by host.

    The result is merged into the module-level ``listJson`` dict
    (``hostid -> {"items": [itemid, ...]}``), so successive calls accumulate
    item IDs per host in call order.

    Args:
        itemKey: Item key used as the ``key_`` filter of ``item.get``.

    Returns:
        The module-level ``listJson`` dict.
    """
    payload = {
        "jsonrpc": "2.0",
        "method": "item.get",
        "params": {
            "output": "extend",
            "filter": {
                "key_": itemKey,
            },
        },
        "auth": Token,
        "id": 1,
    }
    response = requests.post(url, data=json.dumps(payload), headers=post_header)
    for item in response.json()["result"]:
        host_id = item["hostid"]
        if host_id in listJson:
            # Known host: extend its existing list of item IDs.
            item_ids = listJson[host_id]["items"]
            item_ids.append(item["itemid"])
        else:
            item_ids = [item["itemid"]]
        listJson[host_id] = {"items": item_ids}
    return listJson
def getHistoryData(item_id):
    """Fetch the newest history record of one item.

    Sends a ``history.get`` request for *item_id*, sorted by ``clock`` in
    descending order and limited to a single record.

    Args:
        item_id: ID of the item whose history is requested.

    Returns:
        The decoded JSON response of the API call.
    """
    query = {
        "output": "extend",
        # NOTE(review): history type 0 is presumably "numeric float" per the
        # Zabbix API -- confirm if non-float items are ever inspected.
        "history": 0,
        "itemids": item_id,
        "sortfield": "clock",
        "sortorder": "DESC",
        "limit": 1,
    }
    request_body = json.dumps({
        "jsonrpc": "2.0",
        "method": "history.get",
        "params": query,
        "auth": Token,
        "id": 1,
    })
    return requests.post(url, data=request_body, headers=post_header).json()
def ding_talk(title, text):
    """Send a markdown message to the DingTalk webhook and print the reply.

    Args:
        title: Title of the markdown message.
        text: Markdown body of the message.

    Raises:
        requests.RequestException: if the HTTP request fails, times out, or
            the server certificate cannot be verified.
    """
    json_text = {
        "msgtype": "markdown",
        "markdown": {
            "title": title,
            "text": text,
        },
        "at": {
            "isAtAll": True,  # the "at" section is optional
        },
    }
    # Certificate verification stays enabled (the requests default): the
    # webhook URL carries the access token, so it must not be sent to an
    # unverified peer. A timeout keeps a stalled connection from blocking the run.
    response = requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=30)
    print(response.content)
if __name__ == '__main__':
    # Entry point: log in, collect the latest value of every inspected item on
    # every enabled host, then send one sorted DingTalk alert per item type.
    Token = get_auth()
    if not Token:
        # Every later API call needs the token; stop here instead of failing
        # deep inside one of the requests.
        raise SystemExit('zabbix login failed')

    host_cache = {}  # hostid -> (name, status), so each host is looked up once
    for idx, itemKey in enumerate(itemsKey):
        # get_item() accumulates into the shared listJson dict. Empty it first
        # so this pass only sees items of the current key; the value is then
        # filed under the matching result dict even when a host lacks one of
        # the inspected items.
        listJson.clear()
        for host_id, entry in get_item(itemKey).items():
            if host_id not in host_cache:
                host_cache[host_id] = get_hostinfo(host_id)
            hostName, hostStatus = host_cache[host_id]
            # Only hosts that resolved to a name and report status '0' are inspected.
            if not hostName or hostStatus != '0':
                continue
            for item_id in entry['items']:
                records = getHistoryData(item_id).get('result')
                if not records:
                    continue  # no history stored for this item yet
                value = float(records[0]['value'])
                if value < almData:
                    # Alert only below the threshold; keep two decimal places.
                    itemsDict[idx][hostName] = round(value, 2)

    # Build and send one markdown report per item type, lowest value first.
    for idx in sorted(itemsDict):
        ranked = sorted(itemsDict[idx].items(), key=lambda pair: pair[1])
        if not ranked:
            continue
        parts = ["# %s\n" % alName[idx]]
        parts.extend("- **%s**: %s \n" % (name, val) for name, val in ranked)
        ding_talk(alName[idx], "".join(parts))