#自己写一个脚本类似阿里的拨测监控自己的一些服务
#具体逻辑:
#1、从文件中读取要监控服务地址,每行一个服务
#2、每个监控的服务,如果返回200,代表成功
#3、如果失败重试3次,都失败了,返回失败,这时就发钉钉报警
#4、脚本放在定时任务,每3分钟执行一次,同时把输出都打入日志文件
#!/usr/bin/python3
#coding:utf-8
# 2024-07-09
import requests
import json
import time
from datetime import datetime, timedelta
def retry_get(url, max_retries=3, delay=2):
    """Probe *url* with HTTP GET until it answers 200 or attempts run out.

    Args:
        url: Address of the service to probe.
        max_retries: Maximum number of GET attempts.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        True as soon as one attempt returns status code 200; False when
        every attempt failed (non-200 status or a ``requests`` exception).
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            # (connect, read) timeouts so an unresponsive service cannot
            # stall the whole monitoring run.
            response = requests.get(url, timeout=(2, 3))
        except requests.exceptions.RequestException as e:
            print(f"请求出错: {e}, 进行第 {attempt} 次重试")
        else:
            if response.status_code == 200:
                print(f"{url} 请求成功,状态码: {response.status_code}")
                return True
            print(f"{url} 请求失败,状态码: {response.status_code}, 进行第 {attempt} 次重试")
        # Pause only when another attempt follows; sleeping after the final
        # failure would just postpone the alert and the remaining checks.
        if attempt < max_retries:
            time.sleep(delay)
    print("重试次数达到上限,请求失败")
    return False
def send_dingtalk_message(webhook_url, message):
    """Post a plain-text message to a DingTalk robot webhook.

    Delivery is best-effort: every failure is printed rather than raised,
    so a broken webhook cannot abort the caller.

    Args:
        webhook_url: Full robot webhook URL, including the access token.
        message: Text placed in the message's ``content`` field.
    """
    headers = {'Content-Type': 'application/json'}
    data = {
        "msgtype": "text",
        "text": {
            "content": message
        }
    }
    try:
        # Bound the call with (connect, read) timeouts; without them a hung
        # webhook would block the script indefinitely.
        response = requests.post(
            webhook_url,
            headers=headers,
            data=json.dumps(data),
            timeout=(2, 5),
        )
    except requests.exceptions.RequestException as e:
        print("Failed to send message. Error:", e)
        return

    if response.status_code != 200:
        print("Failed to send message. Status code:", response.status_code)
        return

    # The robot API answers HTTP 200 even when it rejects the message; the
    # real outcome is in the JSON body's "errcode" (0 means accepted).
    try:
        result = response.json()
    except ValueError:
        result = None
    if isinstance(result, dict) and result.get("errcode", 0) != 0:
        print(
            "Failed to send message. DingTalk error:",
            result.get("errcode"),
            result.get("errmsg"),
        )
        return

    print("Message sent successfully!")
def main():
    """Probe every URL listed in the site file and alert on failures.

    Reads one service URL per line from the site file, checks each with
    ``retry_get`` and, for every service that stays down, prints an alert
    and sends it to the DingTalk robot webhook.
    """
    # DingTalk robot webhook URL
    webhook_url = "https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxx"
    # File holding the monitored site URLs, one per line
    filename = "/opt/ops/51devops/monitor/site.url"
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M")
    print("【" + current_time + "】")

    # Explicit encoding: under cron the locale default may not be UTF-8.
    with open(filename, 'r', encoding='utf-8') as file:
        lines_url = file.readlines()

    for line_url in lines_url:
        site_url = line_url.strip()
        # Blank lines and '#' comment lines are not services; probing them
        # would fail every retry and raise a false alarm.
        if not site_url or site_url.startswith('#'):
            continue
        if retry_get(site_url):
            continue
        message = f"告警通知\n[{current_time}]发生故障 \n站点【{site_url}】故障告警,请及时处理!"
        print(message)
        try:
            send_dingtalk_message(webhook_url, message)
        except requests.exceptions.RequestException as e:
            # A failed alert delivery must not stop the remaining checks.
            print("Failed to send message. Error:", e)
# Script entry point: run the checks only when executed directly.
if "__main__" == __name__:
    main()