AWS 告警邮件太多,容易被忽略。公司维护着十几家平台,都在用 Redshift,万一磁盘满了而告警邮件又没被看到,就麻烦了。
接口调用员老叶就想把告警发到企业微信上来,以后学废了再来改进脚本,先解决现在的问题。
企业微信效果:
取到存储利用率后,把调用频率增加,再加个判断就可以告警了。
#!/usr/bin/env python3
# coding=utf-8
import boto3
import pytz
import time
import pymysql
import sys
import requests
import json
import datetime
def out_log(text):
    """Append *text* as one line to this script's log file.

    The log path is derived from ``sys.argv[0]`` by keeping everything before
    the first ``.`` and adding ``.log``. The top-level script builds the path
    with the same expression, so the two must stay in sync.

    NOTE(review): ``split(".")[0]`` cuts at the first dot anywhere in the
    path (e.g. ``./check.py`` yields ``.log``); if this is ever changed to
    ``os.path.splitext``, change every place that builds the path together.
    """
    log_path = f'{sys.argv[0].split(".")[0]}.log'
    # Append mode: the caller truncates the file once at start-up and every
    # call here adds one more line to the report.
    with open(log_path, 'a') as log_file:
        print(text, file=log_file)
def msg(text):
    """Send *text* as a plain-text message to the WeCom group-robot webhook.

    Args:
        text: Message body placed in the ``content`` field of a ``text``
            message.

    Raises:
        requests.RequestException: If the request cannot be completed within
            the timeout or the webhook answers with an HTTP error status.
    """
    headers = {'Content-Type': 'application/json;charset=utf-8'}
    # NOTE(review): the ``key=`` value is empty in this file (presumably
    # redacted); the robot's webhook key has to be appended before use.
    api_url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key="
    payload = {
        "msgtype": "text",
        "text": {
            "content": text,
        },
    }
    # A timeout keeps a stalled webhook from hanging the whole run; without
    # it requests waits indefinitely.
    response = requests.post(
        api_url,
        data=json.dumps(payload),
        headers=headers,
        timeout=10,
    )
    # Surface HTTP-level failures instead of silently dropping the alert.
    response.raise_for_status()
# Redshift disk-usage report: for every platform stored in MySQL, list its
# Redshift clusters, read the latest PercentageDiskSpaceUsed from CloudWatch,
# write one line per cluster to the log file, then send the whole log to WeCom.

# The log path must be built exactly as out_log() builds it so that the
# truncate, append and read steps all touch the same file.
log_path = f'{sys.argv[0].split(".")[0]}.log'

# Report timestamp in China Standard Time (fixed UTC+8). Using an aware clock
# makes the result correct whatever the host's local timezone is; the former
# "now() + 8 hours" was only right on a host running in UTC.
china_tz = datetime.timezone(datetime.timedelta(hours=8))
cur_time = datetime.datetime.now(china_tz).strftime("%Y-%m-%d %H:%M:%S")

# Look-back window for CloudWatch samples: the most recent 60 seconds is used
# as an approximation of the current value.
td = datetime.timedelta(days=0, seconds=60, microseconds=0)

# Start a fresh log for this run; it accumulates the report sent at the end.
with open(log_path, 'w') as f:
    f.write(f"Amazon Redshift 巡检:{cur_time}\n")

# Fetch each platform's IAM access key, secret key and region. Keyword
# arguments are required by PyMySQL >= 1.0 (a positional host raises
# TypeError there). The connection is released as soon as the rows are read,
# even when the query fails.
db = pymysql.connect(host="127.0.0.1", user="root", password="root", database="zwy")
try:
    with db.cursor() as cursor:
        cursor.execute("SELECT systemname,accesskeyid,secretkey,region FROM `system`")
        rows = cursor.fetchall()
finally:
    db.close()

for system_name, aws_access_key_id, aws_secret_access_key, region_name in rows:
    # These platforms (and rows with an empty name) are excluded from the report.
    if system_name in ('ecsdk', 'imobsky', ''):
        continue

    credentials = {
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key,
        'region_name': region_name,
    }
    # One client per service per platform; both are reused for all clusters.
    redshift = boto3.client('redshift', **credentials)
    cloudwatch = boto3.client('cloudwatch', **credentials)

    # A platform may own several Redshift clusters.
    for cluster in redshift.describe_clusters()['Clusters']:
        db_name = cluster['ClusterIdentifier']

        # Take a single "now" so both ends of the window share one reference.
        end_time = datetime.datetime.now(datetime.timezone.utc)
        stats = cloudwatch.get_metric_statistics(
            Namespace='AWS/Redshift',
            MetricName='PercentageDiskSpaceUsed',
            Dimensions=[
                {
                    'Name': 'ClusterIdentifier',
                    'Value': db_name,
                },
            ],
            StartTime=end_time - td,
            EndTime=end_time,
            Period=60,
            Statistics=[
                'Maximum',
            ],
        )

        datapoints = stats['Datapoints']
        if not datapoints:
            # Without this line a cluster with no sample in the window would
            # silently disappear from the report.
            out_log(f"系统:{system_name}\t实例:{db_name}\t存储空间使用率:无数据")
            continue

        for data in datapoints:
            disk_used = data['Maximum']
            # Pure f-string formatting: mixing an f-string with "%" would
            # break if a platform or cluster name ever contained "%".
            out_log(f"系统:{system_name}\t实例:{db_name}\t存储空间使用率:{disk_used:.2f}%")
            # To alert instead of only reporting, run this script more
            # frequently and call msg(...) here whenever disk_used >= 95.

# Send the accumulated report for all platforms to WeCom.
with open(log_path, 'r') as f:
    msg(f.read())