使用 Python 编写 Spark Streaming 任务监控脚本 checkRealTime.py,脚本内容如下:
# -*-coding:utf-8-*-
import datetime
import time
import subprocess
import requests
import json
import hmac
import hashlib
import base64
import configparser
from urllib.parse import quote_plus
import logging
import sys
import os
# DIR is the parent of the directory that contains this script (two levels
# above the script file itself). It is appended to sys.path and is also used
# further down as the base for locating the DingTalk configuration file.
DIR = os.path.abspath(
    os.path.join(os.path.abspath(__file__), os.pardir, os.pardir)
)
sys.path.append(DIR)
# The bare string below is the script's original description; in English:
# "Real-time job monitoring script; sends a DingTalk group alert on failure."
"""
实时任务监控脚本,失败钉钉群告警
"""
class DDHandler(logging.Handler):
    """Logging handler that posts a text alert to a DingTalk robot webhook.

    The handler can be driven directly (``emit(message, mobile)``) or attached
    to a logger, in which case ``emit`` receives a ``logging.LogRecord`` and
    no mobile number.
    """

    # Upper bound (seconds) for the webhook call so that a hung connection
    # cannot block the calling job indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, web_hook_token):
        """Remember the webhook URL and initialise the base handler.

        Args:
            web_hook_token: Full webhook URL the alert is POSTed to.
        """
        self.__web_hook_token = web_hook_token
        logging.Handler.__init__(self)

    @staticmethod
    def _build_payload(msg, mobile=None):
        """Return the DingTalk "text" message body as a dict.

        Args:
            msg: Alert text; converted with ``str()``.
            mobile: Optional phone number to @-mention. When omitted the
                ``atMobiles`` list is left empty.
        """
        return {
            "msgtype": "text",
            "text": {
                "content": str(msg),
            },
            "at": {
                "atMobiles": [] if mobile is None else [mobile],
                "isAtAll": True,
            },
        }

    @staticmethod
    def _is_success(body):
        """Return True when the response body is JSON with ``errcode == 0``.

        Parsing the JSON (instead of comparing the raw text) keeps the check
        independent of whitespace and key order in the response.
        """
        try:
            parsed = json.loads(body)
        except ValueError:
            return False
        return isinstance(parsed, dict) and parsed.get("errcode") == 0

    def emit(self, record, mobile=None):
        """Send an alert message to the webhook.

        Args:
            record: Either a ready-made message (sent as ``str(record)``) or a
                ``logging.LogRecord``, which is rendered with the handler's
                formatter first.
            mobile: Optional phone number to @-mention. Defaults to ``None``
                so the method also satisfies ``logging.Handler.emit(record)``.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.
        """
        if isinstance(record, logging.LogRecord):
            msg = self.format(record)
        else:
            msg = record

        headers = {
            "Content-Type": "application/json ;charset=utf-8 "
        }
        string_text_msg = json.dumps(self._build_payload(msg, mobile))
        res = requests.post(
            self.__web_hook_token,
            data=string_text_msg,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if self._is_success(res.text):
            print("alarm send success。。。")
        else:
            # Surface the raw response so the failure reason ends up in the log.
            print(res.text)
class CheckRealTimeTaskStatus:
    """Check that the monitored YARN applications run exactly once each.

    Missing or duplicated applications are reported through a signed DingTalk
    robot webhook. Credentials and the on/off switch come from the
    ``[dingding]`` section of an INI-style configuration file.
    """

    # Webhook endpoint; placeholders are access token, millisecond timestamp
    # and URL-encoded signature, in that order.
    WEBHOOK_URL = (
        'https://oapi.dingtalk.com/robot/send'
        '?access_token={0}&timestamp={1}&sign={2}'
    )

    def __init__(self, conf_path=None):
        """Load the DingTalk settings.

        Args:
            conf_path: Path of the configuration file. Defaults to
                ``DIR + '/monitor/dingding_conf'``.

        Raises:
            KeyError: If the ``dingding`` section or one of its options is
                missing (an unreadable/missing file shows up the same way).
            ValueError: If ``alarm_enabled`` is not a recognised boolean.
        """
        if conf_path is None:
            conf_path = DIR + '/monitor/dingding_conf'
        confInfo = configparser.ConfigParser()
        confInfo.read(conf_path, encoding='utf-8')
        section = confInfo["dingding"]
        self.secret = section["secret"]
        # getboolean() silently yields None for a missing option, so keep the
        # explicit KeyError that a plain lookup would raise.
        if "alarm_enabled" not in section:
            raise KeyError("alarm_enabled")
        # Parse the switch as a real boolean: the raw string "False" is truthy
        # and would otherwise leave alerting permanently enabled.
        self.alarm_enabled = section.getboolean("alarm_enabled")
        self.access_token = section["access_token"]
        self.mobile = section["mobile"]

    def _build_webhook_url(self, timestamp):
        """Return the signed webhook URL for the given millisecond timestamp.

        The signature is the URL-quoted Base64 of HMAC-SHA256 over
        ``"<timestamp>\\n<secret>"`` keyed with the secret.
        """
        secret_enc = self.secret.encode("utf-8")
        string_to_sign = '{}\n{}'.format(timestamp, self.secret)
        hmac_code = hmac.new(
            secret_enc,
            string_to_sign.encode("utf-8"),
            digestmod=hashlib.sha256,
        ).digest()
        sign = quote_plus(base64.b64encode(hmac_code))
        return self.WEBHOOK_URL.format(self.access_token, timestamp, sign)

    @staticmethod
    def _parse_count(output):
        """Extract the application count from the shell pipeline's output.

        ``subprocess.getstatusoutput`` merges stderr into the result, so the
        output may contain diagnostic lines besides the ``wc -l`` number. The
        last line consisting only of digits is taken as the count.

        Raises:
            ValueError: If no numeric line is present.
        """
        for line in reversed(output.splitlines()):
            text = line.strip()
            if text.isdecimal():
                return int(text)
        raise ValueError(
            "no application count found in command output: %r" % (output,)
        )

    def send(self, msg):
        """Send ``msg`` to the DingTalk robot if alerting is enabled.

        Args:
            msg: Text of the alert.
        """
        if not self.alarm_enabled:
            return
        # The robot verifies a signature derived from the current time in
        # milliseconds, so the timestamp is generated per message.
        timestamp = int(round(time.time() * 1000))
        ddhandler = DDHandler(self._build_webhook_url(timestamp))
        ddhandler.setLevel(logging.ERROR)
        ddhandler.emit(msg, self.mobile)

    def check(self):
        """Run one monitoring pass over the task list and alert on problems.

        For every monitored task the number of matching lines in
        ``yarn application -list`` is counted: zero is reported as missing,
        more than one as duplicated. A heartbeat message is sent when the
        pass runs at 23:58 or 23:59.

        Raises:
            ValueError: If the command output contains no count.
        """
        cur_time = time.strftime('%H:%M')
        if cur_time in ('23:59', '23:58'):
            self.send('我是定时(实时任务)检测,我还活着~')

        # Monitored tasks: grep pattern -> person responsible.
        tasks = {
            'kdw': '某某某'
        }
        lackList = []
        repeatList = []
        for item, value in tasks.items():
            # /etc/spark2/conf/yarn-conf holds the Hadoop configuration files;
            # per the original author's note, passing --config keeps INFO log
            # lines out of the output.
            # NOTE(review): the pipeline's exit status is that of `wc`, so a
            # failing `yarn` shows up as a count of 0 (reported as missing).
            cmd = (
                "yarn --config /etc/spark2/conf/yarn-conf application -list"
                " | grep '%s' | awk '{print $2}' | wc -l" % item
            )
            code, res = subprocess.getstatusoutput(cmd)
            print(code)
            print(res)
            count = self._parse_count(res)
            if count == 0:
                lackList.append('任务缺失:' + item + '.责任人:' + value + '\n')
            elif count > 1:
                repeatList.append('任务重复:' + item + '.责任人:' + value + '\n')

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(now, '缺失任务列表:', lackList)
        print(now, '重复任务列表:', repeatList)
        if lackList:
            self.send(str(lackList))
        if repeatList:
            self.send(str(repeatList))
# Entry point for one scheduled monitoring pass.
def scheduleJob():
    """Build a fresh checker and run a single check of the monitored tasks."""
    checker = CheckRealTimeTaskStatus()
    checker.check()


if __name__ == "__main__":
    # Executed as a script: perform one check, then exit.
    scheduleJob()
配置文件 dingding_conf(与脚本放在同一个 monitor 目录下)内容如下:
[dingding]
ALARM_ENABLED = True
ACCESS_TOKEN = *****
SECRET = *****
MOBILE = ******
使用 crontab 定时调度检测脚本,每 1 分钟检测一次:
执行 crontab -e,添加以下内容并保存即可(日志文件会自动创建):
*/1 * * * * /home/work/local/anaconda3/bin/python /home/jump-work/test_job/monitor/checkRealTime.py >> /home/jump-work/test_job/monitor/crontest.log 2>&1