Monitoring Spark Streaming with Python and restarting it on failure
This post implements monitoring for MySQL, Hive, and Spark Streaming. The steps are as follows:
1. Install dependencies
Install wheel
pip install wheel
Install sasl
pip install D:\python\sasl-0.2.1-cp36-cp36m-win_amd64.whl
Note: download the sasl wheel that matches your Python version. Mine is 3.6.5, so I chose the cp36 build; on a 64-bit system pick the amd64 build.
Download page: https://www.lfd.uci.edu/~gohlke/pythonlibs/#sasl
Install thrift
pip install thrift
Install thrift-sasl
pip install thrift-sasl
Install pyhive
pip install pyhive
Install pymysql
pip install pymysql
Install requests (note the package name is requests, with an s)
pip install requests
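With everything installed, a quick sanity check (a minimal sketch that only verifies the client libraries import cleanly) can be run before moving on:

# run once to confirm the Hive/MySQL client stack installed correctly
from pyhive import hive
import thrift
import thrift_sasl
import sasl
import pymysql
print('all client libraries imported OK')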
2. Write the implementation module
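The module below pulls all of its connection settings and the Spark REST endpoint from a small local config file, monitorInitFile.py, which is not shown in this post. A minimal sketch of it, with every value a placeholder to replace (the YARN ResourceManager URL in particular is an assumption about where spark_url_running points):

# monitorInitFile.py -- placeholder values, adjust for your environment
mysql_host = '192.168.1.10'
mysql_port = 3306
mysql_user = 'monitor'
mysql_passwd = '******'
mysql_db = 'monitor'

hive_host = '192.168.1.11'
hive_port = 10000
hive_username = 'hive'
hive_auth = 'LDAP'        # or 'KERBEROS' / 'NOSASL', depending on your HiveServer2 setup
hive_database = 'default'

# endpoint checkJob() polls for running applications; assumed here to be the
# YARN ResourceManager REST API
spark_url_running = 'http://192.168.1.12:8088/ws/v1/cluster/apps?states=RUNNING'

With that file in place, the implementation module itself: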
import json
import os
from urllib import request
from datetime import datetime, timedelta, date
import time
from pyhive import hive
import pymysql
import monitorInitFile
def db_connect():
    """Create a MySQL connection from the settings in monitorInitFile."""
    conn = pymysql.connect(
        host=monitorInitFile.mysql_host,
        port=monitorInitFile.mysql_port,
        user=monitorInitFile.mysql_user,
        passwd=monitorInitFile.mysql_passwd,
        db=monitorInitFile.mysql_db,
        charset='utf8'
    )
    return conn
def hive_connect():
    """Create a HiveServer2 connection from the settings in monitorInitFile."""
    conn = hive.Connection(
        host=monitorInitFile.hive_host,
        port=monitorInitFile.hive_port,
        username=monitorInitFile.hive_username,
        auth=monitorInitFile.hive_auth,
        database=monitorInitFile.hive_database
    )
    return conn
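# Optional smoke test for the two helpers above (assumed usage, not part of the
# original module) -- run from an interactive session:
#   conn = db_connect()
#   cur = conn.cursor()
#   cur.execute('SELECT 1')
#   print(cur.fetchone())
#   conn.close()
#
#   hconn = hive_connect()
#   hcur = hconn.cursor()
#   hcur.execute('SHOW DATABASES')
#   print(hcur.fetchall())
#   hconn.close()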
def get_date(day):
    """Return the date `day` days before today, formatted as YYYY-MM-DD."""
    return (date.today() - timedelta(days=day)).strftime('%Y-%m-%d')
def beforeHours2Date(num):
    """Return the point in time `num` hours ago, formatted as 'YYYY-MM-DD HH'."""
    t = time.time() - num * 60 * 60
    t = time.strftime('%Y-%m-%d %H', time.localtime(t))
    return t
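# Illustration with hypothetical values: if run at 2019-07-02 15:20,
#   get_date(1)          -> '2019-07-01'
#   beforeHours2Date(2)  -> '2019-07-02 13'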
def checkJob():
    """Check the running state of the Spark Streaming job via the REST API."""
    result = []
    dic = {}
    url = monitorInitFile.spark_url_running
    req = request.Request(url)
    res_data = request.urlopen(req)
re