Python 通过 URL 获取 JSON 并解析

该脚本主要用于从 Cloudera Manager 获取严重级别为 critical 的事件,将其写入告警表(供企微群提醒使用)并存储结果。脚本通过 curl 调用 CM-API 获取 JSON 格式的事件数据,用 Python 的 json 库解析后,再通过 pymysql 将数据插入到 MySQL 监控数据库中;pyspark 相关代码已被注释,未实际使用。
摘要由CSDN通过智能技术生成
# -*- coding: utf-8 -*-

# 任务名称:CM事件监控 moni_cm_event
# 任务功能说明:读取Cloudera Manager事件提醒,输出到企微群提醒,并保存结果。
# ------------------------------------------------------------------------------------------------
# 目标表:
# 01. schedulem.task_alert
# 02. schedulem.moni_cm_event
# 源表: 
# 01.curl -u admin:your_password 'http://xxx:7180/api/v19/events?query=severity==critical;timeReceived=ge=2020-08-16T00:00:00.000Z'
# ------------------------------------------------------------------------------------------------
# 创建人:Lemon.Warn 创建时间: 
# 修改 - 01 :
#       修改人: xxx 修改时间:  
#       需求编号:
#       修改内容:
# ------------------------------------------------------------------------------------------------

from pyspark.sql import SparkSession
import datetime
import csv,codecs
import sys
import pymysql
import os
import json 

# Python 2 only: force the process-wide default string encoding to UTF-8 so
# implicit str/unicode conversions of the Chinese text below do not fail.
# `reload` is not a builtin on Python 3 (and its default encoding is already
# UTF-8), so the hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')

# 自定义异常
class Error(Exception):
    """Script-specific failure carrying a human-readable message.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it, while ``except Error`` keeps working.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, msg):
        # Initialise the base class so ``args``, ``repr()`` and pickling
        # carry the message too.
        super(Error, self).__init__(msg)
        # msg: the text returned by ``str()`` on this exception.
        self.msg = msg

    def __str__(self):
        return self.msg

# 读取配置文件
def get_task_config(path):
    """Read a GB2312-encoded, comma-separated config file, skipping its header.

    Args:
        path: Path of the config file to read.

    Returns:
        A list with one entry per line after the header line. Each entry is
        the list of comma-separated fields of that line, or an empty list for
        a blank line.
    """
    task_config = []  # one element per config line
    # The context manager closes the file even if decoding fails part-way.
    with codecs.open(path, 'rb', 'gb2312') as f:
        lines = list(f)
    for line in lines[1:]:  # skip the header line
        # Drop the line terminator before splitting; otherwise the last field
        # keeps a trailing newline and a blank line never looks empty.
        line = line.rstrip('\r\n')
        row = line.split(',') if line else []
        print(row)
        task_config.append(row)
    print("Info:读取配置文件成功")
    return task_config
   
# 通过CM-API获取事件数据(json格式)
def get_url_page(user, pwd, url, save_path):
    """Fetch ``url`` with curl (basic auth) and write the response body to a file.

    The command is run without a shell, so quotes or shell metacharacters in
    any argument cannot alter the command line. Failures are reported on
    stdout rather than raised, matching the previous non-raising behaviour.

    Args:
        user: User name passed to ``curl -u``.
        pwd: Password passed to ``curl -u``.
        url: URL to request.
        save_path: File that receives curl's standard output; it is truncated
            before curl runs, as with the former shell redirection.

    Returns:
        None.
    """
    import subprocess  # local import keeps this function self-contained

    command = ['curl', '-u', '%s:%s' % (user, pwd), url]
    # Log the equivalent command line, with the password masked so it does
    # not end up in scheduler logs.
    print("curl -u %s:%s '%s'> %s  " % (user, '******', url, save_path))
    try:
        with open(save_path, 'wb') as out:
            status = subprocess.call(command, stdout=out)
    except (IOError, OSError) as exc:
        # curl is not installed, or save_path cannot be opened for writing.
        print("Error:failed to run curl or write %s: %s" % (save_path, exc))
        return
    if status != 0:
        print("Warn:curl exited with status %s" % status)
    # order = "cat %s |  tr '\\n' '\\t'> %s  "%(save_path, save_path1)
    # print(order)
    # os.system(order) 
    # order = "hive -e \"load data local inpath '%s' overwrite into table icl_prd.moni_cm_event_f; \""%(save_path1)
    # print(order)
    # os.system(order) 
    
    
# 读取并解析json文件
# def resolve_json():
    # df_lateral_sql = "select  t3.id, t3.content, t3.timeOccurred, t3.category, \
                              # substring(t5.values, 3, length(t5.values)-4) as values \
                      # from icl_prd.moni_cm_event_f  \
                      # LATERAL VIEW json_tuple(json, 'totalResults', 'items')t2 as totalResults, items \
                      # LATERAL VIEW json_tuple(substring(t2.items, 2, length(t2.items)-2), 'id', 'content', 'timeOccurred', 'category', 'attributes')t3 \
                        # as id, content, timeOccurred, category, attributes \
                      # LATERAL VIEW explode( \
                                            # split( \
                                                # regexp_replace( \
                                                    # substring(t3.attributes, 2, length(t3.attributes) -2), \
                                                    # '\\\}\\\,\\\{', \
                                                    # '\\\}\\\;\\\{' \
                                                # ), \
                                                # '\\\;' \
                                            # ) \
                                          # ) t4 as attributes2  \
                      # LATERAL VIEW json_tuple(t4.attributes2,'name','values')t5 as name,values \
                      # where t5.name = 'URL' "
    # print(df_lateral_sql)
    # df_lateral = spark.sql(df_lateral_sql)
    # df_lateral.show()
    # return df_lateral
    
# 任务监控
def _cm_event_url(event):
    """Return the value of the event's 'URL' attribute as one string (' ' if absent)."""
    url = ' '
    for attribute in event.get('attributes') or []:
        if attribute['name'] == 'URL':
            values = attribute['values']
            # NOTE(review): 'values' appears to be a JSON array (the disabled
            # Hive query in this file strips '["' and '"]' from it) - confirm
            # against the CM API. Join it so the message and the SQL
            # parameter receive a plain string.
            if isinstance(values, (list, tuple)):
                url = ', '.join(['%s' % value for value in values])
            else:
                url = values
    return url


def _cm_time_plus_8h(timestamp):
    """Parse a '%Y-%m-%dT%H:%M:%S.%fZ' timestamp, add 8 hours, format as 'YYYY-MM-DD HH:MM:SS'."""
    parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
    return (parsed + datetime.timedelta(hours=8)).strftime('%Y-%m-%d %H:%M:%S')


def task_monitor(host, user, password, database, charset, save_path):
    """Load CM events from a JSON file and insert them into the monitoring database.

    For every element of the file's ``items`` list, one row is inserted into
    ``task_alert`` and one into ``moni_cm_event``. Both inserts are skipped
    when ``task_alert_history`` already holds a row with the same ``stat_dt``
    and ``id``. All inserts are committed together at the end; if anything
    fails, nothing is committed and the cursor and connection are still closed.

    Args:
        host: MySQL host.
        user: MySQL user.
        password: MySQL password.
        database: MySQL database name.
        charset: Connection character set.
        save_path: Path of the UTF-8 JSON file holding the events.

    Raises:
        Error: If the database connection cannot be established.
    """
    try:
        db_moni = pymysql.connect(host=host, user=user, password=password,
                                  database=database, charset=charset)
    except Exception as exc:
        # Keep the underlying cause in the message instead of hiding it.
        raise Error('连接监控mysql数据库失败: %s' % (exc,))
    print("Info:连接监控mysql数据库成功")

    # Presumably a template placeholder substituted before the script runs -
    # verify against the scheduler.
    stat_dt = '${bizdate}'
    # NOTE(review): this webhook URL embeds an access key; consider moving it
    # to configuration.
    alert_url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=39526f06-cdd1-472a-ae71-872a007b80bb'

    # Every value, including the event id in the NOT EXISTS filter, is bound
    # as a parameter rather than concatenated into the SQL text.
    insert_alert_sql = (
        "insert ignore into task_alert "
        "(id, task_name, alert_time, alert_message, is_alert, alert_url, stat_dt) "
        "select %s, %s, %s, %s, %s, %s, %s where not exists "
        "(select 1 from task_alert_history where stat_dt=%s and id=%s);"
    )
    insert_moni_sql = (
        "insert ignore into moni_cm_event "
        "(id, content, timeOccurred, category , url, stat_dt) "
        "select %s, %s, %s, %s, %s, %s where not exists "
        "(select 1 from task_alert_history where stat_dt=%s and id=%s);"
    )

    try:
        cursor_moni = db_moni.cursor()
        try:
            with codecs.open(save_path, 'rb', 'utf-8') as fp:
                json_data = json.load(fp)

            for event in json_data['items']:
                event_id = event['id']
                content = event['content']
                time_occurred = _cm_time_plus_8h(event['timeOccurred'])
                category = event['category']
                url = _cm_event_url(event)

                alert_message = "事件id:%s\n事件内容:%s\n事件时间:%s\n事件类型:%s\n详情链接:%s" % (
                    event_id, content, time_occurred, category, url)
                alert_row = [event_id, 'moni_cm_event', str(datetime.datetime.now()),
                             alert_message, 'False', alert_url, stat_dt]
                cursor_moni.execute(insert_alert_sql, alert_row + [stat_dt, event_id])
                print("插入监控告警表成功")

                moni_row = [event_id, content, time_occurred, category, url, stat_dt]
                print(moni_row)
                cursor_moni.execute(insert_moni_sql, moni_row + [stat_dt, event_id])
                print("插入集群事件监控结果表成功")

            db_moni.commit()
        finally:
            cursor_moni.close()
    finally:
        db_moni.close()
    

if __name__ == '__main__':

    # ------------------ configuration ------------------
    # Connection settings of the monitoring MySQL database; the keys match
    # the parameter names of task_monitor.
    mysql_settings = dict(
        host='mysqlip',
        user='user',
        password='password',
        database='schedulem',
        charset='utf8',
    )

    # Credentials for the Cloudera Manager API.
    user, pwd = 'dev_monitor', 'your_password'

    # Lower bound of the event query: the local clock minus 10 hours, written
    # with a 'Z' suffix.
    # NOTE(review): this looks like it assumes a UTC+8 host (10h = 8h offset
    # + a 2h look-back) - confirm before running in another timezone.
    window_start = datetime.datetime.now() - datetime.timedelta(hours=10)
    now_datetime = window_start.strftime('%Y-%m-%dT%H:%M:%S.000Z')
    # Example of a fixed lower bound: timeReceived=ge=2021-06-03T02:01:53.694Z
    url = 'http://xxx/api/v19/events?query=severity==critical;timeReceived=ge=%s' % now_datetime

    # File that receives the raw JSON response.
    save_path = '/home/deploy/data/moni_cm_event.json'

    # ------------------ processing ------------------
    # Step 1: download the critical events through the CM API as JSON.
    get_url_page(user, pwd, url, save_path)

    # Step 2: parse the downloaded file and store alerts and events in MySQL.
    task_monitor(save_path=save_path, **mysql_settings)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值