大数据Python数据质量监控

-- coding: UTF-8 --

import sys
reload(sys)
sys.setdefaultencoding(‘utf-8’)

import httplib
import urllib
import json
import time
import os
import commands
import requests
import numpy as np
import schedule
import pymysql
from pyhive import hive
wx_url = ‘https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=dc776459-12e8-4272-a202-185385035d1d’ #把机器人的key
import pandas as pd
import datetime

if name == “main” :
current_time = datetime.datetime.now()

db_monitor = pymysql.connect(host='172.18.189.16',
                port=3307,
                 user='db_monitor',
                 password='db_monitor%123',
                 database='db_monitor',
                 charset='utf8')

cursor_mo = db_monitor.cursor()
cursor_mo1 = db_monitor.cursor()

db_mysql = pymysql.connect(host='rr-wz990f7ie67nii33j.mysql.rds.aliyuncs.com',
                 port=3306,
                 user='bigdata',
                 password='zS^sNH4m%LjWQOMN',
                 database='open_order',
                 charset='utf8')

cursor_my = db_mysql.cursor()

hive_conn = hive.Connection(host="172.18.189.16",
                         port=10000,
                         username="wanghai",
                         password="Wanghai123!",
                         auth='LDAP',
                         database='ods')

cursor_hi = hive_conn.cursor()

# 获取需要计算的时间点
before = (datetime.datetime.now() - datetime.timedelta(minutes=20)).strftime("%Y-%m-%d %H:%M:%S")
# 获取监控列表源表名和hive表名
get_source_table = "select source_table,desc_table from monitor;"
cursor_mo.execute(get_source_table)
while True:
    row = cursor_mo.fetchone()
    if not row:
        break
    print("source_table: %s, desc_table: %s" % (row[0], row[1]))
    # 查询源表条数
    p_num=0
    h_num=0
    if row[0] =="open_user.oa_person":
        print("select count(1) from %s where oau_gmt_create <= '%s';" % (row[0], before))
        cursor_my.execute("select count(1) from %s where oau_gmt_create <= '%s';" % (row[0], before))
        p_num=cursor_my.fetchone()[0]
        # 查询hive表实际条数
        print("select count(1) from %s where oau_gmt_create <='%s' " % (row[1], before))
        cursor_hi.execute("select count(1) from %s where oau_gmt_create <= '%s'" % (row[1], before))
        h_num = cursor_hi.fetchone()[0]
    else :
        print("select count(1) from %s where gmt_create <= '%s';" % (row[0], before))
        cursor_my.execute("select count(1) from %s where gmt_create <= '%s';" % (row[0], before))
        p_num=cursor_my.fetchone()[0]
        # 查询hive表实际条数
        print("select count(1) from %s where gmt_create <='%s' " % (row[1], before))
        cursor_hi.execute("select count(1) from %s where gmt_create <= '%s' " % (row[1], before))
        h_num = cursor_hi.fetchone()[0]
    # 更新两表条数
    print("update monitor set source_table_cnt = %s where source_table='%s' " % (p_num,row[0]))
    cursor_mo1.execute("update monitor set source_table_cnt = %s where source_table='%s'" % (p_num,row[0]))
    db_monitor.commit()
    print("update monitor set desc_table_cnt = %s where source_table='%s'" % (h_num,row[0]))
    cursor_mo1.execute("update monitor set desc_table_cnt = %s where source_table='%s'" % (h_num,row[0]))
    db_monitor.commit()
    if h_num!=p_num:
        num = p_num - h_num
        cursor_mo1.execute("update monitor set num = %s where source_table='%s' " % (num,row[0]))
        db_monitor.commit()
        cursor_mo1.execute("update monitor set status = 0 where source_table='%s' " % (row[0]))
        db_monitor.commit()
    else:
        cursor_mo1.execute("update monitor set num = 0 where source_table='%s' " % (row[0]))
        db_monitor.commit()
        cursor_mo1.execute("update monitor set status = 1 where source_table='%s' " % (row[0]))
        db_monitor.commit()
    print("Number of rows returned: %s" % (cursor_mo.rowcount))
cursor_mo1.execute("select desc_table,source_table_cnt,desc_table_cnt,num from monitor where status=0")
data = cursor_mo1.fetchall()
columnDes = cursor_mo1.description #获取连接对象的描述信息
# columnNames = [columnDes[i][0] for i in range(len(columnDes))] #获取列名
df = pd.DataFrame([list(i) for i in data]) #得到的data为二维元组,逐行取出,转化为列表,再转化为df
res = df.to_csv(header=None)
if cursor_mo1.rowcount != 0:
    header = {
        "Content-Type": "application/json;charset=UTF-8"
    }
    message_body = {
        "msgtype": "markdown",
        "markdown": {
            "content": str(res)+"\n 表数据异常,请检查"
        },
        "at": {
            "atMobiles": [],
            "isAtAll": False
        }
    }
    send_data = json.dumps(message_body)  
    ChatBot = requests.post(url=wx_url, data=send_data, headers=header)
    opener = ChatBot.json()
    if opener["errmsg"] == "ok":
        print(u"%s 通知消息发送成功!" % opener)
    else:
        print(u"通知消息发送失败,原因:{}".format(opener))
else:
    header = {
        "Content-Type": "application/json;charset=UTF-8"
    }
    message_body = {
        "msgtype": "markdown",
        "markdown": {
            "content": str(res)+"所有表数据正常!"
        },
        "at": {
            "atMobiles": [],
            "isAtAll": False
        }
    }
    send_data = json.dumps(message_body)  
    ChatBot = requests.post(url=wx_url, data=send_data, headers=header)
    opener = ChatBot.json()
    if opener["errmsg"] == "ok":
        print(u"%s 通知消息发送成功!" % opener)
    else:
        print(u"通知消息发送失败,原因:{}".format(opener))
cursor_mo.close()
cursor_mo1.close()
cursor_my.close()
cursor_hi.close()
db_monitor.close()
db_mysql.close()
hive_conn.close()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值