用Python脚本扫描hdfs、oss中是否生成文件并钉钉报警

最新推荐文章于 2020-12-22 19:37:06 发布

菜鸟也有梦想啊

最新推荐文章于 2020-12-22 19:37:06 发布

阅读量254

点赞数

分类专栏：工作中问题

本文链接：https://blog.csdn.net/huaicainiao/article/details/108825228

版权

工作中问题专栏收录该内容

6 篇文章 0 订阅

订阅专栏

之前的需求有点变化：需要读取topic后生成自动运行的监测脚本，监测最近两小时内是否有数据生成，出现问题时还需要钉钉报警。

思路：在数据库中添加一个字段isMonitor，1表示监控状态，0表示未监控状态。从库中读出所有的topic，将它们按照“,”分割后逐个处理。path直接在代码中写死，因为我们使用的表的格式比较相似；当然也可以直接从数据库中读取path。

# -*- coding: utf-8 -*-
import sys
from commands import *
import datetime
import pymysql
import functools
import requests

# Date partition (YYYYMMDD) of the day the script starts; it is substituted
# for the literal "%Y%m%d" placeholder of the path templates below.
pt = datetime.date.today().strftime('%Y%m%d')

# Path templates. "topic_name" and "%Y%m%d" are plain placeholders that are
# filled in with str.replace(), not with strftime().
slsAndKafkaPath = 'oss://a/b/c/logs_bi/topic_name/%Y%m%d'
nginxPath = 'oss://a/b/c/nginx/%Y%m%d'
hdfsPath = 'hdfs://a/b/c/topic_name/%Y%m%d'
hdfsPath1 = 'hdfs://a/b/d/topic_name/%Y%m%d'


def list_unpack(l):
    """Flatten one level of nesting by concatenating the items of ``l``.

    The items are combined left to right with ``+``, so the result has the
    type of the items themselves (tuples yield a tuple, lists yield a list).
    """
    def _concat(merged, piece):
        return merged + piece

    return functools.reduce(_concat, l)

def getPath():
    """Return the topic names that are flagged for monitoring.

    Reads the ``topic_name`` column of every ``testTable`` row whose
    ``isMonitor`` is 1. The column values are joined with "," and split on
    "," again, so a single row holding several comma-separated topics
    contributes one entry per topic.

    Returns:
        A list of topic-name strings, or ``""`` when the query matches no
        rows. The empty string is kept for existing callers; like an empty
        list it is falsy and iterates as empty.
    """
    sql = """
    select topic_name from testTable where isMonitor = 1
    """

    # Keyword arguments: PyMySQL 1.0+ no longer accepts the connection
    # parameters positionally.
    db = pymysql.connect(host="ip", user="账号", password="密码",
                         database="库名", port=3306)
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, including on the "no rows" path
        # and when the query raises.
        db.close()

    if not rows:
        return ""

    # Each row is a 1-tuple holding the topic_name column.
    joined = ','.join(row[0] for row in rows)
    return str(joined).split(",")


def monitorDingDing():
    url = "钉钉机器人url"
    headers = {'content-type': "application/json"}
    values = """
                {
            "msgtype": "text",
            "text": {
                "content": "test测试扫描文件失败:任务失败"
            },
            "at": {
                "atMobiles": [
                    ""
                ],
                "isAtAll": true
            }
        }
        """
    req = requests.post(url, values, headers=headers)
    print "钉钉报警信息：",req.text


def runCmd(cmd):
    """Run ``cmd`` in a shell after sourcing ``/etc/profile``.

    Args:
        cmd: Shell command line to execute.

    Returns:
        The output reported by ``getstatusoutput`` when the exit status is 0.

    On a non-zero status the failing command line, its status and its output
    are written to stderr and the process terminates via ``sys.exit(1)``.
    """
    cmd = "source /etc/profile;%s" % (cmd)
    status, output = getstatusoutput(cmd)

    if status != 0:
        # Report what failed before exiting; a bare exit code gives no clue
        # about which command broke or why.
        sys.stderr.write(
            "command failed (status %s): %s\n%s\n" % (status, cmd, output))
        sys.exit(1)
    return output


def _buildListCmd(topic):
    """Return the shell pipeline that lists today's directory for ``topic``.

    The path template is chosen by substring match on the topic name
    ("nginx", "bigdata", "parent", otherwise the SLS/Kafka template). The
    pipeline prints the 7th whitespace-separated field of the second-to-last
    line of the ``hdfs dfs -ls`` output.
    """
    if "nginx" in topic:
        path = nginxPath
    elif "bigdata" in topic:
        path = hdfsPath.replace("topic_name", topic)
    elif "parent" in topic:
        path = hdfsPath1.replace("topic_name", topic)
    else:
        path = slsAndKafkaPath.replace("topic_name", str(topic))
    path = path.replace("%Y%m%d", pt)
    return """hdfs dfs -ls %s | awk '{print $7}' | tail -2 | head -1""" % (path)


def getDirDate():
    """Return the stalest file time found across all monitored topics.

    Every topic returned by ``getPath()`` is listed with ``runCmd`` (each
    command is printed first) and the outputs are compared as ``HH:MM``
    times.

    Returns:
        The earliest ``HH:MM`` string among the topics, so one stale topic
        is enough to make the result stale. If a listing does not yield an
        ``HH:MM`` value, that raw output is returned immediately so the
        caller sees the failure. Returns ``""`` when there are no topics.
    """
    oldest = ""
    oldest_parsed = None
    for topic in getPath():
        cmd = _buildListCmd(topic)
        print(cmd)
        file_time = runCmd(cmd)
        try:
            parsed = datetime.datetime.strptime(file_time, '%H:%M')
        except ValueError:
            # No usable time for this topic (e.g. empty listing): surface it
            # instead of letting a healthy topic mask the problem.
            return file_time
        if oldest_parsed is None or parsed < oldest_parsed:
            oldest, oldest_parsed = file_time, parsed
    return oldest


# Maximum allowed age, in seconds, of the newest listed file before alerting.
# 100 makes the script alert right away for demonstration purposes; normal
# use needs 7200 (two hours).
ALERT_THRESHOLD_SECONDS = 100

file_time = getDirDate()
print(file_time)
try:
    d1 = datetime.datetime.strptime(file_time, '%H:%M')
except (TypeError, ValueError):
    # Anything that is not an HH:MM string (e.g. empty output because no
    # file was listed) counts as a failed check rather than a crash.
    d1 = None
# Round-trip through HH:MM so both values share the same dummy date and
# minute resolution.
cur_time = datetime.datetime.now().strftime('%H:%M')
d2 = datetime.datetime.strptime(cur_time, '%H:%M')
print(cur_time)
# total_seconds() keeps the sign: timedelta.seconds would wrap a file time
# slightly ahead of the current minute to ~86400 and trigger a false alarm.
time = None if d1 is None else (d2 - d1).total_seconds()
if time is None or time > ALERT_THRESHOLD_SECONDS:
    monitorDingDing()
    print("find problem")
else:
    print("check success")