python脚本利用shell查询hive表根据打印的输出日志获取数据腾讯机器人转发查询结果

时间城-最光阴

已于 2023-05-05 17:41:19 修改

阅读量309

点赞数

文章标签： hive 机器人 shell

于 2023-05-05 17:33:50 首次发布

本文链接：https://blog.csdn.net/qq_34247077/article/details/130511705

版权

# -*- coding=utf-8 -*-

import pandas as pd
import time
import datetime
import random
import threading
import _strptime
import sys
import requests
import xlwt
import re
import subprocess as sp
from retrying import retry


reload(sys)
sys.setdefaultencoding('utf8')

today = datetime.datetime.now().date()
today_str=str(today)
print(today_str)

dirpath = {
    'dirpath':'/tmp/jiankong/zzq',
    'filepath':'/tmp/jiankong/zzq/{0}_shangpinbiangeng.xls'.format(today_str)
}

static_config = {
    'webhook': 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=###',
}

print('文件路径为:',dirpath['filepath'])
filename=dirpath['filepath'].split('/')[-1]
print('文件名称:',filename)


def sql_format(yestoday,yestoday2):
    hive_sql="""
     hive -e "
     
        select 
        t1.article_id
        ,t1.article_name
        ,t1.category_level3_id as category_level3_id1
        ,t2.category_level3_id  
        ,t1.category_level3_description as category_level3_description1
        ,t2.category_level3_description
        from
        (select  * from dim.dim_goods_information_have_pt where inc_day ='{yestoday}' )t1 
        left join (select  * from dim.dim_goods_information_have_pt where inc_day ='{yestoday2}')t2 
        on t1.article_id=t2.article_id
        where t1.category_level3_id<>t2.category_level3_id
        ;
             "
    """
    hive_sql_re = re.sub(r'(?s)(?:\/\*.*?\*\/|--[^\n]*)', '', hive_sql)
    # 去重空行
    hive_sql_blank = re.sub(r'(\t|\s)*(\n|\r\n)', '\n', hive_sql_re)
    hive_sql=re.sub(r'(\`)','',hive_sql_blank)
    return hive_sql.format(yestoday=yestoday,yestoday2=yestoday2)

# 设定工作内容 设定重试次数  等待最小时间 最大时间  retry 参数不能外部传参
@retry(stop_max_attempt_number=2,wait_random_min=1, wait_random_max=1000*10)
def work(yestoday,yestoday2,filename):

    try:
        hive_sql=sql_format(yestoday,yestoday2)
    except Exception as e:
        print('sql格式化错误'+str(e))

    print("执行sql如下")
    print(hive_sql)
    start_time=time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())  # 2023-02-21 16:07:14

    # 执行sql
    p=sp.Popen(hive_sql,shell=True,stdout=sp.PIPE,stderr=sp.PIPE,close_fds=True)
    # out为 执行指令的输出结果  info为指令执行的信息
    out,info=p.communicate()
    returncode=p.returncode

    if returncode!=0:
        raise Exception("代码出错了")

    print('输出类型为:')
    print(type(out))
    print('>>>>>   hive_sql=[{}] out=[{}] info=[{}] returncode=[{}]'.format(hive_sql, out, info, returncode))

    # 处理字符串
    comment_list=out.split('\n')
    print('------------拆分的日志如下:-------------')
    resultSQLlist=[]
    for i in comment_list[2:-1]:
        if "+" in i:
            continue
        print('按|分割')
        list=i.split('|')

        article_id=str(list[1].strip())
        print(article_id)
        article_name=list[2].strip().encode('utf-8')
        print(type(article_name))
        print(article_name)
        category_level3_id1=str(list[3].strip())
        print(category_level3_id1)
        category_level3_id=str(list[4].strip())
        print(category_level3_id)
        category_level3_description1=list[5].strip().encode('utf-8')
        print(category_level3_description1)
        category_level3_description=list[6].strip().encode('utf-8')
        print(category_level3_description)
        content=str(article_id)+','+str(article_name)+','+str(category_level3_id1)+','+str(category_level3_id)+','+str(category_level3_description1)+','+str(category_level3_description)
        resultSQLlist.append(content)
        print(content)
    print('查询结果数据集如下:')
    print(resultSQLlist)

    if resultSQLlist.__len__()>=1:
        # 创建 excel 对象
        book = xlwt.Workbook(encoding='utf-8',style_compression=0)
        # 设置表单名称
        sheet = book.add_sheet('商品主数据变更记录',cell_overwrite_ok=True)

        # 设置列名
        col=['商品编码', '商品名称', '变更后小分类编码', '变更前小分类编码','变更后小分类名称','变更前小分类名称']

        len=resultSQLlist.__len__()

        for i in range(0,len):
            data=resultSQLlist[i].split(',')
            for j in range(0,6):
                print(data[j])
                # 写入表头数据 行 列 列值
                sheet.write(0,j,col[j])
                # 写入查询的其余数据 从第二行开始写入
                sheet.write(i+1,j,data[j])

        try:
            os.makedirs(dirpath['dirpath'])
        except Exception:
            print('文件已存在')
        try:
            os.remove(dirpath['filepath'])
            print('删除文件成功')
        except Exception:
            print('文件不存在')
        savepath=dirpath['filepath']
        book.save(savepath)
        oFileUploadUrl = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/upload_media?key=###&type=file'
        oWX_URL = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/upload_media?key=###'
        payload={'Content-Disposition': 'form-data','name': 'media','filename': filename}
        files=[('boundary', ('%s' % filename, open('%s' % savepath, 'rb'), 'application/xls'))]
        headers={}
        try:
            response = requests.request("POST", oFileUploadUrl, headers=headers, data=payload, files=files,timeout=5)
        except Exception as e:
            print(e.args[0])
        json_res = response.json()
        print('返回的结果为',json_res)
        media_id = json_res['media_id']
        if media_id != "":
            data = {"msgtype": "file",
                    "file": {
                        "media_id": media_id
                    }
                    }
            headers = {'Content-Type': 'application/json'}  # 指定提交的是json
            try:
                response = requests.post(
                    url=oWX_URL, json=data, headers=headers, timeout=5)
            except Exception as e:
                print(e.args[0])

        mBody = {
            "msgtype": "file",
            "file": {
                "media_id":media_id
            }
        }
        mHeader = {'Content-Type': 'application/json; charset=UTF-8'}
        requests.post(url=static_config['webhook'], json=mBody, headers=mHeader)
    else :
        print('没有查询到有效数据')
if __name__ == '__main__':

    # 昨天日期
    yestoday= str(datetime.datetime.now().date()+datetime.timedelta(days=-1))
    print('昨天的日期为'+str(yestoday))

    # 前天日期
    yestoday2= str(datetime.datetime.now().date()+datetime.timedelta(days=-2))
    print('前天的日期为'+str(yestoday2))
    
    work(end_date,begin_date,filename)

`输出日志:
![在这里插入图片描述](https://img-blog.csdnimg.cn/568dd9f4796347468bf6b85b6e18246b.png)