# -*- coding=utf-8 -*-
import pandas as pd
import time
import datetime
import random
import threading
import _strptime
import sys
import requests
import xlwt
import re
import subprocess as sp
from retrying import retry
reload(sys)
sys.setdefaultencoding('utf8')
today = datetime.datetime.now().date()
today_str=str(today)
print(today_str)
dirpath = {
'dirpath':'/tmp/jiankong/zzq',
'filepath':'/tmp/jiankong/zzq/{0}_shangpinbiangeng.xls'.format(today_str)
}
static_config = {
'webhook': 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=###',
}
print('文件路径为:',dirpath['filepath'])
filename=dirpath['filepath'].split('/')[-1]
print('文件名称:',filename)
def sql_format(yestoday,yestoday2):
hive_sql="""
hive -e "
select
t1.article_id
,t1.article_name
,t1.category_level3_id as category_level3_id1
,t2.category_level3_id
,t1.category_level3_description as category_level3_description1
,t2.category_level3_description
from
(select * from dim.dim_goods_information_have_pt where inc_day ='{yestoday}' )t1
left join (select * from dim.dim_goods_information_have_pt where inc_day ='{yestoday2}')t2
on t1.article_id=t2.article_id
where t1.category_level3_id<>t2.category_level3_id
;
"
"""
hive_sql_re = re.sub(r'(?s)(?:\/\*.*?\*\/|--[^\n]*)', '', hive_sql)
# 去重空行
hive_sql_blank = re.sub(r'(\t|\s)*(\n|\r\n)', '\n', hive_sql_re)
hive_sql=re.sub(r'(\`)','',hive_sql_blank)
return hive_sql.format(yestoday=yestoday,yestoday2=yestoday2)
# 设定工作内容 设定重试次数 等待最小时间 最大时间 retry 参数不能外部传参
@retry(stop_max_attempt_number=2,wait_random_min=1, wait_random_max=1000*10)
def work(yestoday,yestoday2,filename):
try:
hive_sql=sql_format(yestoday,yestoday2)
except Exception as e:
print('sql格式化错误'+str(e))
print("执行sql如下")
print(hive_sql)
start_time=time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()) # 2023-02-21 16:07:14
# 执行sql
p=sp.Popen(hive_sql,shell=True,stdout=sp.PIPE,stderr=sp.PIPE,close_fds=True)
# out为 执行指令的输出结果 info为指令执行的信息
out,info=p.communicate()
returncode=p.returncode
if returncode!=0:
raise Exception("代码出错了")
print('输出类型为:')
print(type(out))
print('>>>>> hive_sql=[{}] out=[{}] info=[{}] returncode=[{}]'.format(hive_sql, out, info, returncode))
# 处理字符串
comment_list=out.split('\n')
print('------------拆分的日志如下:-------------')
resultSQLlist=[]
for i in comment_list[2:-1]:
if "+" in i:
continue
print('按|分割')
list=i.split('|')
article_id=str(list[1].strip())
print(article_id)
article_name=list[2].strip().encode('utf-8')
print(type(article_name))
print(article_name)
category_level3_id1=str(list[3].strip())
print(category_level3_id1)
category_level3_id=str(list[4].strip())
print(category_level3_id)
category_level3_description1=list[5].strip().encode('utf-8')
print(category_level3_description1)
category_level3_description=list[6].strip().encode('utf-8')
print(category_level3_description)
content=str(article_id)+','+str(article_name)+','+str(category_level3_id1)+','+str(category_level3_id)+','+str(category_level3_description1)+','+str(category_level3_description)
resultSQLlist.append(content)
print(content)
print('查询结果数据集如下:')
print(resultSQLlist)
if resultSQLlist.__len__()>=1:
# 创建 excel 对象
book = xlwt.Workbook(encoding='utf-8',style_compression=0)
# 设置表单名称
sheet = book.add_sheet('商品主数据变更记录',cell_overwrite_ok=True)
# 设置列名
col=['商品编码', '商品名称', '变更后小分类编码', '变更前小分类编码','变更后小分类名称','变更前小分类名称']
len=resultSQLlist.__len__()
for i in range(0,len):
data=resultSQLlist[i].split(',')
for j in range(0,6):
print(data[j])
# 写入表头数据 行 列 列值
sheet.write(0,j,col[j])
# 写入查询的其余数据 从第二行开始写入
sheet.write(i+1,j,data[j])
try:
os.makedirs(dirpath['dirpath'])
except Exception:
print('文件已存在')
try:
os.remove(dirpath['filepath'])
print('删除文件成功')
except Exception:
print('文件不存在')
savepath=dirpath['filepath']
book.save(savepath)
oFileUploadUrl = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/upload_media?key=###&type=file'
oWX_URL = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/upload_media?key=###'
payload={'Content-Disposition': 'form-data','name': 'media','filename': filename}
files=[('boundary', ('%s' % filename, open('%s' % savepath, 'rb'), 'application/xls'))]
headers={}
try:
response = requests.request("POST", oFileUploadUrl, headers=headers, data=payload, files=files,timeout=5)
except Exception as e:
print(e.args[0])
json_res = response.json()
print('返回的结果为',json_res)
media_id = json_res['media_id']
if media_id != "":
data = {"msgtype": "file",
"file": {
"media_id": media_id
}
}
headers = {'Content-Type': 'application/json'} # 指定提交的是json
try:
response = requests.post(
url=oWX_URL, json=data, headers=headers, timeout=5)
except Exception as e:
print(e.args[0])
mBody = {
"msgtype": "file",
"file": {
"media_id":media_id
}
}
mHeader = {'Content-Type': 'application/json; charset=UTF-8'}
requests.post(url=static_config['webhook'], json=mBody, headers=mHeader)
else :
print('没有查询到有效数据')
if __name__ == '__main__':
# 昨天日期
yestoday= str(datetime.datetime.now().date()+datetime.timedelta(days=-1))
print('昨天的日期为'+str(yestoday))
# 前天日期
yestoday2= str(datetime.datetime.now().date()+datetime.timedelta(days=-2))
print('前天的日期为'+str(yestoday2))
work(end_date,begin_date,filename)
`输出日志:
![在这里插入图片描述](https://img-blog.csdnimg.cn/568dd9f4796347468bf6b85b6e18246b.png)
python脚本 利用shell查询hive表 根据打印的输出日志 获取数据 腾讯机器人转发查询结果
于 2023-05-05 17:33:50 首次发布