python脚本

import csv
import json
import os
import sys
import requests
from datetime import datetime,timedelta
from urllib import request as req
import boto3
import airflow
from airflow.models import DAG
from datetime import datetime,timedelta
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.hive_operator import HiveOperator
from airflow.utils import dates

# ------------------------ 脚本位置 (script location) -------------------

# /home/super/job/presto_query_data

# One (emr_id, emr_name, presto_host) tuple per Presto coordinator polled by
# get_data.
emrs = [
    ('c-f3e01a03022153cd', 'bigdata-adhoc-presto', '10.38.85.141'),
]

# Passed to the DAG as default_args.
args = {
    'owner': 'rui.zhao',
    'is_show': 0,
    'start_date': dates.days_ago(1),
}

presto_query_data_dag = DAG(
    dag_id='presto_query_data',
    default_args=args,
    # NOTE(review): the published expression ('0 /1 * * * * ') was not a valid
    # cron string (a '*' was lost before '/1'). Hourly at minute 0 is assumed
    # because get_data names its output file per hour -- confirm.
    schedule_interval='0 */1 * * *',
    dagrun_timeout=timedelta(minutes=60),
    catchup=False,
)

# No-op boundary tasks marking the start and end of the DAG.
dummy_start = DummyOperator(task_id='start', dag=presto_query_data_dag)
dummy_finish = DummyOperator(task_id='finish', dag=presto_query_data_dag)
# Two-letter size suffixes and their byte multipliers (binary, 1024-based).
_SIZE_MULTIPLIERS = (
    ('kB', 1024),
    ('MB', 1024 ** 2),
    ('GB', 1024 ** 3),
    ('TB', 1024 ** 4),
    ('PB', 1024 ** 5),
)


def change(bytes):
    """Convert a size string with a unit suffix into a byte count.

    Args:
        bytes: Text such as ``"1.5GB"``, ``"20kB"`` or ``"0B"``
            (presumably a Presto data-size string -- confirm against the
            API response).

    Returns:
        A ``float`` number of bytes for the kB/MB/GB/TB/PB suffixes. For a
        plain ``B`` suffix the value is returned as a string formatted with
        three decimals, which is kept as-is for compatibility with the
        exported data.

    Raises:
        ValueError: If the numeric part cannot be parsed as a float.
    """
    for suffix, multiplier in _SIZE_MULTIPLIERS:
        if bytes.endswith(suffix):
            return float(bytes.split(suffix)[0]) * multiplier
    # Plain bytes: no scaling, only normalised formatting.
    return format(float(bytes.split("B")[0]), '.3f')
def change_time(ms):
    """Convert a duration string with a unit suffix into seconds.

    Args:
        ms: Text such as ``"1.50h"``, ``"3.00m"``, ``"250.00ms"``,
            ``"12.00us"``, ``"5.00ns"``, ``"2.00d"`` or ``"4.20s"``
            (presumably a Presto duration string -- confirm against the
            API response).

    Returns:
        The duration in seconds. Hours, minutes, milliseconds and
        microseconds are returned as strings formatted with two decimals;
        nanoseconds, days and seconds are returned as ``float``. The mixed
        return types are kept for compatibility with the exported data.

    Raises:
        ValueError: If the numeric part cannot be parsed as a float.
    """
    if ms.endswith('h'):
        return format(float(ms.split("h")[0]) * 3600, '.2f')
    # start=1 requires at least one character before the unit.
    if ms.endswith('m', 1):
        return format(float(ms.split("m")[0]) * 60, '.2f')
    if ms.endswith('ms'):
        return format(float(ms.split("ms")[0]) / 1000, '.2f')
    if ms.endswith('us'):
        return format(float(ms.split("us")[0]) / 1000 / 1000, '.2f')
    if ms.endswith('ns', 1):
        # Previously returned the raw nanosecond count as if it were seconds.
        return float(ms.split("n")[0]) / 1e9
    if ms.endswith('d', 1):
        # Previously returned the raw day count as if it were seconds.
        return float(ms.split("d")[0]) * 86400
    # Anything else is treated as plain seconds.
    return float(ms.split("s")[0])
# Column order of the exported file; keys must match those built by _query_row.
_PRESTO_QUERY_FIELDS = [
    'emr_id', 'emr_name', 'presto_host', 'queryId',
    'clientTransactionSupport', 'user', 'source', 'catalog', 'schema',
    'timeZoneKey', 'locale', 'remoteUserAddress', 'userAgent',
    'resourceGroupId', 'state', 'memoryPool', 'scheduled', 'self', 'query',
    'createTime', 'endTime', 'queuedTime', 'elapsedTime', 'executionTime',
    'totalDrivers', 'queuedDrivers', 'runningDrivers', 'completedDrivers',
    'rawInputDataSize', 'rawInputPositions', 'cumulativeUserMemory',
    'userMemoryReservation', 'totalMemoryReservation',
    'peakUserMemoryReservation', 'totalCpuTime', 'totalScheduledTime',
    'fullyBlocked', 'progressPercentage',
]


def _query_row(emr_id, emr_name, presto_host, query):
    """Flatten one entry of the ``/v1/query`` response into a CSV row dict."""
    session = query['session']
    stats = query['queryStats']
    if 'resourceGroupId' in query:
        # Only the first element of the resource group id is exported.
        resource_group_id = query['resourceGroupId'][0]
    else:
        resource_group_id = '无数据'
    return {
        'emr_id': emr_id,
        'emr_name': emr_name,
        'presto_host': presto_host,
        'queryId': session['queryId'],
        'clientTransactionSupport': session['clientTransactionSupport'],
        'user': session['user'],
        'source': session['source'],
        'catalog': session['catalog'],
        'schema': session.get('schema', 'NULL'),
        'timeZoneKey': session['timeZoneKey'],
        'locale': session['locale'],
        'remoteUserAddress': session['remoteUserAddress'],
        'userAgent': session['userAgent'],
        'resourceGroupId': resource_group_id,
        'state': query['state'],
        'memoryPool': query['memoryPool'],
        'scheduled': query['scheduled'],
        'self': query['self'],
        # Keep every record on a single physical line of the output file.
        'query': query['query'].replace('\n', ' '),
        'createTime': stats['createTime'],
        'endTime': stats.get('endTime', 'NULL'),
        'queuedTime': stats['queuedTime'],
        'elapsedTime': stats['elapsedTime'],
        'executionTime': change_time(stats['executionTime']),
        'totalDrivers': int(stats['totalDrivers']),
        'queuedDrivers': int(stats['queuedDrivers']),
        'runningDrivers': int(stats['runningDrivers']),
        'completedDrivers': int(stats['completedDrivers']),
        'rawInputDataSize': change(stats['rawInputDataSize']),
        'rawInputPositions': int(stats['rawInputPositions']),
        'cumulativeUserMemory': int(stats['cumulativeUserMemory']),
        'userMemoryReservation': change(stats['userMemoryReservation']),
        'totalMemoryReservation': change(stats['totalMemoryReservation']),
        'peakUserMemoryReservation': change(stats['peakUserMemoryReservation']),
        'totalCpuTime': stats['totalCpuTime'],
        'totalScheduledTime': stats['totalScheduledTime'],
        'fullyBlocked': stats['fullyBlocked'],
        'progressPercentage': stats.get('progressPercentage', 0.0),
    }


def get_data(data):
    """Dump the query list of every host in ``emrs`` to CSV and upload it to S3.

    For each ``(emr_id, emr_name, presto_host)`` in ``emrs`` the function
    fetches ``http://<presto_host>:8889/v1/query``, writes one
    ``\\u0001``-delimited row per returned query to
    ``/tmp/presto-<YYYYmmddHH>.csv`` and uploads that file to the
    ``transsion-datacenter-ire`` bucket under
    ``tranadm/presto_query_data/dt=<YYYYmmdd>/<YYYYmmddHH>.csv``.

    Args:
        data: ISO-8601 timestamp string. It is parsed (so a malformed value
            still raises) but the file name and partition are derived from
            the current wall-clock time, not from this value.

    Raises:
        ValueError: If ``data`` is not a valid ISO-8601 timestamp.
        KeyError: If a query entry lacks one of the mandatory fields.
    """
    datetime.fromisoformat(data)

    now = datetime.now()
    hour = now.strftime('%Y%m%d%H')
    dt = now.strftime('%Y%m%d')
    path_file = "/tmp/presto-" + hour + ".csv"

    try:
        with open(path_file, mode='a', encoding='utf-8', newline='\n') as csvfile:
            writer = csv.DictWriter(
                csvfile,
                delimiter='\u0001',
                fieldnames=_PRESTO_QUERY_FIELDS,
            )
            for emr_id, emr_name, presto_host in emrs:
                url = f"http://{presto_host}:8889/v1/query"
                with req.urlopen(url, timeout=600) as response:
                    payload = response.read().decode('utf-8')
                for query in json.loads(payload):
                    writer.writerow(
                        _query_row(emr_id, emr_name, presto_host, query)
                    )

        s3_key = f'tranadm/presto_query_data/dt={dt}/{hour}.csv'
        boto3.resource('s3') \
            .Object('transsion-datacenter-ire', s3_key) \
            .upload_file(path_file)
    finally:
        # The file is opened in append mode, so a leftover from a failed run
        # would duplicate rows on retry; always clean it up.
        if os.path.exists(path_file):
            os.remove(path_file)
# Runs get_data with the templated execution date as its `data` argument.
get_all_apps_task = PythonOperator(
    task_id='get_data_task',
    python_callable=get_data,
    dag=presto_query_data_dag,
    op_kwargs={'data': '{{ execution_date }}'},
)

# Adds the dt partition for the run date to tranadm.presto_query_data if it
# does not exist yet.
add_partition = HiveOperator(
    task_id="add_partition",
    hql="alter table presto_query_data add if not exists partition (dt='{{ds_nodash}}');",
    schema="tranadm",
    dag=presto_query_data_dag,
)

# Task order: start -> fetch and upload -> add partition -> finish.
dummy_start >> get_all_apps_task >> add_partition >> dummy_finish

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值