##后台nginx日志mpi-web报表生成use bi;##set ($suffix = "${env.YYYY}${env.MM}${env.DD}");##今日临时表创建drop table if exists dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD};create table if not exists dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD}(server_ip string,url string,times int,avg_response_today int,avg_body_size int,response_line95_today int,count_50x int,count_40x int,count_30x int,count_20x int)ROW FORMAT DELIMITEDFIELDS TERMINATED BY '\005'COLLECTION ITEMS TERMINATED BY '\002'MAP KEYS TERMINATED BY '\003'LINES TERMINATED BY '\n'STORED AS ORC;##昨日临时表创建drop table if exists dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD};create table if not exists dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD}(server_ip string,url string,times int,avg_response_today int,avg_body_size int,response_line95_today int,count_50x int,count_40x int,count_30x int,count_20x int)ROW FORMAT DELIMITEDFIELDS TERMINATED BY '\005'COLLECTION ITEMS TERMINATED BY '\002'MAP KEYS TERMINATED BY '\003'LINES TERMINATED BY '\n'STORED AS ORC;##今日报表生成insert into table dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD}selectserver_ip,concat(parse_url(url,'HOST'), regexp_replace(parse_url(url, 'PATH'),'[0-9|\+|\*]','')) as url_s,count(url) as times,round(avg(response_time)*1000) as avg_response_today,round(avg(body_bytes_sent)) as avg_body_size,round(percentile(round(response_time*1000),0.95)) as response_line95_today,sum(if(status like '50%' ,1 ,0)) as count_50x,sum(if(status like '40%' ,1 ,0)) as count_40x,sum(if(status like '30%' ,1 ,0)) as count_30x,sum(if(status like '20%' ,1 ,0)) as count_20xfromdpdw_nginx_log_allwhereapplication = 'mpi-web'and hp_stat_date='${env.YYYYMMDD}'and parse_url(url, 'PATH')<>'/index.jsp'and parse_url(url, 'PATH')<>'/heartbeat.html'and parse_url(url, 'HOST')<>'dp'and parse_url(url, 'HOST')<>'www.dianping.com'group byconcat(parse_url(url,'HOST'), regexp_replace(parse_url(url, 'PATH'),'[0-9|\+|\*]','')),server_ip,applicationorder bytimes desc;##昨日报表生成insert into table dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD}selectserver_ip as server_ip,concat(parse_url(url,'HOST'), regexp_replace(parse_url(url, 'PATH'),'[0-9|\+|\*]','')) as url,count(url) as times,round(avg(response_time)*1000) as avg_response_today,round(avg(body_bytes_sent)) as avg_body_size,round(percentile(round(response_time*1000),0.95)) as response_line95_today,sum(if(status like '50%' ,1 ,0)) as count_50x,sum(if(status like '40%' ,1 ,0)) as count_40x,sum(if(status like '30%' ,1 ,0)) as count_30x,sum(if(status like '20%' ,1 ,0)) as count_20xfromdpdw_nginx_log_allwhereapplication = 'mpi-web'and hp_stat_date='${env.YYYYMMDD_P1D}'and parse_url(url, 'PATH')<>'/index.jsp'and parse_url(url, 'PATH')<>'/heartbeat.html'and parse_url(url, 'HOST')<>'dp'and parse_url(url, 'HOST')<>'www.dianping.com'group byconcat(parse_url(url,'HOST'), regexp_replace(parse_url(url, 'PATH'),'[0-9|\+|\*]','')),server_ip,applicationorder bytimes desc;insert overwrite table dprpt_mpi_nginx_log_daily partition(hp_cal_dt = '${env.YYYYMMDD}')selectt.server_ip as server_ip,t.url as url,t.times as times,coalesce(y.times,0) as times_cmp,t.avg_response_today as avg_response_today,coalesce(y.avg_response_today,0) as avg_response_cmp,t.response_line95_today as response_line95_today,coalesce(y.response_line95_today,0) as response_line95_today_cmp,t.avg_body_size as avg_body_size,t.count_20x as count_20x,coalesce(y.count_20x,0) as count_20x_cmp,t.count_30x as count_30x,coalesce(y.count_30x,0) as count_30x_cmp,t.count_40x as count_40x,coalesce(y.count_40x,0) as count_40x_cmp,t.count_50x as count_50x,coalesce(y.count_50x,0) as count_50x_cmpfrom dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD} t left outer join dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD} yon t.url=y.url and t.server_ip=y.server_iporder by times desc;drop table if exists dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD};drop table if exists dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD};
hive-1
最新推荐文章于 2023-09-13 19:49:08 发布