背景: 有些报表除了需要每天的数据走势图,有时还需要每周的平均和每月的平均,再和上周或月作比较,计算差值,然后分别形成周平均报表和月平均报表。
日报表:每天计算报表数据
周报表:每周某个时间只计算一次
月报表:每月每个时间只计算一次
以下设计可以作为比较通用的周月差值计算方式。
考虑到查询条件的跨年,所以设计表的时候有一个日期是标志某个周期内的数据,这样查询就能通过这个日期连续的查询。etldate
结果展示:
以下是详细代码,包括设计的表和思路:
#!/usr/bin/env bash
# ************************************************************************
# Useage: dwd_base_sall_week_month.sh[data_date]
# Author: author
# Version: V14.00.001
# Func Desc : XXX报表计算周报 月报
# modify describe :
# yyyymmdd version author modified
VERSION="V14.00.001" #初始版本为001,后面修改为递增1
if [ $# -gt 1 ];then # 参数个数,需视具体参数修改
echo "Params error."
echo "Useage: dwd_base_sall_week_month.sh [data_date]"
exit 1
fi
logfile=$BIPROG_ROOT/logs/`basename $0`.log #定义写日志文件名
#写日志函数
writelog()
{
echo `date "+%Y-%m-%d %H:%M:%S"`" $1"|tee -a $logfile
}
#执行shell 命令
EXESHELL()
{
vIniNum=`expr ${vIniNum} + 1`
if [ ${vIniNum} -lt ${vCurNum} ]
then
writelog "STEP ${vIniNum}: < ${vCurNum} SKIP ..."
return 0
fi
writelog "STEP ${vIniNum}: $CMD "
eval $CMD
if [ $? -ne 0 ]
then
writelog "SH Command failed! exit..."
exit 1
fi
}
vDay=${1:-"`lastday YYYY-MM-DD`"} #如果没有传日期的参数,默认取昨天
dtstr=`date -d "0 day ago " +%Y-%m-%d" "%H:%M:%S`
serven_day_before_etldate=`date -d "7 day ago ${vDay}" +"%Y-%m-%d"`
# 获取该日期在一年中的第几周
third_week=`date -d "0 day ago $vDay" +%U`
# 获取该日期是星期几
current_day_week=`date -d "0 day ago $vDay" +%w`
# 该日期的的年份
current_year=`date -d "0 day ago $vDay" +%Y`
# 该日期的月份
current_month=`date -d "0 day ago $vDay " +%m`
# 该日期的7天前的月份
serven_day_before_month=`date -d "7 day ago $vDay" +%m`
# 该日期的 日
current_day=`date -d "0 day ago $vDay" +%d`
# 该日期7天前的日
serven_day_before=`date -d "7 day ago $vDay" +%d`
# 定义变量 判断是否 执行周或月
count_num=0
# 周 月报
# drop table biuser.temp_period_order_cnt ;
# create table biuser.temp_period_order_cnt (
# etldate string comment '日期',
# year_val string comment '年份',
# week_third string comment '第几周',
# etldate_tag string comment '范围标签 01.04-01.10',
# current_period_avg string comment '当前周/月平均值',
# last_period_avg string comment '上周/月平均值',
# difference_val string comment '差值' ,
# hb_val string comment '环比值',
# data_type string comment '数据类型',
# time_dim string comment '周/月',
# addtimes string comment '加载时间'
# ) comment 'XXX报表周/月报表'
# partitioned by (dt string,dataType string)
# row format delimited
# fields terminated by '|'
# lines terminated by '\n' stored as textfile ;
# data_type =1 pcXXX匹配率
# data_type =2 pc在线匹配率
# data_type =3 PCXXX搜索网络失败率
# data_type =4 PC本地XXX使用占比
# data_type =5 PCXXX手动搜索占比
# 开始计算周报 每周跑一次 每周日起跑
# 判断日期
SQL="
INSERT OVERWRITE TABLE biuser.temp_period_order_cnt PARTITION(DT='${vDay}',dataType='1')
select
'${vDay}' etldate ,
'$current_year' year_val,
'$third_week' as week_third,
'$serven_day_before_month.$serven_day_before-$current_month.$current_day' as etldate_tag,
a.current_period_avg as current_period_avg,
b.current_period_avg as last_period_avg,
round((b.current_period_avg - a.current_period_avg),2) as difference_val,
round((b.current_period_avg - a.current_period_avg)*100/b.current_period_avg,2) hb_val,
a.data_type,
a.time_dim,
'${dtstr}' as addtimes
from
(select
'1' as data_type,
'周' as time_dim,
round(avg((cast(local_go_net as bigint) +cast(network_go_net as bigint))*100/(cast(local_go_net as bigint) +cast(network_go_net as bigint)+cast(matche_order_fail_cnt as bigint))),2) current_period_avg
from biuser.dwd_base_sall where dt>='${serven_day_before_etldate}' and dt<'${vDay}'
)a left outer join
(select
etldate,
etldate_tag,
current_period_avg,
last_period_avg,
difference_val,
hb_val,
data_type,
time_dim
from biuser.temp_period_order_cnt where dt='${serven_day_before_etldate}' and datatype='1' and time_dim='周'
) b
on a.data_type=b.data_type and a.time_dim=b.time_dim;
"
if [ $current_day_week = 0 ]
then
echo '周末'
EXE_HIVE "${SQL}"
count_num=1
else
echo '非周末'
fi
#开始计算月报 每月月初 计算上个月的月报 每个月的月初01 计算上个月的
# 年
y_report=`date -d ''$vDay' -1 days' +%Y`
# 月
m_report=`date -d ''$vDay' -1 days' +%m`
m_Last_report_day=`date -d "${vDay} 1 month ago" +%Y-%m-%d`
# 日
d_report=`date -d ''$vDay' -1 days' +%d`
echo '年'$y_report
echo '月'$m_report
echo '一个月前的日期'$m_Last_report_day
echo '日'$d_report
#是周末 也是月初
SQL="
INSERT into TABLE biuser.temp_period_order_cnt PARTITION(DT='${vDay}',dataType='1')
select
'${vDay}' etldate ,
'$current_year' year_val ,
'$current_month' week_third,
' ' etldate_tag,
a.current_period_avg as current_period_avg,
b.current_period_avg as last_period_avg,
round((b.current_period_avg - a.current_period_avg),2) as difference_val,
round((b.current_period_avg - a.current_period_avg)*100/b.current_period_avg,2) hb_val,
a.data_type,
a.time_dim,
'${dtstr}' as addtimes
from
(select
'1' as data_type,
'月' as time_dim,
round(avg((cast(local_go_net as bigint) +cast(network_go_net as bigint))*100/(cast(local_go_net as bigint) +cast(network_go_net as bigint)+cast(matche_order_fail_cnt as bigint))),2) current_period_avg
from biuser.dwd_base_sall where dt>='${m_Last_report_day}' and dt<'${vDay}'
)a left outer join
(select
etldate,
etldate_tag,
current_period_avg,
last_period_avg,
difference_val,
hb_val,
data_type,
time_dim
from biuser.temp_period_order_cnt where dt='${m_Last_report_day}' and datatype='1' and time_dim='月'
) b
on a.data_type=b.data_type and a.time_dim=b.time_dim;
"
# 如果是月初 不是周末
SQL2="
INSERT OVERWRITE TABLE biuser.temp_period_order_cnt PARTITION(DT='${vDay}',dataType='1')
select
'${vDay}' etldate ,
'$current_year' year_val ,
'$current_month' week_third,
' ' etldate_tag,
a.current_period_avg as current_period_avg,
b.current_period_avg as last_period_avg,
round((b.current_period_avg - a.current_period_avg),4) as difference_val,
round((b.current_period_avg - a.current_period_avg)/b.current_period_avg,4) hb_val,
a.data_type,
a.time_dim,
'${dtstr}' as addtimes
from
(select
'1' as data_type,
'月' as time_dim,
round(avg((cast(local_go_net as bigint) +cast(network_go_net as bigint))*100/(cast(local_go_net as bigint) +cast(network_go_net as bigint)+cast(matche_order_fail_cnt as bigint))),2) current_period_avg
from biuser.dwd_base_sall where dt>='${m_Last_report_day}' and dt<'${vDay}'
)a left outer join
(select
etldate,
etldate_tag,
current_period_avg,
last_period_avg,
difference_val,
hb_val,
data_type,
time_dim
from biuser.temp_period_order_cnt where dt='${m_Last_report_day}' and datatype='1' and time_dim='月'
) b
on a.data_type=b.data_type and a.time_dim=b.time_dim;
"
if [ $(date -d ''$vDay' -1 days' +%d) -eq $(cal $m_report $y_report |xargs|awk '{print $NF}') ]
then
echo '月初'
if [ $count_num = 1 ]
then
echo '是周末,是月初'
EXE_HIVE "${SQL}"
else
EXE_HIVE "${SQL2}"
fi
count_num=2
else
echo '非月初'
false
fi
echo $count_num
if [ $count_num = 0 ]
then
echo '不是周末,也不是月初'
else
##
CMD="/usr/local/mysql/bin/mysql -h 192.168.119.132 -uroot -pbi_hive -e \"delete from biuser.temp_period_order_cnt where etldate='${vDay}' AND data_type='1';\""
EXESHELL
# 同步数据到 mysql
CMD="/home/hadoop/hadoop/sqoop-1.4.4/bin/sqoop export --connect jdbc:mysql://192.168.119.132:3306/bidata?characterEncoding=utf8 --username root --password xxxx --table temp_period_order_cnt --export-dir /user/hive/warehouse/biuser.db/temp_period_order_cnt/dt=${vDay}/datatype=1 --input-fields-terminated-by '|' --null-non-string '0' --null-string '0';"
EXESHELL
fi