一、创建ADS层表
### --- 计算当天、当周、当月活跃会员数量
~~~ # 语法
use ads;
drop table if exists ads.ads_member_active_count;
create table ads.ads_member_active_count(
`day_count` int COMMENT '当日会员数量',
`week_count` int COMMENT '当周会员数量',
`month_count` int COMMENT '当月会员数量'
) COMMENT '活跃会员数'
partitioned by(dt string)
row format delimited fields terminated by ',';
~~~ # 实例操作
hive (dws)> use ads;
OK
hive (ads)> drop table if exists ads.ads_member_active_count;
OK
hive (ads)> create table ads.ads_member_active_count(
> `day_count` int COMMENT '当日会员数量',
> `week_count` int COMMENT '当周会员数量',
> `month_count` int COMMENT '当月会员数量'
> ) COMMENT '活跃会员数'
> partitioned by(dt string)
> row format delimited fields terminated by ',';
OK
二、加载ADS层数据
### --- 创建ADS层加载数据脚本
[root@hadoop02 ~]# vim /data/yanqidw/script/member_active/ads_load_member_active.sh
# 脚本一
#!/bin/bash
source /etc/profile
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
with tmp as(
select 'day' datelabel, count(*) cnt, dt
from dws.dws_member_start_day
where dt='$do_date'
group by dt
union all
select 'week' datelabel, count(*) cnt, dt
from dws.dws_member_start_week
where dt='$do_date'
group by dt
union all
select 'month' datelabel, count(*) cnt, dt
from dws.dws_member_start_month
where dt='$do_date'
group by dt
)
insert overwrite table ads.ads_member_active_count
partition(dt='$do_date')
select sum(case when datelabel='day' then cnt end) as
day_count,
sum(case when datelabel='week' then cnt end) as
week_count,
sum(case when datelabel='month' then cnt end) as
month_count
from tmp
group by dt;
"
hive -e "$sql"
[root@hadoop02 ~]# vim /data/yanqidw/script/member_active/ads1_load_member_active.sh
# 脚本二
#!/bin/bash
source /etc/profile
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table ads.ads_member_active_count
partition(dt='$do_date')
select daycnt, weekcnt, monthcnt
from (select dt, count(*) daycnt
from dws.dws_member_start_day
where dt='$do_date'
group by dt
) day join
(select dt, count(*) weekcnt
from dws.dws_member_start_week
where dt='$do_date'
group by dt
) week on day.dt=week.dt
join
(select dt, count(*) monthcnt
from dws.dws_member_start_month
where dt='$do_date'
group by dt
) month on day.dt=month.dt;
"
hive -e "$sql"
### --- 为脚本授予可执行权限并验证
[root@hadoop02 ~]# chmod +x /data/yanqidw/script/member_active/ads_load_member_active.sh
[root@hadoop02 ~]# chmod +x /data/yanqidw/script/member_active/ads1_load_member_active.sh
~~~ # 执行脚本0
[root@hadoop02 ~]# /data/yanqidw/script/member_active/ads_load_member_active.sh
~~~ # 验证结果
hive (ads)> show partitions ads.ads_member_active_count;
partition
dt=2021-09-28
~~~ # 求和验证
hive (ads)> select * from ads.ads_member_active_count;
OK
ads_member_active_count.day_count ads_member_active_count.week_count ads_member_active_count.month_count ads_member_active_count.dt
9999 9999 9999 2021-09-28
### --- 执行脚本1
~~~ # 删除加载的数据
hive (ads)> alter table ads.ads_member_active_count drop partition (dt='2021-09-28');
Dropped the partition dt=2021-09-28
hive (ads)> show partitions ads.ads_member_active_count;
partition
~~~ # 加载数据
[root@hadoop02 ~]# /data/yanqidw/script/member_active/ads1_load_member_active.sh
~~~ # 验证数据
hive (ads)> show partitions ads.ads_member_active_count;
partition
dt=2021-09-28
hive (ads)> select * from ads.ads_member_active_count;
ads_member_active_count.day_count ads_member_active_count.week_count ads_member_active_count.month_count ads_member_active_count.dt
9999 9999 9999 2021-09-28
三、小结.1、架构图
### --- 脚本执行次序:
[root@hadoop02 ~]# cd /data/yanqidw/script/member_active/
./ods_load_startlog.sh
./dwd_load_startlog.sh
./dws_load_member_start.sh
./ads_load_member_active.sh