一、漏斗分析(点击率购买率)
### --- 需求分析
~~~ # 分时统计:
~~~ 点击率 = 点击次数 / 曝光次数
~~~ 购买率 = 购买次数 / 点击次数
二、创建ADS层表
### --- 创建ADS层表
~~~ # 语法:创建ADS层表
drop table if exists ads.ads_ad_show_rate;
create table ads.ads_ad_show_rate(
hour string,
click_rate double,
buy_rate double
) PARTITIONED BY (`dt` string)
row format delimited fields terminated by ',';
### --- 操作实例:创建ADS层表
hive (default)> drop table if exists ads.ads_ad_show_rate;
hive (default)>
> create table ads.ads_ad_show_rate(
> hour string,
> click_rate double,
> buy_rate double
> ) PARTITIONED BY (`dt` string)
> row format delimited fields terminated by ',';
三、曝光 点击 购买 时间(HH)行转列
### --- 数据分析
15075 15075 15075 0 01 2020-08-02 4349 4349 4349 1 01
2020-08-02 1245 1245 1245 2 01 2020-08-02 15075 4349 1245 01 2020-08-02
### --- 行转列的方法
~~~ # 方法一
select sum(case when ad_action='0' then cnt end) show_cnt,
sum(case when ad_action='1' then cnt end) click_cnt,
sum(case when ad_action='2' then cnt end) buy_cnt,
hour
from ads.ads_ad_show
where dt='2020-07-21' and hour='01'
group by hour ;
~~~ # 方法二
select max(case when ad_action='0' then cnt end) show_cnt,
max(case when ad_action='1' then cnt end) click_cnt,
max(case when ad_action='2' then cnt end) buy_cnt,
hour
from ads.ads_ad_show
where dt='2020-07-21' and hour='01'
group by hour ;
四、加载ADS层数据
### --- 加载ADS层数据脚本
[root@hadoop02 ~]# vim /data/yanqidw/script/advertisement/ads_load_ad_show_rate.sh
#!/bin/bash
source /etc/profile
if [ -n "$1" ] ;
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
with tmp as(
select max(case when ad_action='0' then cnt end) show_cnt,
max(case when ad_action='1' then cnt end) click_cnt,
max(case when ad_action='2' then cnt end) buy_cnt,
hour
from ads.ads_ad_show
where dt='$do_date'
group by hour
)
insert overwrite table ads.ads_ad_show_rate
partition (dt='$do_date')
select hour,
click_cnt / show_cnt as click_rate,
buy_cnt / click_cnt as buy_rate
from tmp;
"
hive -e "$sql"
### --- 加载ADS层数据:计算购买率
[root@hadoop02 ~]# sh /data/yanqidw/script/advertisement/ads_load_ad_show_rate.sh 2020-07-21
### --- 查看购买率
hive (default)> show partitions ads.ads_ad_show_rate;
partition
dt=2020-07-21
hive (default)> select * from ads.ads_ad_show_rate where dt='2020-07-21' limit 3;
ads_ad_show_rate.hour ads_ad_show_rate.click_rate ads_ad_show_rate.buy_rate ads_ad_show_rate.dt
00 0.2553191489361702 0.25 2020-07-21
01 0.3055555555555556 0.36363636363636365 2020-07-21
02 0.2631578947368421 0.26666666666666666 2020-07-21
hive (default)> select count(*) from ads.ads_ad_show_rate where dt='2020-07-21';
24