流量主题
各渠道流量统计
建表语句
数据装载
查看数据
路径分析
用户路径分析 : 用户在 APP 或网站中的访问路径 , 对访问路径进行分析
用户访问路径的可视化 ( 桑基图 ) 。可以真实还原用户的访问路径,如 : 页面跳转和页面访问次序
桑基图需要提供每种页面跳转的次数,每个跳转表示 : source / target
- source : 跳转起始页面
- target : 跳转终到页面
桑基图 :
建表语句
数据装载
查看数据
用户主题
用户变动统计
该需求包括两个指标,分别为流失用户数和回流用户数
建表语句
数据装载
查看分区
查看数据
用户留存率
留存分析一般包含新增留存和活跃留存分析。
新增留存分析是分析某天的新增用户中,有多少人有后续的活跃行为。活跃留存分析是分析某天的活跃用户中,有多少人有后续的活跃行为。
留存分析是衡量产品对用户价值高低的重要指标。
此处要求统计新增留存率,新增留存率具体是指留存用户数与新增用户数的比值,例如2020-06-14新增100个用户,1日之后(2020-06-15)这100人中有80个人活跃了,那2020-06-14的1日留存数则为80,2020-06-14的1日留存率则为80%。
统计每天的1至7日留存率 :
建表语句
数据装载
查看分区
查看数据
用户新增活跃统计
建表语句
数据装载
查看分区
查看数据
用户行为漏斗分析
漏斗分析是一个数据分析模型,它能够科学反映一个业务过程从起点到终点各阶段用户转化情况。由于其能将各阶段环节都展示出来,故哪个阶段存在问题,就能一目了然。
建表语句
数据装载
查看分区
查看数据
新增交易用户统计
建表语句
数据装载
查看分区
查看数据
商品主题
最近7/30日各品牌复购率
建表语句
数据装载
查看分区
查看数据
各品牌商品交易统计
建表语句
数据装载
查看分区
查看数据
各品类商品交易统计
建表语句
数据装载
查看分区
查看数据
各分类商品购物车存量Top10
建表语句
数据装载
查看分区
查看数据
交易主题
交易综合统计
建表语句
数据装载
查看分区
查看数据
各省份交易统计
建表语句
数据装载
查看分区
查看数据
优惠券主题
最近30天发布的优惠券的补贴率
建表语句
数据装载
查看分区
查看数据
活动主题
最近30天发布的活动的补贴率
建表语句
数据装载
查看分区
查看数据
数据装载脚本
/home/cpu/bin
目录下创建 dws_to_ads.sh
vim dws_to_ads.sh
内容 :
#!/bin/bash
APP=gmall
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;then
do_date=$2
else
do_date=`date -d "-1 day" +%F`
fi
ads_activity_stats="
insert overwrite table ${APP}.ads_activity_stats
select * from ${APP}.ads_activity_stats
union
select
'$do_date' dt,
activity_id,
activity_name,
start_date,
cast(activity_reduce_amount_30d/original_amount_30d as decimal(16,2))
from ${APP}.dws_trade_activity_order_nd
where dt='$do_date';
"
ads_coupon_stats="
insert overwrite table ${APP}.ads_coupon_stats
select * from ${APP}.ads_coupon_stats
union
select
'$do_date' dt,
coupon_id,
coupon_name,
start_date,
coupon_rule,
cast(coupon_reduce_amount_30d/original_amount_30d as decimal(16,2))
from ${APP}.dws_trade_coupon_order_nd
where dt='$do_date';
"
ads_new_buyer_stats="
insert overwrite table ${APP}.ads_new_buyer_stats
select * from ${APP}.ads_new_buyer_stats
union
select
'$do_date',
odr.recent_days,
new_order_user_count,
new_payment_user_count
from
(
select
recent_days,
sum(if(order_date_first>=date_add('$do_date',-recent_days+1),1,0)) new_order_user_count
from ${APP}.dws_trade_user_order_td lateral view explode(array(1,7,30)) tmp as recent_days
where dt='$do_date'
group by recent_days
)odr
join
(
select
recent_days,
sum(if(payment_date_first>=date_add('$do_date',-recent_days+1),1,0)) new_payment_user_count
from ${APP}.dws_trade_user_payment_td lateral view explode(array(1,7,30)) tmp as recent_days
where dt='$do_date'
group by recent_days
)pay
on odr.recent_days=pay.recent_days;
"
ads_order_by_province="
insert overwrite table ${APP}.ads_order_by_province
select * from ${APP}.ads_order_by_province
union
select
'$do_date' dt,
1 recent_days,
province_id,
province_name,
area_code,
iso_code,
iso_3166_2,
order_count_1d,
order_total_amount_1d
from ${APP}.dws_trade_province_order_1d
where dt='$do_date'
union
select
'$do_date' dt,
recent_days,
province_id,
province_name,
area_code,
iso_code,
iso_3166_2,
sum(order_count),
sum(order_total_amount)
from
(
select
recent_days,
province_id,
province_name,
area_code,
iso_code,
iso_3166_2,
case recent_days
when 7 then order_count_7d
when 30 then order_count_30d
end order_count,
case recent_days
when 7 then order_total_amount_7d
when 30 then order_total_amount_30d
end order_total_amount
from ${APP}.dws_trade_province_order_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days,province_id,province_name,area_code,iso_code,iso_3166_2;
"
ads_page_path="
insert overwrite table ${APP}.ads_page_path
select * from ${APP}.ads_page_path
union
select
'$do_date' dt,
recent_days,
source,
nvl(target,'null'),
count(*) path_count
from
(
select
recent_days,
concat('step-',rn,':',page_id) source,
concat('step-',rn+1,':',next_page_id) target
from
(
select
recent_days,
page_id,
lead(page_id,1,null) over(partition by session_id,recent_days) next_page_id,
row_number() over (partition by session_id,recent_days order by view_time) rn
from ${APP}.dwd_traffic_page_view_inc lateral view explode(array(1,7,30)) tmp as recent_days
where dt>=date_add('$do_date',-recent_days+1)
)t1
)t2
group by recent_days,source,target;
"
ads_repeat_purchase_by_tm="
insert overwrite table ${APP}.ads_repeat_purchase_by_tm
select * from ${APP}.ads_repeat_purchase_by_tm
union
select
'$do_date' dt,
recent_days,
tm_id,
tm_name,
cast(sum(if(order_count>=2,1,0))/sum(if(order_count>=1,1,0)) as decimal(16,2))
from
(
select
'$do_date' dt,
recent_days,
user_id,
tm_id,
tm_name,
sum(order_count) order_count
from
(
select
recent_days,
user_id,
tm_id,
tm_name,
case recent_days
when 7 then order_count_7d
when 30 then order_count_30d
end order_count
from ${APP}.dws_trade_user_sku_order_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days,user_id,tm_id,tm_name
)t2
group by recent_days,tm_id,tm_name;
"
ads_sku_cart_num_top3_by_cate="
insert overwrite table ${APP}.ads_sku_cart_num_top3_by_cate
select * from ${APP}.ads_sku_cart_num_top3_by_cate
union
select
'$do_date' dt,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
sku_id,
sku_name,
cart_num,
rk
from
(
select
sku_id,
sku_name,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
cart_num,
rank() over (partition by category1_id,category2_id,category3_id order by cart_num desc) rk
from
(
select
sku_id,
sum(sku_num) cart_num
from ${APP}.dwd_trade_cart_full
where dt='$do_date'
group by sku_id
)cart
left join
(
select
id,
sku_name,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name
from ${APP}.dim_sku_full
where dt='$do_date'
)sku
on cart.sku_id=sku.id
)t1
where rk<=3;
"
ads_trade_stats="
insert overwrite table ${APP}.ads_trade_stats
select * from ${APP}.ads_trade_stats
union
select
'$do_date',
odr.recent_days,
order_total_amount,
order_count,
order_user_count,
order_refund_count,
order_refund_user_count
from
(
select
1 recent_days,
sum(order_total_amount_1d) order_total_amount,
sum(order_count_1d) order_count,
count(*) order_user_count
from ${APP}.dws_trade_user_order_1d
where dt='$do_date'
union all
select
recent_days,
sum(order_total_amount),
sum(order_count),
sum(if(order_count>0,1,0))
from
(
select
recent_days,
case recent_days
when 7 then order_total_amount_7d
when 30 then order_total_amount_30d
end order_total_amount,
case recent_days
when 7 then order_count_7d
when 30 then order_count_30d
end order_count
from ${APP}.dws_trade_user_order_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days
)odr
join
(
select
1 recent_days,
sum(order_refund_count_1d) order_refund_count,
count(*) order_refund_user_count
from ${APP}.dws_trade_user_order_refund_1d
where dt='$do_date'
union all
select
recent_days,
sum(order_refund_count),
sum(if(order_refund_count>0,1,0))
from
(
select
recent_days,
case recent_days
when 7 then order_refund_count_7d
when 30 then order_refund_count_30d
end order_refund_count
from ${APP}.dws_trade_user_order_refund_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days
)refund
on odr.recent_days=refund.recent_days;
"
ads_trade_stats_by_cate="
insert overwrite table ${APP}.ads_trade_stats_by_cate
select * from ${APP}.ads_trade_stats_by_cate
union
select
'$do_date' dt,
nvl(odr.recent_days,refund.recent_days),
nvl(odr.category1_id,refund.category1_id),
nvl(odr.category1_name,refund.category1_name),
nvl(odr.category2_id,refund.category2_id),
nvl(odr.category2_name,refund.category2_name),
nvl(odr.category3_id,refund.category3_id),
nvl(odr.category3_name,refund.category3_name),
nvl(order_count,0),
nvl(order_user_count,0),
nvl(order_refund_count,0),
nvl(order_refund_user_count,0)
from
(
select
1 recent_days,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
sum(order_count_1d) order_count,
count(distinct(user_id)) order_user_count
from ${APP}.dws_trade_user_sku_order_1d
where dt='$do_date'
group by category1_id,category1_name,category2_id,category2_name,category3_id,category3_name
union all
select
recent_days,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
sum(order_count),
count(distinct(if(order_count>0,user_id,null)))
from
(
select
recent_days,
user_id,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
case recent_days
when 7 then order_count_7d
when 30 then order_count_30d
end order_count
from ${APP}.dws_trade_user_sku_order_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name
)odr
full outer join
(
select
1 recent_days,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
sum(order_refund_count_1d) order_refund_count,
count(distinct(user_id)) order_refund_user_count
from ${APP}.dws_trade_user_sku_order_refund_1d
where dt='$do_date'
group by category1_id,category1_name,category2_id,category2_name,category3_id,category3_name
union all
select
recent_days,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
sum(order_refund_count),
count(distinct(if(order_refund_count>0,user_id,null)))
from
(
select
recent_days,
user_id,
category1_id,
category1_name,
category2_id,
category2_name,
category3_id,
category3_name,
case recent_days
when 7 then order_refund_count_7d
when 30 then order_refund_count_30d
end order_refund_count
from ${APP}.dws_trade_user_sku_order_refund_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name
)refund
on odr.recent_days=refund.recent_days
and odr.category1_id=refund.category1_id
and odr.category1_name=refund.category1_name
and odr.category2_id=refund.category2_id
and odr.category2_name=refund.category2_name
and odr.category3_id=refund.category3_id
and odr.category3_name=refund.category3_name;
"
ads_trade_stats_by_tm="
insert overwrite table ${APP}.ads_trade_stats_by_tm
select * from ${APP}.ads_trade_stats_by_tm
union
select
'$do_date' dt,
nvl(odr.recent_days,refund.recent_days),
nvl(odr.tm_id,refund.tm_id),
nvl(odr.tm_name,refund.tm_name),
nvl(order_count,0),
nvl(order_user_count,0),
nvl(order_refund_count,0),
nvl(order_refund_user_count,0)
from
(
select
1 recent_days,
tm_id,
tm_name,
sum(order_count_1d) order_count,
count(distinct(user_id)) order_user_count
from ${APP}.dws_trade_user_sku_order_1d
where dt='$do_date'
group by tm_id,tm_name
union all
select
recent_days,
tm_id,
tm_name,
sum(order_count),
count(distinct(if(order_count>0,user_id,null)))
from
(
select
recent_days,
user_id,
tm_id,
tm_name,
case recent_days
when 7 then order_count_7d
when 30 then order_count_30d
end order_count
from ${APP}.dws_trade_user_sku_order_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days,tm_id,tm_name
)odr
full outer join
(
select
1 recent_days,
tm_id,
tm_name,
sum(order_refund_count_1d) order_refund_count,
count(distinct(user_id)) order_refund_user_count
from ${APP}.dws_trade_user_sku_order_refund_1d
where dt='$do_date'
group by tm_id,tm_name
union all
select
recent_days,
tm_id,
tm_name,
sum(order_refund_count),
count(if(order_refund_count>0,user_id,null))
from
(
select
recent_days,
user_id,
tm_id,
tm_name,
case recent_days
when 7 then order_refund_count_7d
when 30 then order_refund_count_30d
end order_refund_count
from ${APP}.dws_trade_user_sku_order_refund_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days,tm_id,tm_name
)refund
on odr.recent_days=refund.recent_days
and odr.tm_id=refund.tm_id
and odr.tm_name=refund.tm_name;
"
ads_traffic_stats_by_channel="
insert overwrite table ${APP}.ads_traffic_stats_by_channel
select * from ${APP}.ads_traffic_stats_by_channel
union
select
'$do_date' dt,
recent_days,
channel,
cast(count(distinct(mid_id)) as bigint) uv_count,
cast(avg(during_time_1d)/1000 as bigint) avg_duration_sec,
cast(avg(page_count_1d) as bigint) avg_page_count,
cast(count(*) as bigint) sv_count,
cast(sum(if(page_count_1d=1,1,0))/count(*) as decimal(16,2)) bounce_rate
from ${APP}.dws_traffic_session_page_view_1d lateral view explode(array(1,7,30)) tmp as recent_days
where dt>=date_add('$do_date',-recent_days+1)
group by recent_days,channel;
"
ads_user_action="
insert overwrite table ${APP}.ads_user_action
select * from ${APP}.ads_user_action
union
select
'$do_date' dt,
page.recent_days,
home_count,
good_detail_count,
cart_count,
order_count,
payment_count
from
(
select
1 recent_days,
sum(if(page_id='home',1,0)) home_count,
sum(if(page_id='good_detail',1,0)) good_detail_count
from ${APP}.dws_traffic_page_visitor_page_view_1d
where dt='$do_date'
and page_id in ('home','good_detail')
union all
select
recent_days,
sum(if(page_id='home' and view_count>0,1,0)),
sum(if(page_id='good_detail' and view_count>0,1,0))
from
(
select
recent_days,
page_id,
case recent_days
when 7 then view_count_7d
when 30 then view_count_30d
end view_count
from ${APP}.dws_traffic_page_visitor_page_view_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
and page_id in ('home','good_detail')
)t1
group by recent_days
)page
join
(
select
1 recent_days,
count(*) cart_count
from ${APP}.dws_trade_user_cart_add_1d
where dt='$do_date'
union all
select
recent_days,
sum(if(cart_count>0,1,0))
from
(
select
recent_days,
case recent_days
when 7 then cart_add_count_7d
when 30 then cart_add_count_30d
end cart_count
from ${APP}.dws_trade_user_cart_add_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days
)cart
on page.recent_days=cart.recent_days
join
(
select
1 recent_days,
count(*) order_count
from ${APP}.dws_trade_user_order_1d
where dt='$do_date'
union all
select
recent_days,
sum(if(order_count>0,1,0))
from
(
select
recent_days,
case recent_days
when 7 then order_count_7d
when 30 then order_count_30d
end order_count
from ${APP}.dws_trade_user_order_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days
)ord
on page.recent_days=ord.recent_days
join
(
select
1 recent_days,
count(*) payment_count
from ${APP}.dws_trade_user_payment_1d
where dt='$do_date'
union all
select
recent_days,
sum(if(order_count>0,1,0))
from
(
select
recent_days,
case recent_days
when 7 then payment_count_7d
when 30 then payment_count_30d
end order_count
from ${APP}.dws_trade_user_payment_nd lateral view explode(array(7,30)) tmp as recent_days
where dt='$do_date'
)t1
group by recent_days
)pay
on page.recent_days=pay.recent_days;
"
ads_user_change="
insert overwrite table ${APP}.ads_user_change
select * from ${APP}.ads_user_change
union
select
churn.dt,
user_churn_count,
user_back_count
from
(
select
'$do_date' dt,
count(*) user_churn_count
from ${APP}.dws_user_user_login_td
where dt='$do_date'
and login_date_last=date_add('$do_date',-7)
)churn
join
(
select
'$do_date' dt,
count(*) user_back_count
from
(
select
user_id,
login_date_last
from ${APP}.dws_user_user_login_td
where dt='$do_date'
)t1
join
(
select
user_id,
login_date_last login_date_previous
from ${APP}.dws_user_user_login_td
where dt=date_add('$do_date',-1)
)t2
on t1.user_id=t2.user_id
where datediff(login_date_last,login_date_previous)>=8
)back
on churn.dt=back.dt;
"
ads_user_retention="
insert overwrite table ${APP}.ads_user_retention
select * from ${APP}.ads_user_retention
union
select
'$do_date' dt,
login_date_first create_date,
datediff('$do_date',login_date_first) retention_day,
sum(if(login_date_last='$do_date',1,0)) retention_count,
count(*) new_user_count,
cast(sum(if(login_date_last='$do_date',1,0))/count(*)*100 as decimal(16,2)) retention_rate
from
(
select
user_id,
date_id login_date_first
from ${APP}.dwd_user_register_inc
where dt>=date_add('$do_date',-7)
and dt<'$do_date'
)t1
join
(
select
user_id,
login_date_last
from ${APP}.dws_user_user_login_td
where dt='$do_date'
)t2
on t1.user_id=t2.user_id
group by login_date_first;
"
ads_user_stats="
insert overwrite table ${APP}.ads_user_stats
select * from ${APP}.ads_user_stats
union
select
'$do_date' dt,
t1.recent_days,
new_user_count,
active_user_count
from
(
select
recent_days,
sum(if(login_date_last>=date_add('$do_date',-recent_days+1),1,0)) new_user_count
from ${APP}.dws_user_user_login_td lateral view explode(array(1,7,30)) tmp as recent_days
where dt='$do_date'
group by recent_days
)t1
join
(
select
recent_days,
sum(if(date_id>=date_add('$do_date',-recent_days+1),1,0)) active_user_count
from ${APP}.dwd_user_register_inc lateral view explode(array(1,7,30)) tmp as recent_days
group by recent_days
)t2
on t1.recent_days=t2.recent_days;
"
case $1 in
"ads_activity_stats" )
hive -e "$ads_activity_stats"
;;
"ads_coupon_stats" )
hive -e "$ads_coupon_stats"
;;
"ads_new_buyer_stats" )
hive -e "$ads_new_buyer_stats"
;;
"ads_order_by_province" )
hive -e "$ads_order_by_province"
;;
"ads_page_path" )
hive -e "$ads_page_path"
;;
"ads_repeat_purchase_by_tm" )
hive -e "$ads_repeat_purchase_by_tm"
;;
"ads_sku_cart_num_top3_by_cate" )
hive -e "$ads_sku_cart_num_top3_by_cate"
;;
"ads_trade_stats" )
hive -e "$ads_trade_stats"
;;
"ads_trade_stats_by_cate" )
hive -e "$ads_trade_stats_by_cate"
;;
"ads_trade_stats_by_tm" )
hive -e "$ads_trade_stats_by_tm"
;;
"ads_traffic_stats_by_channel" )
hive -e "$ads_traffic_stats_by_channel"
;;
"ads_user_action" )
hive -e "$ads_user_action"
;;
"ads_user_change" )
hive -e "$ads_user_change"
;;
"ads_user_retention" )
hive -e "$ads_user_retention"
;;
"ads_user_stats" )
hive -e "$ads_user_stats"
;;
"all" )
hive -e "$ads_activity_stats$ads_coupon_stats$ads_new_buyer_stats$ads_order_by_province$ads_page_path$ads_repeat_purchase_by_tm$ads_sku_cart_num_top3_by_cate$ads_trade_stats$ads_trade_stats_by_cate$ads_trade_stats_by_tm$ads_traffic_stats_by_channel$ads_user_action$ads_user_change$ads_user_retention$ads_user_stats"
;;
esac
增加脚本执行权限
chmod 777 dws_to_ads.sh
脚本用法
dws_to_ads.sh all 2020-06-14