前言
本文章主要讲解,当漏斗分析时,数据需要通过关联多个表进行获取数据,并且多个周期分析时数据出现重叠现象,针对两个点进行优化。
1.请提取新增订购4天内各个周期的,用户数留存情况、呼转情况、短信下发情况
set hivevar:v_dt=2022-06-01;
select dt
, nvl(sum(new_ord_ucn_day),0) as new_ord_ucn_day
, nvl(sum(in_ord_cn1) ,0) as in_ord_cn1
, nvl(sum(in_sk_cn1) ,0) as in_sk_cn1
, nvl(sum(in_sms_down_cn1),0) as in_sms_down_cn1
, nvl(sum(in_ord_cn2) ,0) as in_ord_cn2
, nvl(sum(in_sk_cn2) ,0) as in_sk_cn2
, nvl(sum(in_sms_down_cn2),0) as in_sms_down_cn2
, nvl(sum(in_ord_cn3) ,0) as in_ord_cn3
, nvl(sum(in_sk_cn3) ,0) as in_sk_cn3
, nvl(sum(in_sms_down_cn3),0) as in_sms_down_cn3
, nvl(sum(in_ord_cn4) ,0) as in_ord_cn4
, nvl(sum(in_sk_cn4) ,0) as in_sk_cn4
, nvl(sum(in_sms_down_cn4),0) as in_sms_down_cn4
from (
select
–按统计周期划分
case
when a.dt in('a1','a2','a3','a4') then '${hivevar:v_dt}'
when a.dt in('b1','b2','b3') then date_add('${hivevar:v_dt}',1)
when a.dt in('c1','c2') then date_add('${hivevar:v_dt}',2)
when a.dt in('d1') then date_add('${hivevar:v_dt}',3)
end as dt
--a.dt=a1、a2、a3、a4限制漏斗周期范围,a.dt、b.dt、c.dt、d.dt=a1、a1、a1、a1限制漏斗数据范围
,case when a.dt='a1' then 1 when a.dt='b1' then 1 when a.dt='c1' then 1 when a.dt='d1' then 1 end as new_ord_ucn_day --当日新增订购用户数
,case when a.dt='a1' and b.dt='a1' then 1 when a.dt='b1' and b.dt='b1' then 1 when a.dt='c1' and b.dt='c1' then 1 when a.dt='d1' and b.dt='d1' then 1 end as in_ord_cn1 --第1天留存用户数
,case when a.dt='a1' and b.dt='a1' and c.dt='a1' then 1 when a.dt='b1' and b.dt='b1' and c.dt='b1' then 1 when a.dt='c1' and b.dt='c1' and c.dt='c1' then 1 when a.dt='d1' and b.dt='d1' and c.dt='d1' then 1 end as in_sk_cn1 --第1天留存有呼转用户数
,case when a.dt='a1' and b.dt='a1' and c.dt='a1' and d.dt='a1' then 1 when a.dt='b1' and b.dt='b1' and c.dt='b1' and d.dt='b1' then 1 when a.dt='c1' and b.dt='c1' and c.dt='c1' and d.dt='c1' then 1 when a.dt='d1' and b.dt='d1' and c.dt='d1' and d.dt='d1' then 1 end as in_sms_down_cn1 --第1天留存有呼转有短信下发用户数
,case when a.dt='a2' and b.dt='a2' then 1 when a.dt='b2' and b.dt='b2' then 1 when a.dt='c2' and b.dt='c2' then 1 end as in_ord_cn2 --第2天留存用户数
,case when a.dt='a2' and b.dt='a2' and c.dt='a2' then 1 when a.dt='b2' and b.dt='b2' and c.dt='b2' then 1 when a.dt='c2' and b.dt='c2' and c.dt='c2' then 1 end as in_sk_cn2 --第2天留存有呼转用户数
,case when a.dt='a2' and b.dt='a2' and c.dt='a2' and d.dt='a2' then 1 when a.dt='b2' and b.dt='b2' and c.dt='b2' and d.dt='b2' then 1 when a.dt='c2' and b.dt='c2' and c.dt='c2' and d.dt='c2' then 1 end as in_sms_down_cn2 --第2天留存有呼转有短信下发用户数
,case when a.dt='a3' and b.dt='a3' then 1 when a.dt='b3' and b.dt='b3' then 1 end as in_ord_cn3 --第3天留存用户数
,case when a.dt='a3' and b.dt='a3' and c.dt='a3' then 1 when a.dt='b3' and b.dt='b3' and c.dt='b3' then 1 end as in_sk_cn3 --第3天留存有呼转用户数
,case when a.dt='a3' and b.dt='a3' and c.dt='a3' and d.dt='a3' then 1 when a.dt='b3' and b.dt='b3' and c.dt='b3' and d.dt='b3' then 1 end as in_sms_down_cn3 --第3天留存有呼转有短信下发用户数
,case when a.dt='a4' and b.dt='a4' then 1 end as in_ord_cn4 --第4天留存用户数
,case when a.dt='a4' and b.dt='a4' and c.dt='a4' then 1 end as in_sk_cn4 --第4天留存有呼转用户数
,case when a.dt='a4' and b.dt='a4' and c.dt='a4' and d.dt='a4' then 1 end as in_sms_down_cn4 --第4天留存有呼转有短信下发用户数
from (
--新增用户数
select dt, user_num
from (
--新增天期数据切块,按漏斗块切
select split(
concat_ws('|'
, case when dt = '${hivevar:v_dt}' then 'a1' end--漏斗周期1的漏斗数据1
, case when dt = '${hivevar:v_dt}' then 'a2' end--漏斗周期1的漏斗数据2
, case when dt = '${hivevar:v_dt}' then 'a3' end--漏斗周期1的漏斗数据3
, case when dt = '${hivevar:v_dt}' then 'a4' end--漏斗周期1的漏斗数据4
, case when dt = date_add('${hivevar:v_dt}', 1) then 'b1' end--漏斗周期2的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 1) then 'b2' end--漏斗周期2的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 1) then 'b3' end--漏斗周期2的漏斗数据3
, case when dt = date_add('${hivevar:v_dt}', 2) then 'c1' end--漏斗周期3的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 2) then 'c2' end--漏斗周期3的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 3) then 'd1' end--漏斗周期4的漏斗数据1
), '\\|') as dt_arr,user_num
from dw.tds_status_detail_hly_norm_his t
where dt in (cast('${hivevar:v_dt}' as date) --第1天
, cast(date_add('${hivevar:v_dt}', 1) as date)--第2天
, cast(date_add('${hivevar:v_dt}', 2) as date)--第3天
, cast(date_add('${hivevar:v_dt}', 3) as date)--第4天
) and opt = '27' and prov_id = '44'
) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num
) a
--留存用户数
LEFT JOIN (
select dt, user_num
from (
**--留存天期数据切块,按漏斗块切**
select split(concat_ws('|'
, case when dt = date_add('${hivevar:v_dt}', 1) then 'a1' end--漏斗周期1的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 2) then 'a2' end--漏斗周期1的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 3) then 'a3' end--漏斗周期1的漏斗数据3
, case when dt = date_add('${hivevar:v_dt}', 4) then 'a4' end--漏斗周期1的漏斗数据4
, case when dt = date_add('${hivevar:v_dt}', 2) then 'b1' end--漏斗周期2的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 3) then 'b2' end--漏斗周期2的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 4) then 'b3' end--漏斗周期2的漏斗数据3
, case when dt = date_add('${hivevar:v_dt}', 3) then 'c1' end--漏斗周期3的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 4) then 'c2' end--漏斗周期3的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 4) then 'd1' end--漏斗周期4的漏斗数据1
), '\\|') as dt_arr,user_num
from dw.ads_status_hly_daily t
where dt in (
cast(date_add('${hivevar:v_dt}', 1) as date)--第1天
, cast(date_add('${hivevar:v_dt}', 2) as date)--第2天
, cast(date_add('${hivevar:v_dt}', 3) as date)--第3天
, cast(date_add('${hivevar:v_dt}', 4) as date)--第4天
) and status = 1 and prov_id = '44'
) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num
) b
ON A.user_num = b.user_num and A.dt = b.dt--漏斗块关联,主要方便后面组成需要的漏斗分析
--呼转用户数
LEFT JOIN (
select dt, user_num
from (
--呼转天期数据切块,按漏斗块切
select split(concat_ws('|'
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 1) then 'a1' end--漏斗周期1的漏斗数据1
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 2) then 'a2' end--漏斗周期1的漏斗数据2
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 3) then 'a3' end--漏斗周期1的漏斗数据3
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 4) then 'a4' end--漏斗周期1的漏斗数据4
, case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 2) then 'b1' end--漏斗周期2的漏斗数据1
, case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 3) then 'b2' end--漏斗周期2的漏斗数据2
, case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 4) then 'b3' end--漏斗周期2的漏斗数据3
, case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 3) then 'c1' end--漏斗周期3的漏斗数据1
, case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 4) then 'c2' end--漏斗周期3的漏斗数据2
, case when dt >= date_add('${hivevar:v_dt}', 3) and dt <= date_add('${hivevar:v_dt}', 4) then 'd1' end--漏斗周期4的漏斗数据1
), '\\|') as dt_arr, user_num
from dw.dws_sk_called_m_daily t where dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 4) and data_type = 'all' and prov_id = '44' and sk_day_cn > 0
) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num
) c
ON A.user_num = c.user_num and A.dt = c.dt**--漏斗块关联,主要方便后面组成需要的漏斗分析**
--短信下发用户数
LEFT JOIN (
select dt, user_num
from (
--短信下发天期数据切块,按漏斗块切
select split(concat_ws('|'
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 1) then 'a1' end--漏斗周期1的漏斗数据1
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 2) then 'a2' end--漏斗周期1的漏斗数据2
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 3) then 'a3' end--漏斗周期1的漏斗数据3
, case when dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 4) then 'a4' end--漏斗周期1的漏斗数据4
, case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 2) then 'b1' end--漏斗周期2的漏斗数据1
, case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 3) then 'b2' end--漏斗周期2的漏斗数据2
, case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 4) then 'b3' end--漏斗周期2的漏斗数据3
, case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 3) then 'c1' end--漏斗周期3的漏斗数据1
, case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 4) then 'c2' end--漏斗周期3的漏斗数据2
, case when dt >= date_add('${hivevar:v_dt}', 3) and dt <= date_add('${hivevar:v_dt}', 4) then 'd1' end--漏斗周期4的漏斗数据1
), '\\|') as dt_arr, user_num
from dw.tds_sms_down_his t where dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 4) and data_type = 1 and prov_id = '44'
) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num
) d ON A.user_num = d.user_num and A.dt = d.dt**--漏斗块关联,主要方便后面组成需要的漏斗分析**
) u1
group by dt
;
2.漏斗分析数据结果
dt new_ord_ucn_day in_ord_cn1 in_sk_cn1 in_sms_down_cn1 in_ord_cn2 in_sk_cn2 in_sms_down_cn2 in_ord_cn3 in_sk_cn3 in_sms_down_cn3 in_ord_cn4 in_sk_cn4 in_sms_down_cn4
2022-06-01 53050 52469 2108 51 52336 2496 61 52204 2805 69 52051 3159 80
2022-06-02 4096 3931 689 51 3918 834 68 3908 978 81 0 0 0
2022-06-03 4715 4540 799 82 4529 987 103 0 0 0 0 0 0
2022-06-04 4546 4367 682 63 0 0 0 0 0 0 0 0 0
Time taken: 69.733 seconds, Fetched: 4 row(s)
3.炸裂数据周期的作用
主要把需要关联的数据,按周期划分炸裂成一块一块,方便后面and 各个表的块数据,组成需要的漏斗分析
列子: ,case when a.dt=‘a4’ and b.dt=‘a4’ and c.dt=‘a4’ and d.dt=‘a4’ then 1 end as in_sms_down_cn4 --第4天留存有呼转有短信下发用户数
–新增天期数据切块,按漏斗块切
select split(
concat_ws('|'
, case when dt = '${hivevar:v_dt}' then 'a1' end--漏斗周期1的漏斗数据1
, case when dt = '${hivevar:v_dt}' then 'a2' end--漏斗周期1的漏斗数据2
, case when dt = '${hivevar:v_dt}' then 'a3' end--漏斗周期1的漏斗数据3
, case when dt = '${hivevar:v_dt}' then 'a4' end--漏斗周期1的漏斗数据4
, case when dt = date_add('${hivevar:v_dt}', 1) then 'b1' end--漏斗周期2的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 1) then 'b2' end--漏斗周期2的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 1) then 'b3' end--漏斗周期2的漏斗数据3
, case when dt = date_add('${hivevar:v_dt}', 2) then 'c1' end--漏斗周期3的漏斗数据1
, case when dt = date_add('${hivevar:v_dt}', 2) then 'c2' end--漏斗周期3的漏斗数据2
, case when dt = date_add('${hivevar:v_dt}', 3) then 'd1' end--漏斗周期4的漏斗数据1
), '\\|') as dt_arr,user_num
from dw.tds_status_detail_hly_norm_his t
where dt in (cast('${hivevar:v_dt}' as date) --第1天
, cast(date_add('${hivevar:v_dt}', 1) as date)--第2天
, cast(date_add('${hivevar:v_dt}', 2) as date)--第3天
, cast(date_add('${hivevar:v_dt}', 3) as date)--第4天
) and opt = '27' and prov_id = '44'
) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num