漏斗周期漏斗数据关联优化

前言

本文章主要讲解,当漏斗分析时,数据需要通过关联多个表进行获取数据,并且多个周期分析时数据出现重叠现象,针对两个点进行优化。


1.请提取新增订购4天内各个周期的,用户数留存情况、呼转情况、短信下发情况

set hivevar:v_dt=2022-06-01;

select dt

, nvl(sum(new_ord_ucn_day),0) as new_ord_ucn_day

, nvl(sum(in_ord_cn1) ,0) as in_ord_cn1

, nvl(sum(in_sk_cn1) ,0) as in_sk_cn1

, nvl(sum(in_sms_down_cn1),0) as in_sms_down_cn1

, nvl(sum(in_ord_cn2) ,0) as in_ord_cn2

, nvl(sum(in_sk_cn2) ,0) as in_sk_cn2

, nvl(sum(in_sms_down_cn2),0) as in_sms_down_cn2

, nvl(sum(in_ord_cn3) ,0) as in_ord_cn3

, nvl(sum(in_sk_cn3) ,0) as in_sk_cn3

, nvl(sum(in_sms_down_cn3),0) as in_sms_down_cn3

, nvl(sum(in_ord_cn4) ,0) as in_ord_cn4

, nvl(sum(in_sk_cn4) ,0) as in_sk_cn4

, nvl(sum(in_sms_down_cn4),0) as in_sms_down_cn4

from (

     select

–按统计周期划分

     case

     when a.dt in('a1','a2','a3','a4') then '${hivevar:v_dt}'

     when a.dt in('b1','b2','b3')      then date_add('${hivevar:v_dt}',1)

     when a.dt in('c1','c2')           then date_add('${hivevar:v_dt}',2)

     when a.dt in('d1')                then date_add('${hivevar:v_dt}',3)

     end  as dt

     --a.dt=a1、a2、a3、a4限制漏斗周期范围,a.dt、b.dt、c.dt、d.dt=a1、a1、a1、a1限制漏斗数据范围

     ,case when a.dt='a1'                                           then 1 when a.dt='b1'                                           then 1 when a.dt='c1'                                             then 1 when a.dt='d1'                                             then 1 end as new_ord_ucn_day       --当日新增订购用户数

     ,case when a.dt='a1' and b.dt='a1'                             then 1 when a.dt='b1' and b.dt='b1'                             then 1 when a.dt='c1' and b.dt='c1'                               then 1 when a.dt='d1' and b.dt='d1'                               then 1 end as in_ord_cn1            --第1天留存用户数

     ,case when a.dt='a1' and b.dt='a1' and c.dt='a1'               then 1 when a.dt='b1' and b.dt='b1' and c.dt='b1'               then 1 when a.dt='c1' and b.dt='c1' and  c.dt='c1'                then 1 when a.dt='d1' and b.dt='d1' and  c.dt='d1'                then 1 end as in_sk_cn1             --第1天留存有呼转用户数

     ,case when a.dt='a1' and b.dt='a1' and c.dt='a1' and d.dt='a1' then 1 when a.dt='b1' and b.dt='b1' and c.dt='b1' and d.dt='b1' then 1 when a.dt='c1' and b.dt='c1' and  c.dt='c1' and  d.dt='c1' then 1 when a.dt='d1' and b.dt='d1' and  c.dt='d1' and  d.dt='d1' then 1 end as in_sms_down_cn1       --第1天留存有呼转有短信下发用户数

     ,case when a.dt='a2' and b.dt='a2'                             then 1 when a.dt='b2' and b.dt='b2'                             then 1 when a.dt='c2' and b.dt='c2'                               then 1 end as in_ord_cn2            --第2天留存用户数

     ,case when a.dt='a2' and b.dt='a2' and c.dt='a2'               then 1 when a.dt='b2' and b.dt='b2' and c.dt='b2'               then 1 when a.dt='c2' and b.dt='c2' and  c.dt='c2'                then 1 end as in_sk_cn2             --第2天留存有呼转用户数

     ,case when a.dt='a2' and b.dt='a2' and c.dt='a2' and d.dt='a2' then 1 when a.dt='b2' and b.dt='b2' and c.dt='b2' and d.dt='b2' then 1 when a.dt='c2' and b.dt='c2' and  c.dt='c2' and  d.dt='c2' then 1 end as in_sms_down_cn2       --第2天留存有呼转有短信下发用户数

     ,case when a.dt='a3' and b.dt='a3'                             then 1 when a.dt='b3' and b.dt='b3'                             then 1 end as in_ord_cn3            --第3天留存用户数

     ,case when a.dt='a3' and b.dt='a3' and c.dt='a3'               then 1 when a.dt='b3' and b.dt='b3' and c.dt='b3'               then 1 end as in_sk_cn3             --第3天留存有呼转用户数

     ,case when a.dt='a3' and b.dt='a3' and c.dt='a3' and d.dt='a3' then 1 when a.dt='b3' and b.dt='b3' and c.dt='b3' and d.dt='b3' then 1 end as in_sms_down_cn3       --第3天留存有呼转有短信下发用户数

     ,case when a.dt='a4' and b.dt='a4'                             then 1 end as in_ord_cn4            --第4天留存用户数

     ,case when a.dt='a4' and b.dt='a4' and c.dt='a4'               then 1 end as in_sk_cn4             --第4天留存有呼转用户数

     ,case when a.dt='a4' and b.dt='a4' and c.dt='a4' and d.dt='a4' then 1 end as in_sms_down_cn4       --第4天留存有呼转有短信下发用户数

     from (

     --新增用户数

              select dt, user_num

              from (

     --新增天期数据切块,按漏斗块切

                       select split(

                                      concat_ws('|'

                                          , case when dt = '${hivevar:v_dt}'              then 'a1' end--漏斗周期1的漏斗数据1

                                          , case when dt = '${hivevar:v_dt}'              then 'a2' end--漏斗周期1的漏斗数据2

                                          , case when dt = '${hivevar:v_dt}'              then 'a3' end--漏斗周期1的漏斗数据3

                                          , case when dt = '${hivevar:v_dt}'              then 'a4' end--漏斗周期1的漏斗数据4

                                          , case when dt = date_add('${hivevar:v_dt}', 1) then 'b1' end--漏斗周期2的漏斗数据1

                                          , case when dt = date_add('${hivevar:v_dt}', 1) then 'b2' end--漏斗周期2的漏斗数据2

                                          , case when dt = date_add('${hivevar:v_dt}', 1) then 'b3' end--漏斗周期2的漏斗数据3

                                          , case when dt = date_add('${hivevar:v_dt}', 2) then 'c1' end--漏斗周期3的漏斗数据1

                                          , case when dt = date_add('${hivevar:v_dt}', 2) then 'c2' end--漏斗周期3的漏斗数据2

                                          , case when dt = date_add('${hivevar:v_dt}', 3) then 'd1' end--漏斗周期4的漏斗数据1

                                          ), '\\|') as dt_arr,user_num

                       from dw.tds_status_detail_hly_norm_his t

                       where dt in (cast('${hivevar:v_dt}' as date)      --第1天

                           , cast(date_add('${hivevar:v_dt}', 1) as date)--第2天

                           , cast(date_add('${hivevar:v_dt}', 2) as date)--第3天

                           , cast(date_add('${hivevar:v_dt}', 3) as date)--第4天

                           ) and opt = '27' and prov_id = '44'

                   ) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num

          ) a

     --留存用户数

              LEFT JOIN (

         select dt, user_num

         from (

     **--留存天期数据切块,按漏斗块切**

                  select split(concat_ws('|'

                                   , case when dt = date_add('${hivevar:v_dt}', 1) then 'a1' end--漏斗周期1的漏斗数据1

                                   , case when dt = date_add('${hivevar:v_dt}', 2) then 'a2' end--漏斗周期1的漏斗数据2

                                   , case when dt = date_add('${hivevar:v_dt}', 3) then 'a3' end--漏斗周期1的漏斗数据3

                                   , case when dt = date_add('${hivevar:v_dt}', 4) then 'a4' end--漏斗周期1的漏斗数据4

                                   , case when dt = date_add('${hivevar:v_dt}', 2) then 'b1' end--漏斗周期2的漏斗数据1

                                   , case when dt = date_add('${hivevar:v_dt}', 3) then 'b2' end--漏斗周期2的漏斗数据2

                                   , case when dt = date_add('${hivevar:v_dt}', 4) then 'b3' end--漏斗周期2的漏斗数据3

                                   , case when dt = date_add('${hivevar:v_dt}', 3) then 'c1' end--漏斗周期3的漏斗数据1

                                   , case when dt = date_add('${hivevar:v_dt}', 4) then 'c2' end--漏斗周期3的漏斗数据2

                                   , case when dt = date_add('${hivevar:v_dt}', 4) then 'd1' end--漏斗周期4的漏斗数据1

                                   ), '\\|') as dt_arr,user_num

                  from dw.ads_status_hly_daily t

                  where dt in (

                        cast(date_add('${hivevar:v_dt}', 1) as date)--第1天

                      , cast(date_add('${hivevar:v_dt}', 2) as date)--第2天

                      , cast(date_add('${hivevar:v_dt}', 3) as date)--第3天

                      , cast(date_add('${hivevar:v_dt}', 4) as date)--第4天

                      ) and status = 1 and prov_id = '44'

              ) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num

     ) b

                        ON A.user_num = b.user_num and A.dt = b.dt--漏斗块关联,主要方便后面组成需要的漏斗分析

     --呼转用户数

              LEFT JOIN (

         select dt, user_num

         from (

     --呼转天期数据切块,按漏斗块切

                  select split(concat_ws('|'

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 1) then 'a1' end--漏斗周期1的漏斗数据1

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 2) then 'a2' end--漏斗周期1的漏斗数据2

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 3) then 'a3' end--漏斗周期1的漏斗数据3

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 4) then 'a4' end--漏斗周期1的漏斗数据4

                                   , case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 2) then 'b1' end--漏斗周期2的漏斗数据1

                                   , case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 3) then 'b2' end--漏斗周期2的漏斗数据2

                                   , case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 4) then 'b3' end--漏斗周期2的漏斗数据3

                                   , case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 3) then 'c1' end--漏斗周期3的漏斗数据1

                                   , case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 4) then 'c2' end--漏斗周期3的漏斗数据2

                                   , case when dt >= date_add('${hivevar:v_dt}', 3) and dt <= date_add('${hivevar:v_dt}', 4) then 'd1' end--漏斗周期4的漏斗数据1

                                   ), '\\|') as dt_arr, user_num

                  from dw.dws_sk_called_m_daily t where dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 4) and data_type = 'all' and prov_id = '44' and sk_day_cn > 0

              ) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num

     ) c

                        ON A.user_num = c.user_num and A.dt = c.dt**--漏斗块关联,主要方便后面组成需要的漏斗分析**

     --短信下发用户数

              LEFT JOIN (

         select dt, user_num

         from (

     --短信下发天期数据切块,按漏斗块切

                  select split(concat_ws('|'

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 1) then 'a1' end--漏斗周期1的漏斗数据1

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 2) then 'a2' end--漏斗周期1的漏斗数据2

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 3) then 'a3' end--漏斗周期1的漏斗数据3

                                   , case when dt >= '${hivevar:v_dt}'              and dt <= date_add('${hivevar:v_dt}', 4) then 'a4' end--漏斗周期1的漏斗数据4

                                   , case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 2) then 'b1' end--漏斗周期2的漏斗数据1

                                   , case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 3) then 'b2' end--漏斗周期2的漏斗数据2

                                   , case when dt >= date_add('${hivevar:v_dt}', 1) and dt <= date_add('${hivevar:v_dt}', 4) then 'b3' end--漏斗周期2的漏斗数据3

                                   , case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 3) then 'c1' end--漏斗周期3的漏斗数据1

                                   , case when dt >= date_add('${hivevar:v_dt}', 2) and dt <= date_add('${hivevar:v_dt}', 4) then 'c2' end--漏斗周期3的漏斗数据2

                                   , case when dt >= date_add('${hivevar:v_dt}', 3) and dt <= date_add('${hivevar:v_dt}', 4) then 'd1' end--漏斗周期4的漏斗数据1

                                   ), '\\|') as dt_arr, user_num

                  from dw.tds_sms_down_his t where dt >= '${hivevar:v_dt}' and dt <= date_add('${hivevar:v_dt}', 4)  and data_type = 1 and prov_id = '44'

              ) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num

     ) d ON A.user_num = d.user_num and A.dt = d.dt**--漏斗块关联,主要方便后面组成需要的漏斗分析**

) u1

group by dt

;

2.漏斗分析数据结果

dt new_ord_ucn_day in_ord_cn1 in_sk_cn1 in_sms_down_cn1 in_ord_cn2 in_sk_cn2 in_sms_down_cn2 in_ord_cn3 in_sk_cn3 in_sms_down_cn3 in_ord_cn4 in_sk_cn4 in_sms_down_cn4

2022-06-01 53050 52469 2108 51 52336 2496 61 52204 2805 69 52051 3159 80

2022-06-02 4096 3931 689 51 3918 834 68 3908 978 81 0 0 0

2022-06-03 4715 4540 799 82 4529 987 103 0 0 0 0 0 0

2022-06-04 4546 4367 682 63 0 0 0 0 0 0 0 0 0

Time taken: 69.733 seconds, Fetched: 4 row(s)

3.炸裂数据周期的作用

主要把需要关联的数据,按周期划分炸裂成一块一块,方便后面and 各个表的块数据,组成需要的漏斗分析

列子: ,case when a.dt=‘a4’ and b.dt=‘a4’ and c.dt=‘a4’ and d.dt=‘a4’ then 1 end as in_sms_down_cn4 --第4天留存有呼转有短信下发用户数

–新增天期数据切块,按漏斗块切

              select split(

                             concat_ws('|'

                                 , case when dt = '${hivevar:v_dt}'              then 'a1' end--漏斗周期1的漏斗数据1

                                 , case when dt = '${hivevar:v_dt}'              then 'a2' end--漏斗周期1的漏斗数据2

                                 , case when dt = '${hivevar:v_dt}'              then 'a3' end--漏斗周期1的漏斗数据3

                                 , case when dt = '${hivevar:v_dt}'              then 'a4' end--漏斗周期1的漏斗数据4

                                 , case when dt = date_add('${hivevar:v_dt}', 1) then 'b1' end--漏斗周期2的漏斗数据1

                                 , case when dt = date_add('${hivevar:v_dt}', 1) then 'b2' end--漏斗周期2的漏斗数据2

                                 , case when dt = date_add('${hivevar:v_dt}', 1) then 'b3' end--漏斗周期2的漏斗数据3

                                 , case when dt = date_add('${hivevar:v_dt}', 2) then 'c1' end--漏斗周期3的漏斗数据1

                                 , case when dt = date_add('${hivevar:v_dt}', 2) then 'c2' end--漏斗周期3的漏斗数据2

                                 , case when dt = date_add('${hivevar:v_dt}', 3) then 'd1' end--漏斗周期4的漏斗数据1

                                 ), '\\|') as dt_arr,user_num

              from dw.tds_status_detail_hly_norm_his t

              where dt in (cast('${hivevar:v_dt}' as date)      --第1天

                  , cast(date_add('${hivevar:v_dt}', 1) as date)--第2天

                  , cast(date_add('${hivevar:v_dt}', 2) as date)--第3天

                  , cast(date_add('${hivevar:v_dt}', 3) as date)--第4天

                  ) and opt = '27' and prov_id = '44'

          ) t1 LATERAL VIEW explode(dt_arr) emp_temp AS dt group by dt, user_num
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值