用户漏斗每日更新

用户漏斗数据
每日都要更新
这次优化使用shuffle,果然效率提高了一倍
/*
编写 人:me
编写日期:2020.10.28
修改日期:2020.11.2
目 标:建设承影看板
修改:1、增加上上周注册用户,2、呈现近8周的数据
*/

---------------------------分城市,近8周漏斗数据


CREATE table pri_da_risk.dmh_mck_week_funnel_city_last_8week as (
SELECT STRAIGHT_JOIN a5.city_name                          AS city_name
  ,a1.week                              AS analy_clycle
  ,CASE WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) =  weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))    THEN '本周注册'
     WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+1) THEN '上周注册'  
     WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+2) THEN '上上周注册' 
        ELSE '上上周前注册'
  END                                  AS user_type 
  ,a1.source_channel                    AS source_channel      
  ,a1.source                            AS source 
  ,count(distinct a1.passenger_uuid)    AS cnt_register
  ,count(distinct a2.passenger_id)      AS cnt_activer
  ,count( distinct a3.passenger_id)     AS cnt_bubbler        
  ,count(distinct a6.passenger_id)      AS cnt_caller          
  ,count(distinct a7.passenger_id)      AS cnt_replyer       
  ,count( distinct a8.passenger_uuid)   AS cnt_finisher     
  ,count(distinct a8.route_plan_uuid)   AS cnt_finish_routid
  ,sum(a8.route_cash_flow)/100          AS route_cash_flow  
  ,sum(a8.pay_coupon_amount) /100       AS pay_coupon_amount  
FROM (SELECT weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
    ,ds                                      AS ds
    ,passenger_uuid         AS passenger_uuid
    ,source                 AS source
    ,source_channel         AS source_channel
    ,min(register_time)                      AS register_time
    ,max(register_city)                      AS register_city
  FROM pub_cockpit_db.dm_user_basic_char_dt
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd') 
  GROUP BY week
    ,passenger_uuid
    ,source
    ,source_channel
    ,ds
  ) a1  
LEFT JOIN [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM pub_cockpit_db.ods_user_app_open_detail 
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  AND  client_id='1'
     ) a2
ON  a1.passenger_uuid = a2.passenger_id 
AND  a1.week = a2.week       
LEFT JOIN  [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM  pub_cockpit_db.ods_ord_bubble_p_detail_fact
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  ) a3
ON  a1.passenger_uuid =a3.passenger_id
AND  a1.week = a3.week
LEFT JOIN [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds 
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  AND  substr(create_route_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyy-MM-dd')
  AND  weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(create_route_time,'yyyy-MM-dd'))
  ) a6
ON  a1.passenger_uuid = a6.passenger_id 
AND  a1.week = a6.week
LEFT JOIN [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds 
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  AND  substr(driver_reply_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyy-MM-dd')
  AND  weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(driver_reply_time,'yyyy-MM-dd'))
  ) a7
ON  a1.passenger_uuid = a7.passenger_id 
AND  a1.week = a7.week  
LEFT JOIN  [shuffle]
  (SELECT passenger_uuid
    ,route_plan_uuid
    ,ds
    ,route_cash_flow
    ,pay_coupon_amount  
    ,weekofyear(to_timestamp(settlement_time,'yyyy-MM-dd'))  AS week
  FROM pub_cockpit_db.dws_tra_pay_p_fact_dt
  WHERE substr(settlement_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyy-MM-dd')
  AND ds <> '20200831'  
  AND ds <> '20200807' 
  AND ds <> '20200801'
  AND ds <> '20200830'  -------该表这几天分区有问题,不剔除会报错  
  ) a8
ON  a1.passenger_uuid =a8.passenger_uuid 
AND  a1.week = a8.week
LEFT JOIN [shuffle]
  (SELECT city_code
    ,city_name 
  FROM pub_cockpit_db.dim_t_city  
  GROUP BY city_code,city_name
  ) a5
ON  a1.register_city = a5.city_code
WHERE a5.city_name in ('南京市','武汉市','广州市','重庆市','天津市','成都市','杭州市','郑州市','长沙市','长春市') 
GROUP BY city_name
  ,analy_clycle
  ,user_type
  ,source_channel       
  ,source 
)  

---------------------------全国,近8周漏斗数据


CREATE table pri_da_risk.dmh_mck_week_funnel_china_last_8week as (
SELECT STRAIGHT_JOIN '全国'                                AS city_name
  ,a1.week                              AS analy_clycle
  ,CASE WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) =  weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))    THEN '本周注册'
     WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+1) THEN '上周注册'  
     WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+2) THEN '上上周注册' 
        ELSE '上上周前注册'
  END                                  AS user_type 
  ,a1.source_channel                    AS source_channel      
  ,a1.source                            AS source 
  ,count(distinct a1.passenger_uuid)    AS cnt_register
  ,count(distinct a2.passenger_id)      AS cnt_activer
  ,count( distinct a3.passenger_id)     AS cnt_bubbler        
  ,count(distinct a6.passenger_id)      AS cnt_caller          
  ,count(distinct a7.passenger_id)      AS cnt_replyer       
  ,count( distinct a8.passenger_uuid)   AS cnt_finisher     
  ,count(distinct a8.route_plan_uuid)   AS cnt_finish_routid
  ,sum(a8.route_cash_flow)/100          AS route_cash_flow  
  ,sum(a8.pay_coupon_amount) /100       AS pay_coupon_amount  
FROM (SELECT weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
    ,ds                                      AS ds
    ,passenger_uuid         AS passenger_uuid
    ,source                 AS source
    ,source_channel         AS source_channel
    ,min(register_time)                      AS register_time
    ,max(register_city)                      AS register_city
  FROM pub_cockpit_db.dm_user_basic_char_dt
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd') 
  GROUP BY week
    ,passenger_uuid
    ,source
    ,source_channel
    ,ds
  ) a1  
LEFT JOIN  [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM pub_cockpit_db.ods_user_app_open_detail 
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  AND  client_id='1'
     ) a2
ON  a1.passenger_uuid = a2.passenger_id 
AND  a1.week = a2.week       
LEFT JOIN [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM  pub_cockpit_db.ods_ord_bubble_p_detail_fact
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  ) a3
ON  a1.passenger_uuid =a3.passenger_id
AND  a1.week = a3.week
LEFT JOIN [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds 
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  AND  substr(create_route_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyy-MM-dd')
  AND  weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(create_route_time,'yyyy-MM-dd'))
  ) a6
ON  a1.passenger_uuid = a6.passenger_id 
AND  a1.week = a6.week
LEFT JOIN [shuffle]
  (SELECT distinct passenger_id
    ,weekofyear(to_timestamp(ds,'yyyyMMdd'))  AS week
  FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds 
  WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyyMMdd')  
  AND  substr(driver_reply_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyy-MM-dd')
  AND  weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(driver_reply_time,'yyyy-MM-dd'))
  ) a7
ON  a1.passenger_uuid = a7.passenger_id 
AND  a1.week = a7.week  
LEFT JOIN [shuffle]
  (SELECT passenger_uuid
    ,route_plan_uuid
    ,ds
    ,route_cash_flow
    ,pay_coupon_amount  
    ,weekofyear(to_timestamp(settlement_time,'yyyy-MM-dd'))  AS week
  FROM pub_cockpit_db.dws_tra_pay_p_fact_dt
  WHERE substr(settlement_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval  8 weeks ),'yyyy-MM-dd')
  AND ds <> '20200831'  
  AND ds <> '20200807' 
  AND ds <> '20200801'
  AND ds <> '20200830'  -------该表这几天分区有问题,不剔除会报错  
  ) a8
ON  a1.passenger_uuid =a8.passenger_uuid 
AND  a1.week = a8.week
GROUP BY city_name
  ,analy_clycle
  ,user_type
  ,source_channel       
  ,source 
) 

---------------------------合并城市表和全国表,近8周漏斗数据


CREATE table pri_da_risk.dmh_mck_week_funnel_last_8week AS 
( 
SELECT * FROM pri_da_risk.dmh_mck_week_funnel_city_last_8week
UNION ALL
SELECT * FROM  pri_da_risk.dmh_mck_week_funnel_china_last_8week
)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
一、课程简介随着技术的飞速发展,经过多年的数据积累,各互联网公司已保存了海量的原始数据和各种业务数据,所以数据仓库技术是各大公司目前都需要着重发展投入的技术领域。数据仓库是面向分析的集成化数据环境,为企业所有决策制定过程,提供系统数据支持的战略集合。通过对数据仓库中数据的分析,可以帮助企业改进业务流程、控制成本、提高产品质量等。二、课程内容本次精心打造的数仓项目的课程,从项目架构的搭建,到数据采集模块的设计、数仓架构的设计、实战需求实现、即席查询的实现,我们针对国内目前广泛使用的Apache原生框架和CDH版本框架进行了分别介绍,Apache原生框架介绍中涉及到的技术框架包括Flume、Kafka、Sqoop、MySql、HDFS、Hive、Tez、Spark、Presto、Druid等,CDH版本框架讲解包括CM的安装部署、Hadoop、Zookeeper、Hive、Flume、Kafka、Oozie、Impala、HUE、Kudu、Spark的安装配置,透彻了解不同版本框架的区别联系,将大数据全生态系统前沿技术一网打尽。在过程中对大数据生态体系进行了系统的讲解,对实际企业数仓项目中可能涉及到的技术点都进行了深入的讲解和探讨。同时穿插了大量数仓基础理论知识,让你在掌握实战经验的同时能够打下坚实的理论基础。三、课程目标本课程以国内电商巨头实际业务应用场景为依托,对电商数仓的常见实战指标以及难点实战指标进行了详尽讲解,具体指标包括:每日、周、月活跃设备明细,留存用户比例,沉默用户、回流用户、流失用户统计,最近连续3周活跃用户统计,最近7天内连续3天活跃用户统计,GMV成交总额分析,转化率及漏斗分析,品牌复购率分析、订单表拉链表的设计等,让学生拥有更直观全面的实战经验。通过对本课程的学习,对数仓项目可以建立起清晰明确的概念,系统全面的掌握各项数仓项目技术,轻松应对各种数仓难题。四、课程亮点本课程结合国内多家企业实际项目经验,特别加入了项目架构模块,从集群规模的确定到框架版本选型以及服务器选型,手把手教你从零开始搭建大数据集群。并且总结大量项目实战中会遇到的问题,针对各个技术框架,均有调优实战经验,具体包括:常用Linux运维命令、Hadoop集群调优、Flume组件选型及性能优化、Kafka集群规模确认及关键参数调优。通过这部分学习,助学生迅速成长,获取前沿技术经验,从容解决实战问题。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值