用户漏斗数据
每日都要更新
这次优化使用shuffle,果然效率提高了一倍
/*
编写 人:me
编写日期:2020.10.28
修改日期:2020.11.2
目 标:建设承影看板
修改:1、增加上上周注册用户,2、呈现近8周的数据
*/
---------------------------分城市,近8周漏斗数据
CREATE table pri_da_risk.dmh_mck_week_funnel_city_last_8week as (
SELECT STRAIGHT_JOIN a5.city_name AS city_name
,a1.week AS analy_clycle
,CASE WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd')) THEN '本周注册'
WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+1) THEN '上周注册'
WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+2) THEN '上上周注册'
ELSE '上上周前注册'
END AS user_type
,a1.source_channel AS source_channel
,a1.source AS source
,count(distinct a1.passenger_uuid) AS cnt_register
,count(distinct a2.passenger_id) AS cnt_activer
,count( distinct a3.passenger_id) AS cnt_bubbler
,count(distinct a6.passenger_id) AS cnt_caller
,count(distinct a7.passenger_id) AS cnt_replyer
,count( distinct a8.passenger_uuid) AS cnt_finisher
,count(distinct a8.route_plan_uuid) AS cnt_finish_routid
,sum(a8.route_cash_flow)/100 AS route_cash_flow
,sum(a8.pay_coupon_amount) /100 AS pay_coupon_amount
FROM (SELECT weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
,ds AS ds
,passenger_uuid AS passenger_uuid
,source AS source
,source_channel AS source_channel
,min(register_time) AS register_time
,max(register_city) AS register_city
FROM pub_cockpit_db.dm_user_basic_char_dt
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
GROUP BY week
,passenger_uuid
,source
,source_channel
,ds
) a1
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.ods_user_app_open_detail
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
AND client_id='1'
) a2
ON a1.passenger_uuid = a2.passenger_id
AND a1.week = a2.week
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.ods_ord_bubble_p_detail_fact
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
) a3
ON a1.passenger_uuid =a3.passenger_id
AND a1.week = a3.week
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
AND substr(create_route_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyy-MM-dd')
AND weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(create_route_time,'yyyy-MM-dd'))
) a6
ON a1.passenger_uuid = a6.passenger_id
AND a1.week = a6.week
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
AND substr(driver_reply_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyy-MM-dd')
AND weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(driver_reply_time,'yyyy-MM-dd'))
) a7
ON a1.passenger_uuid = a7.passenger_id
AND a1.week = a7.week
LEFT JOIN [shuffle]
(SELECT passenger_uuid
,route_plan_uuid
,ds
,route_cash_flow
,pay_coupon_amount
,weekofyear(to_timestamp(settlement_time,'yyyy-MM-dd')) AS week
FROM pub_cockpit_db.dws_tra_pay_p_fact_dt
WHERE substr(settlement_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyy-MM-dd')
AND ds <> '20200831'
AND ds <> '20200807'
AND ds <> '20200801'
AND ds <> '20200830' -------该表这几天分区有问题,不剔除会报错
) a8
ON a1.passenger_uuid =a8.passenger_uuid
AND a1.week = a8.week
LEFT JOIN [shuffle]
(SELECT city_code
,city_name
FROM pub_cockpit_db.dim_t_city
GROUP BY city_code,city_name
) a5
ON a1.register_city = a5.city_code
WHERE a5.city_name in ('南京市','武汉市','广州市','重庆市','天津市','成都市','杭州市','郑州市','长沙市','长春市')
GROUP BY city_name
,analy_clycle
,user_type
,source_channel
,source
)
---------------------------全国,近8周漏斗数据
CREATE table pri_da_risk.dmh_mck_week_funnel_china_last_8week as (
SELECT STRAIGHT_JOIN '全国' AS city_name
,a1.week AS analy_clycle
,CASE WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd')) THEN '本周注册'
WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+1) THEN '上周注册'
WHEN weekofyear(to_timestamp(a1.ds,'yyyyMMdd')) = (weekofyear(to_timestamp(a1.register_time,'yyyy-MM-dd'))+2) THEN '上上周注册'
ELSE '上上周前注册'
END AS user_type
,a1.source_channel AS source_channel
,a1.source AS source
,count(distinct a1.passenger_uuid) AS cnt_register
,count(distinct a2.passenger_id) AS cnt_activer
,count( distinct a3.passenger_id) AS cnt_bubbler
,count(distinct a6.passenger_id) AS cnt_caller
,count(distinct a7.passenger_id) AS cnt_replyer
,count( distinct a8.passenger_uuid) AS cnt_finisher
,count(distinct a8.route_plan_uuid) AS cnt_finish_routid
,sum(a8.route_cash_flow)/100 AS route_cash_flow
,sum(a8.pay_coupon_amount) /100 AS pay_coupon_amount
FROM (SELECT weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
,ds AS ds
,passenger_uuid AS passenger_uuid
,source AS source
,source_channel AS source_channel
,min(register_time) AS register_time
,max(register_city) AS register_city
FROM pub_cockpit_db.dm_user_basic_char_dt
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
GROUP BY week
,passenger_uuid
,source
,source_channel
,ds
) a1
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.ods_user_app_open_detail
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
AND client_id='1'
) a2
ON a1.passenger_uuid = a2.passenger_id
AND a1.week = a2.week
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.ods_ord_bubble_p_detail_fact
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
) a3
ON a1.passenger_uuid =a3.passenger_id
AND a1.week = a3.week
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
AND substr(create_route_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyy-MM-dd')
AND weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(create_route_time,'yyyy-MM-dd'))
) a6
ON a1.passenger_uuid = a6.passenger_id
AND a1.week = a6.week
LEFT JOIN [shuffle]
(SELECT distinct passenger_id
,weekofyear(to_timestamp(ds,'yyyyMMdd')) AS week
FROM pub_cockpit_db.dws_ord_route_p_detail_fact_ds
WHERE ds >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyyMMdd')
AND substr(driver_reply_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyy-MM-dd')
AND weekofyear(to_timestamp(ds,'yyyyMMdd')) = weekofyear(to_timestamp(driver_reply_time,'yyyy-MM-dd'))
) a7
ON a1.passenger_uuid = a7.passenger_id
AND a1.week = a7.week
LEFT JOIN [shuffle]
(SELECT passenger_uuid
,route_plan_uuid
,ds
,route_cash_flow
,pay_coupon_amount
,weekofyear(to_timestamp(settlement_time,'yyyy-MM-dd')) AS week
FROM pub_cockpit_db.dws_tra_pay_p_fact_dt
WHERE substr(settlement_time,1,10) >= from_unixtime(unix_timestamp( date_add(now(),-(dayofweek(now())-2))- interval 8 weeks ),'yyyy-MM-dd')
AND ds <> '20200831'
AND ds <> '20200807'
AND ds <> '20200801'
AND ds <> '20200830' -------该表这几天分区有问题,不剔除会报错
) a8
ON a1.passenger_uuid =a8.passenger_uuid
AND a1.week = a8.week
GROUP BY city_name
,analy_clycle
,user_type
,source_channel
,source
)
---------------------------合并城市表和全国表,近8周漏斗数据
CREATE table pri_da_risk.dmh_mck_week_funnel_last_8week AS
(
SELECT * FROM pri_da_risk.dmh_mck_week_funnel_city_last_8week
UNION ALL
SELECT * FROM pri_da_risk.dmh_mck_week_funnel_china_last_8week
)