-- 用户连续活跃区间记录表
CREATE TABLE dws.mall_app_uac_range(
guid bigint -- 用户id
,first_login_dt string -- 首访日期
,range_start_dt string -- 区间起始日期
,range_end_dt string -- 区间结束日期
)
partitioned by (dt string)
stored as orc
tblproperties('orc.compress'='snappy')
;
--从哪里计算 T-1日的连续活跃区间表 T日的日活表
--计算逻辑
-- 已封闭区间不用动;
-- 未封闭区间得看该用户在T日的日活中是否出现,如出现,不用动;否则,将区间封闭掉
-- 如果一个用户的区间记录中全部都是已封闭区间,但该用户在T日活跃了,则要为它生成一条新的区间记录
-- 如果日活中某用户在区间记录表中不存在,则为该用户生成新的区间记录
/* 站在2022-03-28日的角度
1,2022-01-01,2022-01-01,2022-02-10 1,0
1,2022-01-01,2022-02-14,9999-12-31 1,0
2,2022-02-03,2022-02-03,2022-02-12
3,2022-02-03,2022-02-03,2022-02-13 3,0
4,2022-02-04,2022-02-04,9999-12-31
5,1
2022-03-29日的日活数据
1,0
3,0
5,1
得出2022-03-29日的区间记录表
1,2022-01-01,2022-01-01,2022-02-10
1,2022-01-01,2022-01-14,9999-12-31
2,2022-02-03,2022-02-03,2022-02-12
3,2022-02-03,2022-02-03,2022-02-13
3,2022-02-03,2022-03-29,9999-12-31
4,2022-02-04,2022-02-04,2022-03-28
5,2022-02-16,2022-03-29,9999-12-31
-- 1.先拿T-1区间表 FULL JOIN T日活
-- 2.再从T-1区间表中,筛选出:不存在未封闭区间的人 JOIN T日活
-- 合并1、2 两部分结果
*/
with rng as (
SELECT
guid,
first_login_dt,
range_start_dt,
range_end_dt
FROM dws.mall_app_uac_range
WHERE dt='2022-03-28'
),dau as (
SELECT
guid,
is_new
FROM dws.mall_app_dau
WHERE dt='2022-03-29'
)
INSERT INTO TABLE dws.mall_app_uac_range PARTITION (dt='2022-03-29')
SELECT
nvl(rng.guid,dau.guid) as guid,
nvl(rng.first_login_dt,'2022-03-29') as first_login_dt,
nvl(rng.range_start_dt,'2022-03-29') as range_start_dt,
case
when dau.guid is null and rng.range_end_dt='9999-12-31' then '2022-03-28' --该用户今天没来,该情况需要封闭区间
when dau.guid is not null and rng.range_end_dt is null then '9999-12-31'
else rng.range_end_dt
end as range_end_dt
FROM
rng
FULL JOIN
dau
ON rng.guid=dau.guid
UNION ALL
SELECT
t1.guid as guid,
t1.first_login_dt as first_login_dt,
'2022-03-29' as range_start_dt,
'9999-12-31' as range_end_dt
FROM
(
SELECT
guid,
first_login_dt
FROM rng
GROUP BY guid,first_login_dt
HAVING max(range_end_dt) != '9999-12-31'
) t1
JOIN
dau
ON t1.guid=dau.guid
;
-- 算完最新区间表后,可以立即删除前日区间表分区
ALTER TABLE dws.mall_app_uac_range DROP PARTITION (dt='2022-03-28');
08-25
180
06-10
5595
03-29
2万+
09-14
269