目录
一、问题:
hive sql实现查询连续n天登陆的用户
二、思想:
1、选出用户id,以及登陆日期,去重
2、窗口函数按照用户id分组,登陆日期不重复排序
3、date_sub归一化登陆日期
4、按照用户id和归一化后的日期 计算 登陆日期 的数量
三、代码:
select distinct user_id
from (
select user_id
, continue_date
, count(login_date) as continue_day_cnt
from (
select user_id
, login_date
, date_sub(login_date,row_number() over(partition by user_id order by login_date) )as continue_date
from (
select user_id
, substr(login_time, 1, 10) as login_date
--from dw.dw_user_login_log
FROM
(
select 1 as user_id
,'2020-10-04' as login_date
union all
select 1 as user_id
,'2020-10-05' as login_date
union all
select 1 as user_id
,'2020-10-06' as login_date
union all
select 1 as user_id
,'2020-10-07' as login_date
) t1
where 1 = 1
group by user_id
, substr(login_time, 1, 10)
) t
) m
where 1 = 1
group by user_id
, continue_date
having count(login_date) >= 3 -- 连续3天登录,根据业务场景调整这一数值
) x ;