连续N天登录
现有用户登录表(user_active_log)一份,里面有2个字段:userId(用户ID),createdTime(登录时间戳),需要统计2021年12月份连续登录了7天的用户数量。
第一种方法
思路:日期列减去一列数字得到的日期相等
第一步:选取12月份的记录,根据用户id和登录日期去重
select userId, SUBSTR(createdTime, 1, 10) a_createdTime
from user_active_log
where SUBSTR(createdTime, 1, 7) = '2021-12'
group by userId, SUBSTR(createdTime, 1, 10)
第二步:创建辅助列a_rk(每个userId下的日期排序值)
select userId, a_createdTime, row_number() OVER(PARTITION BY userId ORDER BY a_createdTime) a_rk
from (
select userId, SUBSTR(createdTime, 1, 10) a_createdTime
from user_active_log
where SUBSTR(createdTime, 1, 7) = '2021-12'
group by userId, SUBSTR(createdTime, 1, 10)
) t0
第三步:创建辅助列b_createdTime(用登录日期减去排序值,得到新时间列)
select *, DATE_SUB(a_createdTime, INTERVAL a_rk - 1 DAY) b_createdTime
from (
select userId, a_createdTime, row_number() OVER(PARTITION BY userId ORDER BY a_createdTime) a_rk
from (
select userId, SUBSTR(createdTime, 1, 10) a_createdTime
from user_active_log
where SUBSTR(createdTime, 1, 7) = '2021-12'
group by userId, SUBSTR(createdTime, 1, 10)
) t0
) t1
第四步:根据起步时间列统计连续登录天数
select userId, b_createdTime, count(1) 'days'
from (
select *, DATE_SUB(a_createdTime, INTERVAL a_rk - 1 DAY) b_createdTime
from (
select userId, a_createdTime, row_number() OVER(PARTITION BY userId ORDER BY a_createdTime) a_rk
from (
select userId, SUBSTR(createdTime, 1, 10) a_createdTime
from user_active_log
where SUBSTR(createdTime, 1, 7) = '2021-12'
group by userId, SUBSTR(createdTime, 1, 10)
) t0
) t1
) t2
GROUP BY userId, b_createdTime
HAVING count(1) > 6
第二种方案
思路:划分窗口为当前行和前六行,计算max-min大于等于6天
select DISTINCT userId
from (
select userId, max(a_createdTime) over win as 'a', min(a_createdTime) over win as 'b'
from (
select userId, SUBSTR(createdTime, 1, 10) a_createdTime
from user_active_log
where SUBSTR(createdTime, 1, 7) = '2021-12'
group by userId, SUBSTR(createdTime, 1, 10)
) t0
WINDOW win as (PARTITION by userId ORDER BY a_createdTime ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
) t1
where datediff(a,b) >= 6