180.1.连续登录N天(数据库)
原表
使用lag&lead+datediff窗口函数
一个计算有误的例子
SELECT a.user_id, a.login_date AS day1, LEAD(a.login_date,1)OVER(ORDER BY a.user_id) AS day2, LEAD(a.login_date,2)OVER(ORDER BY a.user_id) AS day3 FROM login_log a | SELECT a.user_id, a.login_date AS day1, LEAD(a.login_date,1)OVER(PARTITION BY a.user_id ORDER BY a.user_id) AS day2, LEAD(a.login_date,2)OVER(PARTITION BY a.user_id ORDER BY a.user_id) AS day3 FROM login_log a |
上次是求连续出现的数字,变换一下就能求连续的日期,只需要多用一个datediff函数就行。
但这次不一样的是,每一个日期都有与之对应的用户。虽然日期是连续的,倘若日期不属于同一个用户,那么就不能计算在内。
图1就是因为没分组,直接order by,导致输出结果将id为2的用户也计算在内
SQL代码
连续登录三天,输出结果是1
SELECT DISTINCT t.user_id FROM
(
SELECT a.user_id,
a.login_date AS day1,
LEAD(a.login_date,1)OVER(PARTITION BY a.user_id ORDER BY a.user_id) AS day2,
LEAD(a.login_date,2)OVER(PARTITION BY a.user_id ORDER BY a.user_id) AS day3
FROM login_log a
) t
WHERE DATEDIFF(t.day2,t.day1)=1 AND DATEDIFF(t.day3,t.day2)=1
-- 来个混合型的
SELECT DISTINCT user_id
FROM
(SELECT user_id,
LAG(login_date,1) OVER(PARTITION BY user_id ORDER BY login_date) AS lag_login_date,
login_date,
LEAD(login_date,1) OVER(PARTITION BY user_id ORDER BY login_date) AS lead_login_date
FROM dwd.login_log)t1
WHERE DATEDIFF(login_date,lag_login_date)=1 AND DATEDIFF(lead_login_date,login_date)=1
SELECT user_id,
LAG(login_date,1) OVER(PARTITION BY user_id ORDER BY login_date) AS lag_login_date,
login_date,
LEAD(login_date,1) OVER(PARTITION BY user_id ORDER BY login_date) AS lead_login_date
FROM login_log
连续登录两天,输出结果是1和3
SELECT DISTINCT t.user_id FROM
(
SELECT a.user_id,
a.login_date AS day1,
LEAD(a.login_date,1)OVER(PARTITION BY a.user_id ORDER BY a.user_id) AS day2,
LEAD(a.login_date,2)OVER(PARTITION BY a.user_id ORDER BY a.user_id) AS day3
FROM login_log a
) t
WHERE DATEDIFF(t.day2,t.day1)=1
使用date_sub函数
我用的是msql,不同数据库之间语法会有差异
SELECT DATE_ADD('2022-02-21', INTERVAL 12 DAY), DATE_SUB('2022-02-23', INTERVAL 13 DAY)
>>输出结果
2022-03-05 2022-02-10
SELECT user_id, login_date, RANK()OVER(PARTITION BY user_id ORDER BY login_date) rk FROM login_log | SELECT user_id,login_date,rk-1, DATE_SUB(login_date, INTERVAL t1.rk-1 DAY) AS con_login_date FROM (SELECT user_id,login_date, RANK()OVER(PARTITION BY user_id ORDER BY login_date) rk FROM login_log) t1 |
SELECT user_id,con_login_date,COUNT(*) nums
FROM
(
SELECT user_id,login_date,rk-1, DATE_SUB(login_date, INTERVAL t1.rk-1 DAY) AS con_login_date
FROM
(SELECT user_id,login_date,RANK()OVER(PARTITION BY user_id ORDER BY login_date) rk
FROM login_log) t1
)t2
GROUP BY user_id,con_login_date
HAVING COUNT(*) >= 3;