题目
某游戏公司记录的用户每日登录数据
id dt
1001 2021-12-12
1002 2021-12-12
1001 2021-12-13
1001 2021-12-14
1001 2021-12-16
1002 2021-12-16
1001 2021-12-19
1002 2021-12-17
1001 2021-12-20
计算每个用户最大的连续登录天数,可以间隔一天,例如:如果用户在1,3,5,6登录游戏,则视为连续登录6天
WITH login_data AS (
SELECT
id,
dt,
-- 使用datediff和lag函数计算当前日期和上一个日期的差值
datediff(dt, lag(dt, 1) OVER (PARTITION BY id ORDER BY dt)) AS diff
FROM
user_login
),
-- 创建一个名为continuous_login的中间表,判断是否开始了新的连续登录,并对新的连续登录进行分组
continuous_login AS (
SELECT
id,
dt,
-- 如果diff为NULL或者大于2,那么表示开始了新的连续登录
CASE
WHEN diff IS NULL OR diff > 2 THEN dt
ELSE null
END AS new_login,
sum(CASE WHEN diff IS NULL OR diff > 2 THEN 1 ELSE 0 END) OVER (PARTITION BY id ORDER BY dt) AS group_id
FROM
login_data
),
-- 计算每个用户每组连续登录的天数,并取最大值作为最大的连续登录天数
max_continuous_login AS (
SELECT
id,
datediff(dt, first_value(new_login) OVER (PARTITION BY id ,group_id ORDER BY dt ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) + 1 AS continuous_days
FROM
continuous_login
)
SELECT
id,
max(continuous_days) AS max_continuous_days
FROM
max_continuous_login
GROUP BY
id;
过程解析: