1.问题描述
统计连续登录天数超过3天的用户
2.解题思路
3.数据准备
3.1 建表
CREATE TABLE user_activity (
user_id varchar(20) ,
activity_date varchar(20)
)
3.2 数据
INSERT INTO user_activity VALUES ('user1', '2023-03-01');
INSERT INTO user_activity VALUES ('user2', '2023-03-02');
INSERT INTO user_activity VALUES ('user3', '2023-03-03');
INSERT INTO user_activity VALUES ('user4', '2023-03-04');
INSERT INTO user_activity VALUES ('user1', '2023-03-08');
INSERT INTO user_activity VALUES ('user2', '2023-03-08');
INSERT INTO user_activity VALUES ('user5', '2023-03-08');
INSERT INTO user_activity VALUES ('user6', '2023-03-08');
INSERT INTO user_activity VALUES ('user3', '2023-03-09');
INSERT INTO user_activity VALUES ('user5', '2023-03-09');
INSERT INTO user_activity VALUES ('user6', '2023-03-09');
INSERT INTO user_activity VALUES ('user7', '2023-03-09');
INSERT INTO user_activity VALUES ('user3', '2023-03-10');
INSERT INTO user_activity VALUES ('user5', '2023-03-10');
INSERT INTO user_activity VALUES ('user6', '2023-03-10');
INSERT INTO user_activity VALUES ('user7', '2023-03-10');
INSERT INTO user_activity VALUES ('user5', '2023-03-11');
INSERT INTO user_activity VALUES ('user6', '2023-03-11');
INSERT INTO user_activity VALUES ('user7', '2023-03-11');
INSERT INTO user_activity VALUES ('user6', '2023-03-12');
INSERT INTO user_activity VALUES ('user7', '2023-03-12');
INSERT INTO user_activity VALUES ('user7', '2023-03-13');
INSERT INTO user_activity VALUES ('user8', '2023-03-13');
INSERT INTO user_activity VALUES ('user7', '2023-03-14');
INSERT INTO user_activity VALUES ('user8', '2023-03-14');
INSERT INTO user_activity VALUES ('user7', '2023-03-15');
INSERT INTO user_activity VALUES ('user8', '2023-03-15');
INSERT INTO user_activity VALUES ('user8', '2023-03-16');
4.解法1
WITH w1 AS (
SELECT
user_id,
to_date(activity_date,'YYYY-MM-DD') AS activity_date,
ROW_NUMBER() over(partition by user_id order by activity_date) as rn
from user_activity
),
w2 AS (
SELECT
user_id,
activity_date,
date(activity_date - (rn || ' day')::INTERVAL) as sub_date
FROM w1
)
SELECT
user_id,
min(activity_date) AS start_date,
max(activity_date) AS end_date,
count(*) AS login_times
FROM w2
GROUP BY user_id,sub_date
having count(*) >=3
ORDER BY 1
5.解法2
WITH w1 AS
(
select
user_id,
to_date(activity_date,'YYYY-MM-DD') AS activity_date
FROM user_activity
),
w2 AS (
select
user_id,
LAG(activity_date,1,activity_date) over(partition by user_id order by activity_date) as lag_login_date,
activity_date as current_login_date,
LEAD(activity_date,1,activity_date) over(partition by user_id order by activity_date) as lead_login_date
from w1
ORDER BY 1,3
),
w3 AS (
SELECT
user_id,
current_login_date,
lead_login_date - current_login_date AS diff
FROM w2
WHERE lead_login_date - current_login_date IN (0,1)
)
SELECT
user_id,
min(current_login_date) AS start_date,
max(current_login_date) AS end_date,
count(diff) AS login_times
FROM w3
GROUP BY user_id
HAVING count(diff)>=3
ORDER BY 1
参考
https://blog.csdn.net/m0_49303490/article/details/130469205