创建表test_user_login_3days
:
create table test_user_login_3days(
user_id int,
login_date date
);
insert into test_user_login_3days values (123,'2018-08-02');
insert into test_user_login_3days values (123,'2018-08-03');
insert into test_user_login_3days values (123,'2018-08-04');
insert into test_user_login_3days values (456,'2018-11-02');
insert into test_user_login_3days values (456,'2018-12-09');
insert into test_user_login_3days values (456,'2018-11-04');
insert into test_user_login_3days values (456,'2018-11-05');
insert into test_user_login_3days values (789,'2018-01-01');
insert into test_user_login_3days values (789,'2018-04-23');
insert into test_user_login_3days values (789,'2018-09-10');
insert into test_user_login_3days values (789,'2018-09-11');
insert into test_user_login_3days values (789,'2018-09-12');
insert into test_user_login_3days values (10001,'2018-04-23');
insert into test_user_login_3days values (10001,'2018-04-24');
insert into test_user_login_3days values (10001,'2018-09-11');
insert into test_user_login_3days values (10001,'2018-09-12');
+---------+------------+
| user_id | login_date |
+---------+------------+
| 123 | 2018-08-02 |
| 123 | 2018-08-03 |
| 123 | 2018-08-04 |
| 456 | 2018-11-02 |
| 456 | 2018-12-09 |
| 789 | 2018-01-01 |
| 789 | 2018-04-23 |
| 789 | 2018-09-10 |
| 789 | 2018-09-11 |
| 789 | 2018-09-12 |
| 10001 | 2018-04-23 |
| 10001 | 2018-04-24 |
| 10001 | 2018-09-11 |
| 10001 | 2018-09-12 |
| 10001 | 2018-09-12 |
| 456 | 2018-11-05 |
| 456 | 2018-11-04 |
+---------+------------+
17 rows in set (0.00 sec)
用到的函数:lead(column, 1, null)
:取字段column
提前一行的数据。
用法详见:SQL SERVER LEAD和LAG函数
思路:
生成新字段lead_1day
、lead_2day
分别表示把login_date
列提前一行、两行:
select user_id, login_date,
lead(login_date, 1) over(partition by user_id order by login_date) as lead_1day, # 提前一行的数据
lead(login_date, 2) over(partition by user_id order by login_date) as lead_2day # 提前两行的数据
from test_user_login_3days
+---------+------------+------------+------------+
| user_id | login_date | lead_1day | lead_2day |
+---------+------------+------------+------------+
| 123 | 2018-08-02 | 2018-08-03 | 2018-08-04 |
| 123 | 2018-08-03 | 2018-08-04 | NULL |
| 123 | 2018-08-04 | NULL | NULL |
| 456 | 2018-11-02 | 2018-11-04 | 2018-11-05 |
| 456 | 2018-11-04 | 2018-11-05 | 2018-12-09 |
| 456 | 2018-11-05 | 2018-12-09 | NULL |
| 456 | 2018-12-09 | NULL | NULL |
| 789 | 2018-01-01 | 2018-04-23 | 2018-09-10 |
| 789 | 2018-04-23 | 2018-09-10 | 2018-09-11 |
| 789 | 2018-09-10 | 2018-09-11 | 2018-09-12 |
| 789 | 2018-09-11 | 2018-09-12 | NULL |
| 789 | 2018-09-12 | NULL | NULL |
| 10001 | 2018-04-23 | 2018-04-24 | 2018-09-11 |
| 10001 | 2018-04-24 | 2018-09-11 | 2018-09-12 |
| 10001 | 2018-09-11 | 2018-09-12 | 2018-09-12 |
| 10001 | 2018-09-12 | 2018-09-12 | NULL |
| 10001 | 2018-09-12 | NULL | NULL |
+---------+------------+------------+------------+
17 rows in set (0.05 sec)
这时,如果lead_2day
和lead_1day
相差1天,并且lead_1day
和login_date
也相差1天,那么就意味着存在连续三天。
加入判断语句:where (lead_2day - lead_1day) = 1 and (lead_1day - login_date) = 1
select * from (
select *,
lead(login_date, 1) over(partition by user_id order by login_date) as lead_1day, # 提前一行的数据
lead(login_date, 2) over(partition by user_id order by login_date) as lead_2day from # 提前两行的数据test_user_login_3days
) as A
where (lead_2day - lead_1day) = 1
and (lead_1day - login_date) = 1
也就是说,用户123和用户789连续登陆了。
+---------+------------+------------+------------+
| user_id | login_date | lead_1day | lead_2day |
+---------+------------+------------+------------+
| 123 | 2018-08-02 | 2018-08-03 | 2018-08-04 |
| 789 | 2018-09-10 | 2018-09-11 | 2018-09-12 |
+---------+------------+------------+------------+
2 rows in set (0.00 sec)
最终代码
写法一:
select A.user_id from (
select *,
lead(login_date, 1) over(partition by user_id order by login_date) as lead_1day, # 提前一行的数据
lead(login_date, 2) over(partition by user_id order by login_date) as lead_2day from # 提前两行的数据test_user_login_3days
) as A
where (lead_2day - lead_1day) = 1
and (lead_1day - login_date) = 1
写法二:
select user_id from (
select user_id,
datediff(lead(login_date, 1) over(partition by user_id order by login_date), login_date) as diff_1day,
datediff(lead(login_date, 2) over(partition by user_id order by login_date), login_date) as diff_2day
from test_user_login_3days
) as A
where diff_1day = 1 and diff_2day = 2;
ps.如果是sql就不用子查询了。