数据:
User_id login_date
1001 2021-08-02
1001 2021-08-01
1001 2021-08-03
1001 2021-08-05
1001 2021-08-06
1001 2021-08-07
1001 2021-08-10
1001 2021-08-12
1002 2021-08-01
1002 2021-08-02
1002 2021-08-03
1002 2021-08-07
1002 2021-08-09
1002 2021-08-11
1002 2021-08-13
1002 2021-08-15
建表语句:
DROP TABLE IF EXISTS `login`;
CREATE TABLE `login` (
`user_id` int NOT NULL,
`login_date` date NULL DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = gbk COLLATE = gbk_chinese_ci ROW_FORMAT = Dynamic;
INSERT INTO `login` VALUES (1001, '2021-08-02');
INSERT INTO `login` VALUES (1001, '2021-08-01');
INSERT INTO `login` VALUES (1001, '2021-08-03');
INSERT INTO `login` VALUES (1001, '2021-08-05');
INSERT INTO `login` VALUES (1001, '2021-08-06');
INSERT INTO `login` VALUES (1001, '2021-08-07');
INSERT INTO `login` VALUES (1001, '2021-08-10');
INSERT INTO `login` VALUES (1001, '2021-08-12');
INSERT INTO `login` VALUES (1002, '2021-08-01');
INSERT INTO `login` VALUES (1002, '2021-08-02');
INSERT INTO `login` VALUES (1002, '2021-08-03');
INSERT INTO `login` VALUES (1002, '2021-08-07');
INSERT INTO `login` VALUES (1002, '2021-08-09');
INSERT INTO `login` VALUES (1002, '2021-08-11');
INSERT INTO `login` VALUES (1002, '2021-08-13');
INSERT INTO `login` VALUES (1002, '2021-08-15');
方法一:等差数列法,不灵活
select
user_id,max(continuous) max_continuous
from (
select
-- sum(days)是指间隔连续的天数,而count(*)-1是间隔连续的天数中间的值,
-- 比如1,3,5天,sum(days)=3,而count(*)-1就是2,4这两天
user_id,sum(days)+count(*)-1 continuous
from (
select
user_id,flag_1,days,
-- 再继续对flag_1等差,这样算出来的相同的日期就是间隔了一天的
date_sub(flag_1,interval row_number() over(partition by user_id order by flag_1) day) flag_2
from (
select -- 查询出绝对连续的天数
user_id,
flag_1,
count(1) days
from (
select
user_id,login_date,
-- 连续的日期flag_1是相同,而隔了一天的日期flag_1是差一天的
DATE_SUB(login_date,interval row_number() over(partition by user_id order by login_date) day) flag_1
from login) t1 group by user_id,flag_1
) t2) t3 group by user_id,flag_2 )t4 group by user_id
方法二:现上分组法,灵活,可指定间隔n天连续
select
-- 按id分组,求最大的连续天数
user_id,
max(continuous_login_days) max_continuous_login_days
from (
select
-- 将组内最大日期和最小日期相减+1就是连续天数
user_id,datediff(max(login_date),min(login_date))+1 continuous_login_days
from (
select
user_id,login_date,
-- 超过2天以上的新开一组,因为sum默认是最前行到当前行,差值不到2天的会sum(0),就等于没加
-- 若断2天算连续,这里就改为sum(if(diff>3,1,0))
sum(if(diff>2,1,0)) over(partition by user_id order by login_date) group_id
from (
select
user_id,login_date,
datediff(login_date,prev_day) diff -- 计算与前一天的差值
from(
select
user_id,login_date,
lag(login_date,1,"1970-01-01") over(partition by user_id order by login_date) prev_day
from login) t1 ) t2 ) t3
group by user_id,group_id ) t4
group by user_id
结果:
user_id | max_continuous |
1001 | 7 |
1002 | 9 |