SQL求出最大连续登陆天数

一、题目描述

求用户登陆信息表中每个用户截至汇总日期的连续登陆天数。连续登陆以汇总日期当天必须登陆为前提;表中每个用户每天只有一条登陆数据(计算中不涉及天内去重)。

表描述:user_id:用户的id;

sigin_date:用户的登陆日期。

二、解法分析

注:求解过程有多种方式,下述求解解法为笔者思路,其他解法可在评论区交流。

思路:

该问题的突破点在于登陆日期:先为每条登陆记录计算“连续登陆标识”,再以该标识作为过滤条件,筛选出截至汇总日期的连续登陆记录,最后按user_id分组,用count()函数计数,得到每个用户的连续登陆天数。

连续登陆标识 =(汇总日期 - 用户的登陆日期)- 按登陆日期倒序开窗得到的顺序号;标识等于 -1 的记录属于截至汇总日期的连续登陆区间(例如汇总日期当天登陆:日期差为 0,顺序号为 1,标识为 -1)。

三、求解过程及结果展示

-- Dialect: PostgreSQL
-- 1. Table definition
--    One row per user per login day. sigin_date is a real DATE so that ordering
--    and date arithmetic never depend on how a string happens to be formatted.
DROP TABLE IF EXISTS test_sigindate_cnt;
CREATE TABLE test_sigindate_cnt (
    user_id    VARCHAR,
    sigin_date DATE
);

-- 2. Test data (deliberately inserted out of date order)
INSERT INTO test_sigindate_cnt (user_id, sigin_date)
VALUES
    ('uid_1', DATE '2021-08-03'),
    ('uid_1', DATE '2021-08-04'),
    ('uid_1', DATE '2021-08-01'),
    ('uid_1', DATE '2021-08-02'),
    ('uid_1', DATE '2021-08-05'),
    ('uid_1', DATE '2021-08-06'),
    ('uid_2', DATE '2021-08-01'),
    ('uid_2', DATE '2021-08-05'),
    ('uid_2', DATE '2021-08-02'),
    ('uid_2', DATE '2021-08-06'),
    ('uid_3', DATE '2021-08-04'),
    ('uid_3', DATE '2021-08-06'),
    ('uid_4', DATE '2021-08-03'),
    ('uid_4', DATE '2021-08-02')
;


-- 3. Length of each user's login streak that ends on the summary date
--    (2021-08-06). Users with no login on the summary date produce no row.
SELECT
    user_id,
    count(*) AS sigin_cnt
FROM (
    SELECT
        user_id,
        -- Whole days between the summary date and this login. The explicit cast is
        -- required when sigin_date is a string column: PostgreSQL has no
        -- "timestamp - varchar" operator.
        date_part('day', TIMESTAMP '2021-08-06' - CAST(sigin_date AS TIMESTAMP)) AS data_diff,
        -- 1 for the user's most recent login, 2 for the one before it, ...
        -- Ordered by the date value, not by the string representation.
        row_number() OVER (
            PARTITION BY user_id
            ORDER BY CAST(sigin_date AS DATE) DESC
        ) AS row_num
    FROM test_sigindate_cnt
) AS t
-- Inside an unbroken streak that ends on the summary date the n-th most recent
-- login is exactly n - 1 days old, i.e. data_diff - row_num = -1.
WHERE data_diff - row_num = -1
GROUP BY user_id
;

 

-- Fixture for approaches 1 and 2: one row per (user, login day).
DROP TABLE IF EXISTS user_login_table;
CREATE TABLE user_login_table (
    user_id    CHAR(10),
    login_date DATE
);

-- Explicit column list and ISO-8601 (YYYY-MM-DD) literals: slash-delimited,
-- non-padded dates rely on engine-specific parsing rules.
INSERT INTO user_login_table (user_id, login_date)
VALUES
    ('201', '2017-01-01'),
    ('201', '2017-01-02'),
    ('202', '2017-01-02'),
    ('202', '2017-01-03'),
    ('203', '2017-01-03'),
    ('201', '2017-01-04'),
    ('202', '2017-01-04'),
    ('201', '2017-01-05'),
    ('202', '2017-01-05'),
    ('201', '2017-01-06'),
    ('203', '2017-01-06'),
    ('203', '2017-01-07');


-- Approach 1 (MySQL 8+): longest run of consecutive login days per user.
-- Within one user, consecutive days share the same value of
-- "login_date minus its row number", so that value identifies a streak.
-- Relies on at most one row per user per day.
WITH numbered_login AS (
    -- Number each user's logins in chronological order.
    SELECT
        user_id,
        login_date,
        row_number() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
    FROM user_login_table
),
streak_key AS (
    -- Real date arithmetic is required here. A bare `login_date - rn` is evaluated
    -- by MySQL on the number YYYYMMDD, which mis-groups streaks that cross a month
    -- boundary. In PostgreSQL `date - bigint` is a type error instead; the
    -- equivalent there is: login_date - CAST(rn AS integer).
    SELECT
        user_id,
        date_sub(login_date, INTERVAL rn DAY) AS new_date
    FROM numbered_login
),
streak_length AS (
    -- One row per streak with its length in days.
    SELECT
        user_id,
        new_date,
        count(*) AS login_days
    FROM streak_key
    GROUP BY
        user_id,
        new_date
)
SELECT
    user_id,
    max(login_days)
FROM streak_length
GROUP BY user_id;

-- Approach 2: use a calendar (date dimension) table instead of date arithmetic.

-- Fixture: one row per calendar day covering the login data.
DROP TABLE IF EXISTS dic_date_table;
CREATE TABLE dic_date_table (
    dic_date DATE
);

-- Explicit column list and ISO-8601 (YYYY-MM-DD) literals: slash-delimited,
-- non-padded dates rely on engine-specific parsing rules.
INSERT INTO dic_date_table (dic_date)
VALUES
    ('2017-01-01'),
    ('2017-01-02'),
    ('2017-01-03'),
    ('2017-01-04'),
    ('2017-01-05'),
    ('2017-01-06'),
    ('2017-01-07');

-- Longest run of consecutive login days per user, using calendar positions:
-- consecutive days have consecutive calendar row numbers, so
-- "calendar position - per-user login position" is constant within a streak.
WITH login_seq AS (
    -- Position of each login within its user's chronological history.
    SELECT
        user_id,
        login_date,
        row_number() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
    FROM user_login_table
),
calendar_seq AS (
    -- Position of each day within the calendar table.
    SELECT
        dic_date,
        row_number() OVER (ORDER BY dic_date) AS rn_1
    FROM dic_date_table
),
streak_key AS (
    -- NOTE(review): with LEFT JOIN, a login date missing from dic_date_table
    -- gets num = NULL, and all such rows of a user land in one group.
    SELECT
        l.user_id,
        c.rn_1 - l.rn AS num
    FROM login_seq AS l
    LEFT JOIN calendar_seq AS c
        ON l.login_date = c.dic_date
),
streak_length AS (
    -- One row per streak with its length in days.
    SELECT
        user_id,
        num,
        count(*) AS login_days
    FROM streak_key
    GROUP BY
        user_id,
        num
)
SELECT
    user_id,
    max(login_days) AS max_login_days
FROM streak_length
GROUP BY user_id;

连续登陆问题的变形1(mysql)

测试数据及结果代码(求每个 client_id 按 login_day 排序后,连续登录成功即 result = 'success' 的最大次数)

-- Fixture for variant 1: one login attempt per row, with its outcome.
DROP TABLE IF EXISTS app_data;
CREATE TABLE app_data (
    client_id CHAR(10),
    login_day DATE,
    result    CHAR(10)
);

INSERT INTO app_data (client_id, login_day, result)
VALUES
    ('00001', '2022-01-01', 'success'),
    ('00002', '2022-01-02', 'fail'),
    ('00001', '2022-01-03', 'timeout'),
    ('00001', '2022-01-15', 'success'),
    ('00002', '2022-01-16', 'timeout'),
    ('00001', '2022-01-17', 'success'),
    ('00001', '2022-01-18', 'success'),
    ('00001', '2022-01-19', 'timeout'),
    ('00001', '2022-01-20', 'timeout'),
    ('00001', '2022-01-21', 'success'),
    ('00001', '2022-01-22', 'success'),
    ('00002', '2022-01-18', 'success'),
    ('00003', '2022-01-18', 'success');

-- Variant 1 (MySQL 8+): per client, the longest run of consecutive rows
-- (ordered by login_day) whose result is 'success'.
-- Assumes at most one row per client per day; rows sharing a login_day are
-- peers in the default window frame and would get the same running total.
WITH attempt AS (
    SELECT
        client_id,
        login_day,
        -- 1 for a successful attempt, 0 for anything else.
        CASE WHEN result = 'success' THEN 1 ELSE 0 END AS a,
        -- Running number of non-success rows so far. It stays constant across a
        -- run of successes, so it identifies the run (together with client_id).
        sum(CASE WHEN result = 'success' THEN 0 ELSE 1 END)
            OVER (PARTITION BY client_id ORDER BY login_day) AS num3
    FROM app_data
),
success_run AS (
    -- Each group holds at most one non-success row followed by a run of
    -- successes; count only the successes.
    SELECT
        client_id,
        num3,
        count(CASE WHEN a = 1 THEN 1 END) AS max_day
    FROM attempt
    GROUP BY
        client_id,
        num3
)
SELECT
    client_id,
    max(max_day) AS max_success_streak
FROM success_run
GROUP BY client_id;

连续登陆问题之变形2(mysql)

某游戏公司记录的用户每日登录数据

id       login_datetime
1001 2021-12-12
1001 2021-12-13
1001 2021-12-14
1001 2021-12-16
1001 2021-12-19
1001 2021-12-20
1002 2021-12-12
1002 2021-12-16
1002 2021-12-17
计算每个用户最大的连续登录天数,可以间隔一天。解释:如果一个用户在 1,3,5,6 登录游戏,则视为连续 6 天登录。

LAG函数用法

LAG函数是一种用于在结果集中访问前一行的函数。它可以用来计算当前行和前一行之间的差异,或者用于计算移动平均值等分析操作。

LAG函数的基本语法如下: LAG(<列名>, <偏移量>, <默认值>) OVER (PARTITION BY <分组列> ORDER BY <排序列>)

以下是一些LAG函数(及相关窗口函数)的示例用法(示例中的 my_table、group_id 为占位名称):

  1. 计算当前行与前一行的差值: SELECT value, value - LAG(value) OVER (ORDER BY id) AS diff_value FROM my_table;

  2. 计算移动平均值(此例使用 AVG 窗口函数配合窗口帧,而不是 LAG): SELECT value, AVG(value) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS moving_avg FROM my_table;

  3. 在每个分组内取前一行的值: SELECT group_id, value, LAG(value) OVER (PARTITION BY group_id ORDER BY id) AS lag_value FROM my_table;

  4. 当结果集中没有前一行时,返回默认值: SELECT value, LAG(value, 1, 0) OVER (ORDER BY id) AS lag_value FROM my_table;

以上是LAG函数的基本用法,根据具体的需求可以进行更复杂的分析操作。请注意,LAG函数在不同的数据库系统中的用法可能会有所差异,具体语法和参数请参考相应的数据库文档。

1. 使用 lag 函数取同一用户的上一条登录日期(即将日期下移一行),没有上一条记录时用 1970-01-01 代替 null。

2. 计算相邻两次登录日期的差值:大于 2 天记为 1(连续中断),否则记为 0;再按 id 分区、按登录日期排序对该值累加,得到连续区间编号 flag。

3. 按 id、flag 分组,求组内最大日期与最小日期的差值 + 1,得到该连续区间的天数 c_days。

4. 在 3 的基础上求每个 id 最大的 c_days,即为每个 id 的最大连续登录天数。

-- Fixture for variant 2: daily login records per user.
-- Dropped first so the script can be re-run, like the other fixtures in this file.
DROP TABLE IF EXISTS test;
CREATE TABLE test (
    id             INT,
    login_datetime DATE
);

INSERT INTO test (id, login_datetime)
VALUES
    (1001, '2021-12-12'),
    (1001, '2021-12-13'),
    (1001, '2021-12-14'),
    (1001, '2021-12-16'),
    (1001, '2021-12-19'),
    (1001, '2021-12-20'),
    (1002, '2021-12-12'),
    (1002, '2021-12-16'),
    (1002, '2021-12-17');


-- Variant 2 (MySQL 8+): longest login streak per user where a single missed
-- day does not break the streak (a gap of at most 2 days between logins).
WITH daily_login AS (
    -- Step 0: collapse to one row per user per day.
    SELECT
        id,
        login_datetime AS login_date
    FROM test
    GROUP BY
        id,
        login_datetime
),
with_previous AS (
    -- Step 1: fetch the user's previous login date; the first login of a user
    -- has none and gets the sentinel 1970-01-01.
    SELECT
        id,
        login_date,
        lag(login_date, 1, '1970-01-01')
            OVER (PARTITION BY id ORDER BY login_date) AS lag_date
    FROM daily_login
),
flagged AS (
    -- Step 2: a gap of more than 2 days starts a new streak; the running sum of
    -- those break markers is the streak number within the user.
    SELECT
        id,
        login_date,
        sum(CASE WHEN datediff(login_date, lag_date) > 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY id ORDER BY login_date) AS flag
    FROM with_previous
),
streak AS (
    -- Step 3: streak length in calendar days, first to last login inclusive.
    SELECT
        id,
        flag,
        datediff(max(login_date), min(login_date)) + 1 AS c_days
    FROM flagged
    GROUP BY
        id,
        flag
)
-- Step 4: the longest streak of each user.
SELECT
    id,
    max(c_days) AS max_c_days
FROM streak
GROUP BY id;

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值