with t as (
select 1001 as id, '2021-12-21' as dt
union all
select 1001 as id, '2021-12-12' as dt
union all
select 1002 as id, '2021-12-12' as dt
union all
select 1001 as id, '2021-12-13' as dt
union all
select 1001 as id, '2021-12-16' as dt
union all
select 1002 as id, '2021-12-16' as dt
union all
select 1001 as id, '2021-12-19' as dt
union all
select 1002 as id, '2021-12-17' as dt
union all
select 1001 as id, '2021-12-20' as dt
)
-- 1) 将上一行时间数据下移
select id,
dt,
lag(dt,1,'1970-01-01') over(partition by id order by dt) lagdt
from test3;记为 t1
得到:
1001 2021-12-12 1970-01-01
1001 2021-12-13 2021-12-12
1001 2021-12-14 2021-12-13
1001 2021-12-16 2021-12-14
1001 2021-12-19 2021-12-16
1001 2021-12-20 2021-12-19
-- 2) 将当前行时间减去上一行时间数据(datediff(dt1,dt2))
select id,
dt,
datediff(dt,lagdt) flag
from t1; 记为 t2
得到:
1001 2021-12-12 564564
1001 2021-12-13 1
1001 2021-12-14 1
1001 2021-12-16 2
1001 2021-12-19 3
1001 2021-12-20 1
-- 3) 按照用户分组,同时按照时间排序,计算从第一行到当前行大于2的数据的总条数(sum(if(flag>2,1,0)))
select id,
dt,
sum(if(flag>2,1,0)) over(partition by id order by dt) flag
from t2;记为 t3
得到:
1001 2021-12-12 1
1001 2021-12-13 1
1001 2021-12-14 1
1001 2021-12-16 1
1001 2021-12-19 2
1001 2021-12-20 2
-- 4) 按照用户和flag分组,求最大时间减去最小时间并加上1
select id,
flag,
datediff(max(dt),min(dt)) days
from t3
group by
id,flag; 记为 t4
得到:
1001 1 4
1001 2 1
-- 5)取连续登录天数的最大值
select id,
max(days)+1
from t4
group by
id;
得到:
1001 5
1002 2 (1002用户明细略)
-- 6) 将SQL拼接起来
select id,
max(days)+1
from
( select id,
flag,
datediff(max(dt),min(dt)) days
from
( select id,
dt,
sum(if(flag>2,1,0)) over(partition by id order by dt) flag
from
( select id,
dt,
datediff(dt,lagdt) flag
from
( select id,
dt,
lag(dt,1,'1970-01-01') over(partition by id order by dt) lagdt
from test3
)t1
)t2
)t3
group by
id,flag
)t4
group by
id