已知登陆表中有 uid、login_time,求每个用户的最大连续登陆天数
1、模拟数据集 user_login
WITH user_login as (
select 'A' uid,
from_unixtime(
unix_timestamp('2020-08-01 10:00:00', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'A' uid,
from_unixtime(
unix_timestamp('2020-08-02 10:01:00', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'A' uid,
from_unixtime(
unix_timestamp('2020-08-03 10:02:03', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'A' uid,
from_unixtime(
unix_timestamp('2020-08-06 10:04:03', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'A' uid,
from_unixtime(
unix_timestamp('2020-08-07 10:03:10', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'B' uid,
from_unixtime(
unix_timestamp('2020-08-12 10:03:11', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'B' uid,
from_unixtime(
unix_timestamp('2020-08-13 10:04:00', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'B' uid,
from_unixtime(
unix_timestamp('2020-08-14 12:04:06', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'A' uid,
from_unixtime(
unix_timestamp('2020-08-15 12:04:10', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'B' uid,
from_unixtime(
unix_timestamp('2020-08-16 13:04:12', "yyyy-MM-dd HH:mm:ss")
) as login_time
union all
select 'B' uid,
from_unixtime(
unix_timestamp('2020-08-17 15:05:10', "yyyy-MM-dd HH:mm:ss")
) as login_time
)
2、将记录按照每个用户分区,对登录时间进行升序排列,得到数据集 user_log_rank
代码:
user_log_rank as
(
select uid,
login_time,
row_number() over(
partition by uid
order by login_time
) as sort
from (
select uid,
date(login_time) as login_time
from user_login
group by uid,
date(login_time)
) as rs_user_login
)
结果:
3、每条记录的登录时间减去排序的数字,得到一个组 date_group,对用户及date_group 做聚合,continue_days 即为连续登录天数
代码:
select uid,
date_sub(login_time, sort) as date_group,
min(login_time) as start_dt,
max(login_time) as end_dt,
count(1) as continue_days
from user_log_rank a
group by uid,
date_sub(login_time, sort)
结果:
4、以 uid 分组,max(continue_days )即为每个用户的最大连续登录天数
代码:
select uid,
max(continue_days) as max_continue_days
from (
select uid,
date_sub(login_time, sort) as date_group,
min(login_time) as start_dt,
max(login_time) as end_dt,
count(1) as continue_days
from user_log_rank a
group by uid,
date_sub(login_time, sort)
) as rs_continue_days
group by uid
结果: