1. 数据
字段解释: dt 登录时间 , mid 设备id
2022-01-01 1001
2022-01-01 1002
2022-01-01 1003
2022-01-02 1001
2022-01-02 1002
2022-01-03 1001
2022-01-03 1003
2022-01-04 1001
2022-01-04 1002
2022-01-04 1003
2022-01-05 1002
2022-01-05 1003
2022-01-05 1004
2022-01-05 1005
2022-01-06 1002
2022-01-06 1003
2022-01-06 1005
2022-01-07 1001
2022-01-07 1002
2022-01-07 1003
2. 建表
-- Device login records loaded from tab-delimited text.
-- Columns (per the field notes above): dt = login time, mid = device id.
create table t_7_3 (
    dt  date,
    mid int
)
row format delimited
    fields terminated by "\t";
3. 实现
3.1 过滤及开窗排序
先找出(指定日期)最近七日内的所有记录, 然后按设备id分区、按时间字段排序做开窗排名。当同一设备同一天只有一条记录时, 使用 rank 或 row_number 都可以; 如果同一天可能有多条记录, 需要先按 (mid, dt) 去重, 否则并列排名或重复行会打断连续日期的分组
-- Step 1: keep the 7-day window ending on 2022-01-07 (2022-01-01 .. 2022-01-07,
-- both ends inclusive) and number each device's login days in date order.
-- "group by dt, mid" collapses repeated rows for the same device and day before
-- the window function runs; without it, tied ranks leave gaps in the numbering
-- and the later "dt - rank" grouping splits consecutive days apart.
select
    dt,
    mid,
    rank() over (partition by mid order by dt) rank
from t_7_3
where dt between date_add("2022-01-07", -6) and "2022-01-07"
group by dt, mid;
3.2 等差分组
利用当前时间值和排名的差值进行计算, 结合等差数列的特点, 连续登录的时间在进行差值计算后肯定会是同一天的日期, 计算完成后, 按照计算之后的日期和设备id进行分组, 统计每个组中日期的个数
-- Step 2: group each device's login days into runs of consecutive dates.
-- For consecutive days, "dt minus its per-device rank" is the same date, so
-- (mid, date_diff) identifies one run and daycnt is the run length in days.
-- The innermost query is reduced to one row per (mid, dt) via "group by" so
-- that repeated logins on a day neither inflate daycnt nor break a run.
-- The rank alias is named rk to avoid clashing with the rank() function name.
select
    mid,
    count(date_diff) daycnt
from
(
    select
        mid,
        date_sub(dt, rk) date_diff
    from
    (
        select
            dt,
            mid,
            rank() over (partition by mid order by dt) rk
        from t_7_3
        where dt between date_add("2022-01-07", -6) and "2022-01-07"
        group by dt, mid
    ) t1
) t2
group by mid, date_diff;
3.3 过滤选取
过滤出个数大于等于3的分组, 即连续登录天数不少于3天的设备
-- Step 3: devices that logged in on at least 3 consecutive days within the
-- 7-day window ending on 2022-01-07 (2022-01-01 .. 2022-01-07 inclusive).
--   t1: one row per (mid, dt) in the window, ranked by date per device;
--       "group by dt, mid" removes repeated rows for the same day so the
--       ranks stay gap-free.
--   t2: dt minus rank, which is constant across a run of consecutive days.
--   t3: length in days of each run, keyed by (mid, date_diff).
-- "select distinct" returns each device once even when it has two separate
-- qualifying runs inside the window (e.g. days 1-3 and days 5-7).
select distinct
    mid
from
(
    select
        mid,
        count(date_diff) daycnt
    from
    (
        select
            mid,
            date_sub(dt, rk) date_diff
        from
        (
            select
                dt,
                mid,
                rank() over (partition by mid order by dt) rk
            from t_7_3
            where dt between date_add("2022-01-07", -6) and "2022-01-07"
            group by dt, mid
        ) t1
    ) t2
    group by mid, date_diff
) t3
where daycnt >= 3;