-
建表插入数据
create table tmp_continous
(
id STRING ,
time DATETIME
);
INSERT OVERWRITE TABLE tmp_continous
Select '201', '2017-01-01 00:00:00' union all
Select '201','2017-01-02 00:00:00' union all
Select '202','2017-01-02 00:00:00' union all
Select '202','2017-01-03 00:00:00' union all
Select '203','2017-01-03 00:00:00' union all
Select '201','2017-01-04 00:00:00' union all
Select '202','2017-01-04 00:00:00' union all
Select '201','2017-01-05 00:00:00' union all
Select '202','2017-01-05 00:00:00' union all
Select '201','2017-01-06 00:00:00' union all
Select '203','2017-01-06 00:00:00' union all
Select '203','2017-01-07 00:00:00';
- 去重数据,保证每天有且仅有一条登陆数据
select
id,
time
from tmp_continous
group by id,time
- 对数据进行排序处理
select id,
time,
ROW_NUMBER() OVER (PARTITION by id order by time) as order_sort
from tmp_continous
- 利用登陆时间 - 排序得到的序列号,如果减去后得到的结果是同一天,则说明是连续的;否则是不连续的。这里需要仔细体会以下,也可以看下面这张图。
- 比如是2017-01-01 sort为1;2017-01-02 sort为2;减去sort之后都是 2016-12-31。如果出现一个 2017-01-04 sort为3,那么减去后的结果是2017-01-01那么就是不连续的
select
id,
time,
order_sort,
dateadd(time,order_sort,'dd') as simi_day
from(
select
id,
time,
cast(concat("-",cast(order_sort as STRING)) as INT) as order_sort
from(
select id,time, ROW_NUMBER() OVER (PARTITION by id order by time) as order_sort
from tmp_continous
) t
)m
- group by 取count、max等
select id,max(continous ) from(
select t2.id,count(t2.simi_day) as continous
from
(select
id,
time,
order_sort,
dateadd(time,order_sort,'dd') as simi_day
from(
select
id,
time,
cast(concat("-",cast(order_sort as STRING)) as INT) as order_sort
from(
select id,time, ROW_NUMBER() OVER (PARTITION by id order by time) as order_sort
from tmp_continous
) t
)m )t2
group by t2.id,t2.simi_day) z
group by id;
- 需求在进一步,需要当前最大的连续登陆天数
- 方法是利用时间的由大到小排(刚才是由小到大排),然后同样得出排列号码,不过要在基础上减一,因为当天的date_add是 + 0 而不是跟上面那样减一
- 后续的操作跟上面一样