原始数据 :
A,2017-10-11,300
A,2017-10-12,200
A,2017-10-13,100
A,2017-10-15,100
A,2017-10-16,300
A,2017-10-17,150
A,2017-10-18,340
A,2017-10-19,360
B,2017-10-11,400
B,2017-10-12,200
B,2017-10-15,600
C,2017-10-11,350
C,2017-10-13,250
C,2017-10-14,300
C,2017-10-15,400
C,2017-10-16,200
D,2017-10-13,500
E,2017-10-14,600
E,2017-10-15,500
D,2017-10-14,600
分析 : 给每个用户一个编号,用日期减去编号,如果是同一天,那么就是连续的.
A,2017-10-11,300,1,2017-10-10
A,2017-10-12,200,2,2017-10-10
A,2017-10-13,100,3,2017-10-10
A,2017-10-15,100,4,2017-10-11
A,2017-10-16,300,5,2017-10-11
A,2017-10-17,150,6,2017-10-11
A,2017-10-18,340,7,2017-10-11
A,2017-10-19,360,8,2017-10-11
B,2017-10-11,400
B,2017-10-12,200
B,2017-10-15,600
C,2017-10-11,350
C,2017-10-13,250
C,2017-10-14,300
C,2017-10-15,400
C,2017-10-16,200
D,2017-10-13,500
E,2017-10-14,600
E,2017-10-15,500
D,2017-10-14,600
1:建表,加载数据
create table t_jd(shopid string,dt string,sale int)
row format delimited fields terminated by ',';
load data local inpath '/root/sale.dat' into table t_jd;
2:打编号
select shopid,dt,sale,
row_number() over(partition by shopid order by dt) as rn
from t_jd;
结果 :
3 根据编号,生成连续日期
select shopid,dt,sale,rn,
date_sub(to_date(dt),rn)
from
(select shopid,dt,sale,
row_number() over(partition by shopid order by dt) as rn
from t_jd) tmp;
结果 :
4 分组,求count
select shopid,count(1) as cnt
from
(select shopid,dt,sale,rn,
date_sub(to_date(dt),rn) as flag
from
(select shopid,dt,sale,
row_number() over(partition by shopid order by dt) as rn
from t_jd) tmp) tmp2
group by shopid,flag;
结果 :
5 筛选出连续天数大于等于3的
select shopid from
(select shopid,count(1) as cnt
from
(select shopid,dt,sale,rn,
date_sub(to_date(dt),rn) as flag
from
(select shopid,dt,sale,
row_number() over(partition by shopid order by dt) as rn
from t_jd) tmp) tmp2
group by shopid,flag) tmp3
where tmp3.cnt>=3;
结果 :
6 去重
select distinct shopid from
(select shopid,count(1) as cnt
from
(select shopid,dt,sale,rn,
date_sub(to_date(dt),rn) as flag
from
(select shopid,dt,sale,
row_number() over(partition by shopid order by dt) as rn
from t_jd) tmp) tmp2
group by shopid,flag) tmp3
where tmp3.cnt>=3;
结果 :
---------------------
作者:俊杰梓
来源:CSDN
原文:https://blog.csdn.net/weixin_35353187/article/details/82218519
版权声明:本文为博主原创文章,转载请附上博文链接!