连续销售记录查询

请查出整个数据集中,那些有过连续3天销售记录的店;

A,2017-10-11,300
A,2017-10-12,200
B,2017-10-11,400
B,2017-10-12,200
A,2017-10-13,100
B,2017-10-15,600
A,2017-10-15,100
C,2017-10-11,350
A,2017-10-16,300
C,2017-10-13,250
A,2017-10-17,150
C,2017-10-14,300
C,2017-10-15,400
A,2017-10-18,340
A,2017-10-19,360
C,2017-10-16,200
D,2017-10-13,500
E,2017-10-14,600
E,2017-10-15,500
D,2017-10-14,600

思路:
按name分组,dt进行排序,得到rank(1,2,3,4…),使用date_sub(string date,int i)获取new_dt=dt-rank的值,
按new_dt进行分组查询,统计count,若cont>=3,则有过连续三天的销售记录

创建表,并导入数据

create table u_homework(name string,dt string,records int) row format delimited fields terminated by ','; 
load data local inpath '/root/txt/homework.txt' into table u_homework;

1)按name分组,dt进行排序,得到rank(1,2,3,4…)

select name,dt,records,
row_number() over(partition by name order by dt) as rank 
from u_homework;

+-------+-------------+----------+-------+--+
| name  |     dt      | records  | rank  |
+-------+-------------+----------+-------+--+
| A     | 2017-10-11  | 300      | 1     |
| A     | 2017-10-12  | 200      | 2     |
| A     | 2017-10-13  | 100      | 3     |
| A     | 2017-10-15  | 100      | 4     |
| A     | 2017-10-16  | 300      | 5     |
| A     | 2017-10-17  | 150      | 6     |
| A     | 2017-10-18  | 340      | 7     |
| A     | 2017-10-19  | 360      | 8     |
| B     | 2017-10-11  | 400      | 1     |
| B     | 2017-10-12  | 200      | 2     |
| B     | 2017-10-15  | 600      | 3     |
| C     | 2017-10-11  | 350      | 1     |
| C     | 2017-10-13  | 250      | 2     |
| C     | 2017-10-14  | 300      | 3     |
| C     | 2017-10-15  | 400      | 4     |
| C     | 2017-10-16  | 200      | 5     |
| D     | 2017-10-13  | 500      | 1     |
| D     | 2017-10-14  | 600      | 2     |
| E     | 2017-10-14  | 600      | 1     |
| E     | 2017-10-15  | 500      | 2     |
+-------+-------------+----------+-------+--+

2)获取new_dt

select name,records,dt,rank,date_sub(dt,rank) as new_dt 
from 
(select name,dt,records,
row_number() over(partition by name order by dt) as rank
 from u_homework) tmp;

+-------+----------+-------------+-------+-------------+--+
| name  | records  |     dt      | rank  |   new_dt    |
+-------+----------+-------------+-------+-------------+--+
| A     | 300      | 2017-10-11  | 1     | 2017-10-10  |
| A     | 200      | 2017-10-12  | 2     | 2017-10-10  |
| A     | 100      | 2017-10-13  | 3     | 2017-10-10  |
| A     | 100      | 2017-10-15  | 4     | 2017-10-11  |
| A     | 300      | 2017-10-16  | 5     | 2017-10-11  |
| A     | 150      | 2017-10-17  | 6     | 2017-10-11  |
| A     | 340      | 2017-10-18  | 7     | 2017-10-11  |
| A     | 360      | 2017-10-19  | 8     | 2017-10-11  |
| B     | 400      | 2017-10-11  | 1     | 2017-10-10  |
| B     | 200      | 2017-10-12  | 2     | 2017-10-10  |
| B     | 600      | 2017-10-15  | 3     | 2017-10-12  |
| C     | 350      | 2017-10-11  | 1     | 2017-10-10  |
| C     | 250      | 2017-10-13  | 2     | 2017-10-11  |
| C     | 300      | 2017-10-14  | 3     | 2017-10-11  |
| C     | 400      | 2017-10-15  | 4     | 2017-10-11  |
| C     | 200      | 2017-10-16  | 5     | 2017-10-11  |
| D     | 500      | 2017-10-13  | 1     | 2017-10-12  |
| D     | 600      | 2017-10-14  | 2     | 2017-10-12  |
| E     | 600      | 2017-10-14  | 1     | 2017-10-13  |
| E     | 500      | 2017-10-15  | 2     | 2017-10-13  |
+-------+----------+-------------+-------+-------------+--+

3)分组查询,获取count(*)

select name,count(*) as counts from
(select name,records,dt,rank,date_sub(dt,rank) as new_dt 
from 
(select name,dt,records,
row_number() over(partition by name order by dt) as rank
 from u_homework) tmp) tmp2
group by name,new_dt;

+-------+------+--+
| name  | _c1  |
+-------+------+--+
| A     | 3    |
| A     | 5    |
| B     | 2    |
| B     | 1    |
| C     | 1    |
| C     | 4    |
| D     | 2    |
| E     | 2    |
+-------+------+--+

4)获取count>=3的数据,并去重

select distinct tmp3.name from
(select name,count(*) as counts from
(select name,records,dt,rank,date_sub(dt,rank) as new_dt 
from 
(select name,dt,records,
row_number() over(partition by name order by dt) as rank
 from u_homework) tmp) tmp2
group by name,new_dt) tmp3 where counts>=3;

+------------+--+
| tmp3.name  |
+------------+--+
| A          |
| C          |
+------------+--+
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值