一、窗口函数
OVER()指定分析函数工作的数据窗口大小,这个数据窗口大小可能会随着行的变而变化
-
CURRENT ROW:当前行
-
n PRECEDING:往前n行数据
-
n FOLLOWING:往后n行数据
-
UNBOUNDED:起点,UNBOUNDED PRECEDING 表示从前面的起点, UNBOUNDED FOLLOWING表示到后面的终点
-
LAG(col,n):往前第n行数据
-
LEAD(col,n):往后第n行数据
-
NTILE(n):把有序分区中的行分发到指定数据的组中,各个组有编号,编号从1开始,对于每一行,NTILE返回此行所属的组的编号。注意:n必须为int类型。
select name,orderdate,cost,
sum(cost) over() as sample1,--所有行相加
sum(cost) over(partition by name) as sample2,--按name分组,组内数据相加
sum(cost) over(partition by name order by orderdate) as sample3,--按name分组,组内数据累加
sum(cost) over(partition by name order by orderdate rows between UNBOUNDED PRECEDING and current row ) as sample4 ,--和sample3一样,由起点到当前行的聚合
sum(cost) over(partition by name order by orderdate rows between 1 PRECEDING and current row) as sample5, --当前行和前面一行做聚合
sum(cost) over(partition by name order by orderdate rows between 1 PRECEDING AND 1 FOLLOWING ) as sample6,--当前行和前边一行及后面一行
sum(cost) over(partition by name order by orderdate rows between current row and UNBOUNDED FOLLOWING ) as sample7 --当前行及后面所有行
from business;
select name,orderdate,cost,
lag(orderdate,1,'1900-01-01') over(partition by name order by orderdate ) as time1, lag(orderdate,2) over (partition by name order by orderdate) as time2
from business;
查询前20%时间的订单信息
select * from (
select name,orderdate,cost, ntile(5) over(order by orderdate) sorted
from business
) t
where sorted = 1;
二、案例
- 统计截止到当前时间段的店铺累计销售金额
select shop_id, stat_date, ordamt, sum(ordamt) over(partition by shop_id order by stat_date)
from rt_data
where dt = '2015-01-11' and shop_id = 10026;
10026 201501120030 5170 5170
10026 201501120100 5669 10839
10026 201501120130 2396 13235
10026 201501120200 1498 14733
10026 201501120230 1997 16730
10026 201501120300 1188 17918
10026 201501120330 598 18516
10026 201501120400 479 18995
10026 201501120430 1587 20582
10026 201501120530 799 21381
- 统计每个时间段的销售占比
select shop_id, stat_date, ordamt, ordamt / sum(ordamt) over(partition by shop_id)
from rt_data
where dt = '2015-01-11' and shop_id = 10026
order by stat_date;
10026 201501120900 5406 0.014227
10026 201501120930 13098 0.034471
10026 201501121000 15679 0.041263
10026 201501121030 12134 0.031934
10026 201501121100 26259 0.069107
10026 201501121130 16516 0.043466
10026 201501121200 20127 0.052969
-
找出2点的销售金额及前半小时的销售金额和后1个小时的销售金额
lag(field, N)是取前N行的值,lead(field, N)是取后N行的值。
select * from (
select
shop_id,
stat_date,
ordamt,
lag(stat_date,1) over(partition by shop_id order by stat_date),
lag(ordamt,1) over(partition by shop_id order by stat_date),
lead(stat_date,2) over(partition by shop_id order by stat_date),
lead(ordamt,2) over(partition by shop_id order by stat_date)
from rt_data
where dt = '2015-01-11' and shop_id = 10026
) t
where stat_date = 201501120200;
10026 201501120200 1498 201501120130 2396 201501120300 1188
- 按销售金额排序,取出前20%的时间段和相应金额
select * from (
select shop_id, stat_date, ordamt, ntile(5) over(partition by shop_id order by ordamt desc) sorted
from rt_data
where dt = '2015-01-11' and shop_id = 10026
) t
where sorted = 1;
10026 201501121100 26259.0 1
10026 201501121200 20127.0 1
10026 201501121300 18138.0 1
10026 201501121130 16516.0 1
10026 201501121400 15956.0 1
10026 201501121000 15679.0 1
10026 201501122200 14290.0 1
10026 201501121430 14203.0 1
10026 201501121330 13290.0 1