# Hive窗口函数（排序、聚合、分析）

## 窗口函数

Function (arg1,…, arg n) OVER ([PARTITION BY <…>] [ORDER BY
<…>] [<window_clause>])

## 排序

row_number()
row_number()是对多有数值输出不同的序号，序号唯一连续。

K001,1,86,86
K002,1,95,181
K003,2,89,270
K004,1,83,353
K005,2,86,439
K006,3,92,531
K007,3,86,617
K008,1,88,705

create table student(
id string,
class int,
score int,
sum int)
row format delimited
fields terminated by ','
lines terminated by '\n';


load data local inpath '/opt/student.txt'
into table student;

select *,row_number()over(partition by class) from student;


select *,row_number()over(partition by class order by score desc) from student;


rank()

select *,rank() over(partition by score) from student;


 select *,rank() over(partition by class order by score) from student;


dense_rank()

select *,dense_rank() over(partition by class order by score) from student;


nlite(n)

select *,ntile(4) over(partition by class order by score) from student;


## 聚合

count()：计数，可以和distinct一起用。使用distinct时会很耗资源；
sum()：求和；
AVG():求平均数；
Max()/Min():求最大值/最小值；

create table students like student;

load data local inpat '/opt/student.txt'
into table students;


count：

select *,count(1) over(partition by class order by score)from students;


select *,count(1) over(partition by class)from students;


sum：

select *,sum(score) over(partition by class)from students;


select *,sum(score) over(partition by class order by score )from students;


## 分析

select lead(class,2) over(partition by class)from students;


first_value

 select*,first_value(sum)over(partition by class order by score) from students;


last_value

select*,last_value(score)over(partition by class order by score desc) from students;


## 窗口定义

ROWS BETWEEN <start_expr> AND <end_expr>

<start_expr>可以为下列值：
UNBOUNDED PRECEDING : 窗口起始位置（分组第一行）；
CURRENT ROW：当前行；
N PRECEDING/FOLLOWING：当前行之前/之后n行；

<end_expr>可以为下列值：
UNBOUNDED FOLLOWING : 窗口结束位置（分组最后一行）
CURRENT ROW：当前行

select*,max(score)over(partition by class order by score
rows between 2 preceding and current row) from students;


select*,max(score)over(partition by class order by score
rows between 2 preceding and 2 following) from students;

select*,max(score)over(partition by class order by score
rows between unbounded preceding and 2 following) from students;


09-20 1万+
06-03 7049

02-28 4730
06-05 8637
11-28 8200
08-21 3648
02-08 2万+
08-17 2万+
05-15 8万+