Hive SQL Windowing and Analytics Functions 窗口和分析函数
直接上示例
SQL
select id,
name,
value,
first_value(value) over(partition by name ) as first_v,-- 分组内排序后,截止到当前行,第一个值
last_value(value) over(partition by name ) as last_v,-- 分组内排序后,截止到当前行,最后一个值
lag(value,1,0) over(partition by name ) as lag_v,--当前行的前n行
lead(value,1,0) over(partition by name ) as lead_v,--当前行的后n行
row_number() over(partition by name order by value ) as rn,--排序相同时不会重复,会根据顺序排序,排序结果1、2、3
rank() over(partition by name order by value ) as rk,--排序相同时会重复,总数不变,,排序结果1、1、3
dense_rank() over(partition by name order by value ) as dense_rk,--排序相同时会重复,总数会减少,排序结果1、1、2
sum(value) over(partition by name ) as sum_v,--分组求和
sum(value) over(partition by name order by value) as acc_sum_v1,--分组累计求和,排序相同会重复
sum(value) over(partition by name order by value,id) as acc_sum_v2,--分组累计求和,排序不同的情况
round(avg(value) over(partition by name ) ,2) as avg_v,
round(avg(value) over(partition by name order by value),2) as acc_avg_v1,
round(avg(value) over(partition by name order by value,id),2) as acc_avg_v2,
count(value) over(partition by name ) as count_v,
count(value) over(partition by name order by value) as acc_count_v1,
count(value) over(partition by name order by value,id) as acc_count_v2,
min(value) over(partition by name ) as min_v,
min(value) over(partition by name order by value) as acc_min_v1,
min(value) over(partition by name order by value,id) as acc_min_v2,
max(value) over(partition by name ) as max_v,
max(value) over(partition by name order by value) as acc_max_v1,
max(value) over(partition by name order by value,id) as acc_max_v2
from (
select 1 as id,'a' as name ,1 as value union all
select 2 as id,'a' as name ,1 as value union all
select 3 as id,'a' as name ,3 as value union all
select 1 as id,'b' as name ,10 as value union all
select 2 as id,'b' as name ,20 as value union all
select 3 as id,'b' as name ,30 as value
)t
运行结果
+-----+-------+--------+----------+---------+--------+---------+-----+-----+-----------+--------+-------------+-------------+--------+-------------+-------------+----------+---------------+---------------+--------+-------------+-------------+--------+-------------+-------------+
| id | name | value | first_v | last_v | lag_v | lead_v | rn | rk | dense_rk | sum_v | acc_sum_v1 | acc_sum_v2 | avg_v | acc_avg_v1 | acc_avg_v2 | count_v | acc_count_v1 | acc_count_v2 | min_v | acc_min_v1 | acc_min_v2 | max_v | acc_max_v1 | acc_max_v2 |
+-----+-------+--------+----------+---------+--------+---------+-----+-----+-----------+--------+-------------+-------------+--------+-------------+-------------+----------+---------------+---------------+--------+-------------+-------------+--------+-------------+-------------+
| 1 | a | 1 | 1 | 3 | 0 | 1 | 1 | 1 | 1 | 5 | 2 | 1 | 1.67 | 1.0 | 1.0 | 3 | 2 | 1 | 1 | 1 | 1 | 3 | 1 | 1 |
| 2 | a | 1 | 1 | 3 | 1 | 3 | 2 | 1 | 1 | 5 | 2 | 2 | 1.67 | 1.0 | 1.0 | 3 | 2 | 2 | 1 | 1 | 1 | 3 | 1 | 1 |
| 3 | a | 3 | 1 | 3 | 1 | 0 | 3 | 3 | 2 | 5 | 5 | 5 | 1.67 | 1.67 | 1.67 | 3 | 3 | 3 | 1 | 1 | 1 | 3 | 3 | 3 |
| 1 | b | 10 | 10 | 30 | 0 | 20 | 1 | 1 | 1 | 60 | 10 | 10 | 20.0 | 10.0 | 10.0 | 3 | 1 | 1 | 10 | 10 | 10 | 30 | 10 | 10 |
| 2 | b | 20 | 10 | 30 | 10 | 30 | 2 | 2 | 2 | 60 | 30 | 30 | 20.0 | 15.0 | 15.0 | 3 | 2 | 2 | 10 | 10 | 10 | 30 | 20 | 20 |
| 3 | b | 30 | 10 | 30 | 20 | 0 | 3 | 3 | 3 | 60 | 60 | 60 | 20.0 | 20.0 | 20.0 | 3 | 3 | 3 | 10 | 10 | 10 | 30 | 30 | 30 |
+-----+-------+--------+----------+---------+--------+---------+-----+-----+-----------+--------+-------------+-------------+--------+-------------+-------------+----------+---------------+---------------+--------+-------------+-------------+--------+-------------+-------------+