-- 实现如下:
-- 分组TopN问题
/*
employee.txt
7369 SMITH CLERK 7902 1980-12-17 800.00 20
7499 ALLEN SALESMEN 7698 1981-2-20 1600.00 300.00 30
7521 WARD SALESMEN 7698 1981-2-22 1250.00 500.00 30
7566 JONES MANAGER 7839 1981-4-2 2975.00 20
7654 MARTIN SALESMEN 7698 1981-9-28 1250.00 1400.00 30
7698 BLAKE MANAGER 7839 1981-5-1 2850.00 30
7782 CLARK MANAGER 7839 1981-6-9 2450.00 10
7788 SCOTT ANALYST 7566 1987-4-19 3000.00 20
7839 KING PRESIDENT 1981-11-17 5000.00 10
7844 TURNER SALESMEN 7698 1981-9-8 1500.00 0.00 30
7876 ADAMS CLERK 7788 1987-5-23 1100.00 20
7900 JAMES CLERK 7698 1981-12-3 950.00 30
7902 FORD ANALYST 7566 1981-12-3 3000.00 20
7934 MILLER CLERK 7782 1982-1-23 1300.00 10
*/
-- Employee table backing the grouped Top-N examples in this script.
-- Column order follows the field order of the employee.txt sample shown above;
-- fields in the data file are expected to be tab-separated.
-- `if not exists` lets the script be re-run without failing on an already
-- existing table (the `load data ... overwrite` statement reloads the rows).
-- NOTE(review): salary/bonus are stored as double; decimal(p, s) would be exact
-- for monetary values — confirm with consumers before changing the schema.
create table if not exists employee
(
    emp_no     int    comment '员工ID',    -- employee ID
    emp_name   string comment '员工姓名',  -- employee name
    job        string comment '岗位',      -- job title
    manager_id int    comment '领导ID',    -- employee ID of the manager
    hire_date  string comment '入职日期',  -- hire date; sample values such as 1981-2-20 are not zero-padded
    salary     double comment '薪水',      -- salary
    bonus      double comment '奖金',      -- bonus; absent for many rows in the sample
    dept_no    int    comment '部门编号'   -- department number
)
    row format delimited
        fields terminated by '\t';
-- Load the sample file from the local filesystem into employee.
-- `overwrite` replaces whatever data the table already holds.
load data
    local inpath '/root/hivedata/employee.txt'
    overwrite
    into table employee;
-- Sanity check after the load: list every employee row.
-- Columns are named explicitly (same order as the table definition) so the
-- output shape does not silently change if the table is altered later.
select emp_no,
       emp_name,
       job,
       manager_id,
       hire_date,
       salary,
       bonus,
       dept_no
from employee;
-- Group by department and number the employees inside each department by
-- salary, highest first.
-- emp_no is added as a tie-breaker so employees with equal salaries (e.g.
-- SCOTT and FORD, both 3000.00 in department 20 of the sample data) receive
-- the same rn on every run; assumes emp_no is unique per employee.
select emp_no,
       emp_name,
       salary,
       dept_no,
       row_number() over (partition by dept_no order by salary desc, emp_no) as rn
from employee;
-- Return the two highest-paid employees of every department.
-- ranked_emp numbers employees within each department by salary, highest
-- first; emp_no breaks salary ties so that the rows kept by the filter are the
-- same on every run (assumes emp_no is unique per employee).
with ranked_emp as (
    select emp_no,
           emp_name,
           salary,
           dept_no,
           row_number() over (partition by dept_no order by salary desc, emp_no) as rn
    from employee
)
select emp_no,
       emp_name,
       salary,
       dept_no,
       rn
from ranked_emp
where rn <= 2; -- keep positions 1 and 2 of each department