Hive窗口函数应用案例：分组TopN问题

最新推荐文章于 2022-11-25 16:47:59 发布

绝牛雷犁热刀

最新推荐文章于 2022-11-25 16:47:59 发布

阅读量145

点赞数

文章标签： hive sql big data

本文链接：https://blog.csdn.net/m0_58535137/article/details/125057968

版权

（原文此处为一张需求说明图片，抓取时未能保留。）
需求：基于员工表 employee，求每个部门内薪资最高的前两名员工的信息。实现如下：

-- 分组TopN问题
/*
employee.txt

7369	SMITH	CLERK	7902	1980-12-17	800.00		20
7499	ALLEN	SALESMEN	7698	1981-2-20	1600.00	300.00	30
7521	WARD	SALESMEN	7698	1981-2-22	1250.00	500.00	30
7566	JONES	MANAGER	7839	1981-4-2	2975.00		20
7654	MARTIN	SALESMEN	7698	1981-9-28	1250.00	1400.00	30
7698	BLAKE	MANAGER	7839	1981-5-1	2850.00		30
7782	CLARK	MANAGER	7839	1981-6-9	2450.00		10
7788	SCOTT	ANALYST	7566	1987-4-19	3000.00		20
7839	KING	PRESIDENT		1981-11-17	5000.00		10
7844	TURNER	SALESMEN	7698	1981-9-8	1500.00	0.00	30
7876	ADAMS	CLERK	7788	1987-5-23	1100.00		20
7900	JAMES	CLERK	7698	1981-12-3	950.00		30
7902	FORD	ANALYST	7566	1981-12-3	3000.00		20
7934	MILLER	CLERK	7782	1982-1-23	1300.00		10
*/
-- Employee table used by the grouped Top-N examples.
-- Stored as delimited text: one employee per line, fields separated by a tab,
-- in the same order as the columns declared here.
-- `if not exists` makes the script re-runnable; a plain `create table` fails
-- on the second run because the table is already there.
-- NOTE(review): salary/bonus are double, which is inexact for monetary values;
-- decimal(10,2) would be safer — confirm no consumer relies on double first.
-- NOTE(review): hire_date is kept as string; the sample rows use non-padded
-- dates such as 1981-2-20, so text comparison/sorting is not chronological.
create table if not exists employee
(
    emp_no     int comment '员工ID',
    emp_name   string comment '员工姓名',
    job        string comment '岗位',
    manager_id int comment '领导ID',
    hire_date  string comment '入职日期',
    salary     double comment '薪水',
    bonus      double comment '奖金',
    dept_no    int comment '部门编号'
) row format delimited fields terminated by '\t';

-- Populate the employee table from the sample text file.
-- `local` reads the path from the local filesystem rather than HDFS;
-- `overwrite` replaces the table's existing contents instead of appending,
-- so re-running this statement does not duplicate rows.
load data local inpath '/root/hivedata/employee.txt' overwrite into table employee;

-- Sanity check after the load: show every employee row.
-- Columns are listed explicitly (same set and order as the table definition)
-- so the output shape does not silently change if the table is altered later.
select emp_no,
       emp_name,
       job,
       manager_id,
       hire_date,
       salary,
       bonus,
       dept_no
from employee;

-- Number the employees of each department by salary, highest first.
-- row_number() hands out 1, 2, 3, ... inside every dept_no partition and never
-- repeats a number, even when two employees earn the same salary. Ordering by
-- salary alone therefore leaves the numbering of tied rows up to the engine;
-- emp_no is added as a tie-breaker so the result is the same on every run.
select emp_no,
       emp_name,
       salary,
       dept_no,
       row_number() over (partition by dept_no order by salary desc, emp_no) as rn
from employee;

-- Top 2 earners of each department.
-- Step 1 (ranked_emp): number employees within their department by salary,
--   highest first. emp_no breaks salary ties so that, when several employees
--   share the salary at the cut-off, the same ones are picked on every run.
-- Step 2: keep only the first two numbered rows per department.
-- Exactly two rows (or fewer, for smaller departments) are returned per
-- dept_no; use rank()/dense_rank() instead if tied employees should all appear.
with ranked_emp as (
    select emp_no,
           emp_name,
           salary,
           dept_no,
           row_number() over (partition by dept_no order by salary desc, emp_no) as rn
    from employee
)
select emp_no,
       emp_name,
       salary,
       dept_no,
       rn
from ranked_emp
where rn <= 2;