Hive SQL
统计每个部门获得前三高工资的所有员工及所有员工占前三工资总和的比例
---------------------------------建表开始----------------------------
-- 建表 temp_employee
CREATE TABLE IF NOT EXISTS temp_employee (
id BIGINT COMMENT '工号',
name STRING COMMENT '姓名',
salary BIGINT COMMENT '工资',
departmentid BIGINT COMMENT '部门编号'
)COMMENT 'employee员工信息表'
STORED AS ORCFILE COMPRESS;
-- 插入数据 temp_employee
INSERT INTO temp_employee VALUES(1,'Joe',85000,1),(2,'Henry',80000,2),(3,'Sam',60000,2),(4,'Max',90000,1),(5,'Janet',69000,1),(6,'Randy',85000,1),(7,'Will',70000,1);
-- 校验 temp_employee
SELECT id,name,salary,departmentid from temp_employee;
-------------------------------------------------------------------
-- 建表 temp_department
CREATE TABLE IF NOT EXISTS temp_department (
id BIGINT COMMENT '部门号',
name STRING COMMENT '部门名'
)COMMENT 'department部门信息表'
STORED AS ORCFILE COMPRESS;
-- 插入数据temp_department
INSERT INTO temp_department VALUES(1,'IT'),(2,'Sales');
-- 校验temp_department
SELECT id,name FROM temp_department;
---------------------------------建表完成----------------------------
-- 3、查询薪资占比
SELECT
dep.name AS department
,emp.name AS employee
,salary
,proportion
FROM
( -- 2、取每个部门工资排前三的员工部门号、姓名、工资,统计工资占比
SELECT
departmentid
,name
,salary
,CONCAT(ROUND(salary/SUM(salary) OVER(PARTITION BY departmentid)*100,2),'%') proportion -- 去掉order by 与没去掉什么区别?
FROM
( -- 1、分部门按工资紧密排序不跳跃
SELECT
departmentid
,salary
,name
,DENSE_RANK() OVER (PARTITION BY departmentid ORDER BY salary DESC) rk
FROM temp_employee
) dk
WHERE rk <= 3
) emp
JOIN
temp_department dep
ON emp.departmentid = dep.id
ORDER BY department,salary desc,employee desc