--创建数据库
create database if not exists db_0913;
--使用数据
use db_0913;
--创建雇员表
CREATE EXTERNAL TABLE db_0913.emp(
empno INT,
ename STRING,
job STRING,
mgr INT,
hiredate STRING,
sal DOUBLE,
comm DOUBLE,
deptno INT
)
ROW FORMAT
DELIMITED FIELDS TERMINATED BY "\t"
LINES TERMINATED BY "\n";
--创建部门表
CREATE EXTERNAL TABLE db_0913.dept(
deptno INT,
dname STRING,
loc STRING
)
ROW FORMAT
DELIMITED FIELDS TERMINATED BY "\t"
LINES TERMINATED BY "\n";
--导入数据
LOAD DATA LOCAL INPATH '/opt/datas/dept.txt' OVERWRITE INTO TABLE db_0913.dept;
LOAD DATA LOCAL INPATH '/opt/datas/emp.txt' OVERWRITE INTO TABLE db_0913.emp;
--查看hive支持的函数
-- show functions
--查看一个函数怎么用
-- desc function XXX
-- 1-查询部门编号是30的员工并且薪资大于500的
select
empno,ename,sal,deptno
from
emp
where
deptno=30 and sal > 500;
-- 2-查询薪资最高的前3条记录
select
empno,ename,sal,deptno
from
emp
order by sal desc
limit 3
-- 3-查询员工表中有哪些部门
写法一:
select
deptno
from
emp
group by deptno
写法二:
select
distinct deptno
from
emp
--注意:
-- group by 的底层就是distinct
-- 4-查询员工号大于7800的员工编号和对应的部门名称
select
distinct empno,dname
from
emp
join
dept
where
empno >7800
======================
select
t.empno,t.ename,t.deptno,d.dname
from
(
select
empno,ename,deptno
from
emp
where
empno >7800
) t
join
dept d
on
t.deptno = d.deptno
-- 5-查询薪资在800到1500之间的员工姓名,工号,职位,薪水
select
ename,empno,job,sal
from
emp
where
sal between 800 and 1500
-- 6-查询奖金不为空的且薪资不低于500的
select
ename,empno,job,sal,comm
from
emp
where
comm is not NULL and sal >500
-- 7-每个部门的平均工资
-- round(a,b),四舍五入,b:保留小数点几位
-- avg() 平均数
select
deptno,round(avg(sal),2) as avg_sal
from
emp
group by
deptno
-- 8-每个部门中每个岗位的最高薪资
select
deptno,job,max(sal) as max_sal
from
emp
group by
deptno,job
-- 9-平均工资大于2000的部门名称和平均工资
select
d.dname,t.avg_sal
from
(select
deptno,avg(sal) as avg_sal
from
emp
group by
deptno
) t
join
dept d
on
t.deptno =d.deptno
where
t.avg_sal >2000
select
d.dname,e.avg_sal
from
(
select
deptno,round(avg(sal)) as avg_sal
from
emp
group by
deptno
having
round(avg(sal)) >2000
) e
join
dept d
on e.deptno = d.deptno
-- 10-等值连接(inner)
select * from dept d join emp e on e.deptno=d.deptno
-- 11-左连接
select * from dept d left outer join emp e on e.deptno=d.deptno
-- 12-右连接
select * from dept d right outer join emp e on e.deptno=d.deptno
-- 13-全连接(mysql不支持全连接)
select * from dept d full outer join emp e on e.deptno=d.deptno
select基本语法:
select
...
from
tableName
where
过滤条件
group by
分组
having
分组之后的过滤
order by
字段 asc|desc
limit
100
在一些需求中
“每”,“每个”,"每天",“各个” group by
统计各个部门 平均工资 大于 2K部门。。
group by having
--查找各个部门 工资 前三的用户
-- 假设:4个部门,每个部门5个人,
-- 结果:12条记录
。。。。
---hive的分析函数
--ROW_NUMBER
--PARTITION:分区、分组
select
empno,ename,sal,deptno,
ROW_NUMBER() over (PARTITION by deptno order by sal desc) as rnk
from
emp
empno ename sal deptno rnk
7839 KING 5000.0 10 1
7782 CLARK 2450.0 10 2
7934 MILLER 1300.0 10 3
7788 SCOTT 3000.0 20 1
7902 FORD 3000.0 20 2
7566 JONES 2975.0 20 3
7876 ADAMS 1100.0 20 4
7369 SMITH 800.0 20 5
7698 BLAKE 2850.0 30 1
7499 ALLEN 1600.0 30 2
7844 TURNER 1500.0 30 3
7654 MARTIN 1250.0 30 4
7521 WARD 1250.0 30 5
7900 JAMES 950.0 30 6
select
t.deptno,t.sal,t.rnk
from
(
select
empno,ename,sal,deptno,
ROW_NUMBER() over (PARTITION by deptno order by sal desc) as rnk
from
emp
) t
where
t.rnk <4
t.deptno t.sal t.rnk
10 5000.0 1
10 2450.0 2
10 1300.0 3
20 3000.0 1
20 3000.0 2
20 2975.0 3
30 2850.0 1
30 1600.0 2
30 1500.0 3
--分析函数:RANK,如果相同的值,排名是一样的。
-- 比如:有2个第一名,那么就没有第二名了
select
empno,ename,sal,deptno,
RANK() over (PARTITION by deptno order by sal desc) as rnk
from
emp
empno ename sal deptno rnk
7839 KING 5000.0 10 1
7782 CLARK 2450.0 10 2
7934 MILLER 1300.0 10 3
7788 SCOTT 3000.0 20 1
7902 FORD 3000.0 20 1
7566 JONES 2975.0 20 3
7876 ADAMS 1100.0 20 4
7369 SMITH 800.0 20 5
7698 BLAKE 2850.0 30 1
7499 ALLEN 1600.0 30 2
7844 TURNER 1500.0 30 3
7654 MARTIN 1250.0 30 4
7521 WARD 1250.0 30 4
7900 JAMES 950.0 30 6
--分析函数:DENSE_RANK,如果相同的值,排名是一样的。
-- 比如:有2个第一名,那么还有第二名了
select
empno,ename,sal,deptno,
DENSE_RANK() over (PARTITION by deptno order by sal desc) as rnk
from
emp
empno ename sal deptno rnk
7839 KING 5000.0 10 1
7782 CLARK 2450.0 10 2
7934 MILLER 1300.0 10 3
7788 SCOTT 3000.0 20 1
7902 FORD 3000.0 20 1
7566 JONES 2975.0 20 2
7876 ADAMS 1100.0 20 3
7369 SMITH 800.0 20 4
7698 BLAKE 2850.0 30 1
7499 ALLEN 1600.0 30 2
7844 TURNER 1500.0 30 3
7654 MARTIN 1250.0 30 4
7521 WARD 1250.0 30 4
7900 JAMES 950.0 30 5