hive 简单查询

简单查询

between

-- SQL执行顺序:
    from > join > where > group by > having > select >order by 
-- 需求:获取薪资水平在 1000-3000
SELECT
*
FROM emp WHERE sal between 1000 and 3000

in

-- 获取emp表中,部门编号为 10 和 20 
SELECT
*
FROM emp WHERE deptno in (10,20)
​
-- 获取emp表中,大于平均薪资水平的员工
select
T2.*
from emp T2 where T2.sal > (select avg(T1.sal) from emp T1)
​
-- 获取emp表中,薪资水平有5000的部门中所有员工的信息
SELECT
*
FROM emp WHERE deptno in (SELECT deptno from emp T1 where T1.sal = 5000)

关联

JOIN

-- 在数据量较小的情况下,MR执行过程只有Map端,说明join发生在map端,
-- 在hive中对join有优化
SELECT
T1.*
,T2.deptname
FROM emp T1 JOIN dept T2 ON T1.deptno = T2.deptno
WHERE T2.deptno = 20;

LEFT JOIN

SELECT
T1.*
,T2.*
FROM dept T1 LEFT JOIN emp T2 ON T1.deptno = T2.deptno

RIGHT JOIN

-- 左右关联时,保存主表中的所有数据, RIGHT JOIN 以右表为主表,LEFT JOIN 以左表为主表
SELECT
T1.*
,T2.*
FROM dept T1 RIGHT JOIN emp T2 ON T1.deptno = T2.deptno

GROUP BY

-- 根据部门编号取各部门中薪资最高人员
SELECT
deptno
,max(sal) as sal
FROM emp
GROUP BY deptno 
​
-- 根据部门编号取各部门中薪资最高人员的详细人员信息
-- 注意:GROUP BY中不包含的字段不能在SELECT中存在
SELECT
deptno
,max(sal) as sal
FROM emp
GROUP BY deptno 
​
SELECT
T1.*
FROM emp T1 JOIN (
SELECT
deptno
,max(sal) as sal
FROM emp
GROUP BY deptno 
)T2 ON T1.deptno = T2.deptno AND T2.sal = T1.sal

排序

ORDER BY

查看及设置Reduce数量

set mapreduce.job.reduces = 2;

set mapreduce.job.reduces;

-- ORDER BY在执行过程中,只能获取一个Reduce,如果数据量较大的情况下,单个reduce处理速度会非常慢,
SELECT
T1.sal
,T1.empno
,T1.deptno
FROM emp T1
ORDER BY T1.sal

sort by

set mapreduce.job.reduces = -1;
SELECT
T1.sal
,T1.empno
,T1.deptno
FROM emp T1
sort by T1.sal
​
set mapreduce.job.reduces=2;
​
INSERT OVERWRITE LOCAL DIRECTORY "/usr/local/soft/data/emp_sort2"
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT
T1.sal
,T1.empno
,T1.deptno
FROM emp T1
sort by T1.sal
-- 通过查看输出文件,可以了解到,当reduce数量大于1时,会生成多个文件,并在文件中,数据是有序的

distribute by + sort by

-- distribute by是给字段进行做分区,和sort by 搭配使用时,可以对分区内的数据进行做排序
set mapreduce.job.reduces=3;
INSERT OVERWRITE LOCAL DIRECTORY "/usr/local/soft/data/emp_distribute"
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT
T1.sal
,T1.empno
,T1.deptno
FROM emp T1
distribute by T1.deptno sort by T1.deptno,T1.sal
-- 如果当排序列只有一个时,distribute by + sort by并且reduce数量大于1可能会产生错误结果

cluster by

set mapreduce.job.reduces=2
SELECT
T1.sal
,T1.empno
,T1.deptno
FROM emp T1
cluster by T1.deptno,T1.sal

去重

distinct

-- 当reduce数量大于1时也可以得到正确结果
set mapreduce.job.reduces=2;
select
distinct job
FROM emp

group by

-- 当reduce数量大于1时也可以得到正确结果
set mapreduce.job.reduces=2
select
job
FROM emp
GROUP BY job

WITH AS

#打印出每个部门工资最高的员工信息
WITH T2 as (
SELECT
deptno
,max(sal) as sal
FROM emp
GROUP BY deptno 
)
, T3 AS (
select T1.*
FROM emp T1 JOIN T2 ON T1.deptno = T2.deptno AND T2.sal = T1.sal
)
SELECT
*
FROM T3 JOIN T2 ON T3.deptno = T2.deptno
-- 场景:
-- 通常 WITH AS被应用在某些表被反复利用的场景中,或者是SQL相对比较复杂的业务逻辑中
​

拼接

UNION 去重

UNION ALL:不去重

函数

查看函数

-- 查询所有函数
show functions
-- 模糊查询
show functions like '%sub%';
-- 查看函数的描述信息
desc function 'substr';
-- 查看函数的描述信息及样例
desc function extended 'substr';

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值