1 Hive 常见的查询
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Select
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY col_list]
[CLUSTER BY col_list
| [DISTRIBUTE BY col_list] [SORT BY col_list]
]
[LIMIT [offset,] rows]
1.1 全表查询
select * from emp;
1.2 指定字段查询
SELECT t.empno,t.ename,t.deptno from emp t;
1.3 =
, >=
, <=
1.4 limit
1.5 between
SELECT t.empno,t.ename,t.deptno from emp t where t.sal between 800 and 1500;
1.6 is null
,is not null
, in
, not in
SELECT t.empno,t.ename,t.deptno from emp t where comm is null;
1.7 max,min,count,sum,avg
SELECT COUNT(*) cnt FROM emp;
SELECT MAX(sal) max_sal FROM emp;
SELECT SUM(sal) FROM emp;
SELECT avg(sal) FROM emp;
2 Hive 高阶查询
2.1 group by
- 按照部门分组,然后查询每个部门的平均工资
SELECT t.deptno,avg(sal) avg_sal from emp t GROUP BY t.deptno;
- 每个部门中每个岗位的最高薪水
SELECT t.deptno,t.job, max(t.sal) max_sal from emp t GROUP BY t.deptno,job;
2.2 having
where
是针对单条记录进行筛选having
针对分组结果进行筛选
- 求每个部门的平均薪资大于 2000 的部门
SELECT deptno,avg(sal) avg_sal from emp GROUP BY deptno having avg_sal>2000;
2.3 join .. on
,等值连接
SELECT e.empno,e.ename,e.deptno FROM emp e join dept d on e.deptno=d.deptno;
2.4 左连接 left join
- 以左表为准
SELECT e.empno,e.ename,e.deptno,d.dname FROM emp e left join dept d on e.deptno=d.deptno;
2.5 right join
SELECT e.empno,e.ename,e.deptno,d.dname FROM emp e right join dept d on e.deptno=d.deptno;
2.6 full join
(用的少)
SELECT e.empno,e.ename,e.deptno,d.dname FROM emp e full join dept d on e.deptno=d.deptno;