1. select
select [ALL | DISTINCT] select_expr,select_expr,...
from table_reference
[where where_condition]
[group by col_list [having condition]]
[cluster by col_list
| [distribute by col_list] [sort by | order by col_list]
]
[limit number]
2. 查询语法
- 全表查询
select * from score;
- 选择特定列
select s_id,c_id from score;
- 列别名
select s_id as myid ,c_id from score;
3. 常用函数
- count:求总行数
select count(1) from score;
- max:求分数的最大值
select max(s_score) from score;
- min:求分数的最小值
select min(s_score) from score;
- sum:求分数的总数
select sum(s_score) from score;
- avg:求分数的平均值
select avg(s_score) from score;
4.limit语句
用于限制返回的行数。
select* from score limit 3;
5.where语句
使用where子句,将不满足条件的行过滤掉。
where子句紧随from子句。
select * from score where s_score > 60;
- 比较运算符:
A <=> B
A [NOT] BETWEEN B AND C
A IS NULL
A IS NOT NULL
IN(数值1,数值2)
A [NOT] LIKE B
A RLIKE B,A REGEXP B
6. LIKE和RLIKE
%代表零个或多个字符(任意个字符)
_代表一个字符
例:查找以8开头的所有成绩
select * from score where s_score like '8%';
查找s_id中含1的数据
select * from score where s_id rlike '[1]'; -- 等价于 like '%1%'
7.逻辑运算符
AND:逻辑并
OR:逻辑或
NOT:逻辑否
8. 分组
- GROUP BY语句
通常会与聚合函数一起使用,按照一个或多个列对结果进行分组,然后对每个组执行聚合操作。
例:求每个学生的平均成绩
select s_id ,avg(s_score) from score group by s_id;
- HAVING语句
对分组后的结果进行筛选(where作用于分组前的行,不能使用聚合函数;having作用于分组后的结果,可以使用聚合函数)。
例:求平均成绩大于85的学生
select s_id ,avg(s_score) avgscore from score group by s_id having avgscore > 85;
9.JOIN语句
将表关联到一起。
- 等值join
例:查询分数对应的姓名
select s.s_id,s.s_score,stu.s_name,stu.s_birth from score s join student stu on s.s_id = stu.s_id;
- 表的别名
好处:
- 使用别名可以简化查询
- 使用表名前缀可以提高执行效率
- 内连接
只有进行连接的两个表中都存在与连接条件相匹配的数据才会被保留下来。
(即两个表的交集)
select * from teacher t inner join course c on t.t_id = c.t_id;
- 左外连接
join操作符左边表中符合where子句的所有记录将会被返回。
select * from teacher t left join course c on t.t_id = c.t_id;
- 右外连接
join操作符右边表中符合where子句的所有记录将会被返回。
select * from teacher t right join course c on t.t_id = c.t_id;
- 多表连接
- 连接n个表,至少需要n-1个连接条件。
10.排序
- 全局排序
order by :全局排序(一个reduce)
使用order by子句排序:asc(升序) desc(降序)
select * from teacher t right join course c on t.t_id = c.t_id order by t.t_id desc;
- 按照别名排序
select s_id ,avg(s_score) avg_score from score group by s_id order by avg_score;
- 多个列排序
select s_id ,avg(s_score) avg_score from score group by s_id order by s_id,avg_score;
- 每个MapReduce内部排序(sort by)局部排序
Sort By:每个MapReduce(即每个reducer)内部进行排序,对全局结果来说不是排序。
- 设置reduce个数
set mapreduce.job.reduces=3;
- 查看设置reduce个数
set mapreduce.job.reduces;
- 查询成绩按照成绩降序排列
select * from score sort by s_score desc;
- 将查询结果导入到文件中(按照成绩降序排列)
insert overwrite local directory '/export/servers/hivedatas/sort' select * from score sort by s_score desc;
- 分区排序
distribute by:结合sort by使用,控制数据按哪些列分发到各个reducer。 - cluster by:当distribute by和sort by的字段相同时,可以用cluster by代替(只能升序,不能指定asc或desc)