原始链接:https://blog.csdn.net/Thomson617/article/details/83280617
Hive sql语句必练50题-入门到精通(2)
承接: Hive sql语句必练50题-入门到精通(1)
– 21、查询不同老师所教不同课程平均分从高到低显示:
– 方法1
-
select course.c_id,course.t_id,t_name,round(avg(s_score),
2)
as avgscore
from course
-
join teacher
on teacher.t_id=course.t_id
-
join score
on course.c_id=score.c_id
-
group
by course.c_id,course.t_id,t_name
order
by avgscore desc;
– 方法2
-
select course.c_id,course.t_id,t_name,round(avg(s_score),
2)
as avgscore
from course,teacher,score
-
where teacher.t_id=course.t_id
and course.c_id=score.c_id
-
group
by course.c_id,course.t_id,t_name
order
by avgscore desc;
– 22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:
-
select tmp1.
*
from
-
(
select
*
from score
where c_id
=
'01'
order
by s_score
desc limit
3)tmp1
-
order
by s_score
asc limit
2
-
union
all
select tmp2.
*
from
-
(
select
*
from score
where c_id
=
'02'
order
by s_score
desc limit
3)tmp2
-
order
by s_score
asc limit
2
-
union
all
select tmp3.
*
from
-
(
select
*
from score
where c_id
=
'03'
order
by s_score
desc limit
3)tmp3
-
order
by s_score
asc limit
2;
– 23、统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比
-
select c.c_id,c.c_name,tmp1.s0_60, tmp1.percentum,tmp2.s60_70, tmp2.percentum,tmp3.s70_85, tmp3.percentum,tmp4.s85_100, tmp4.percentum
-
from course c
-
join(
select c_id,sum(
case
when s_score<
60
then
1
else
0
end )
as s0_60,
-
round(
100*sum(
case
when s_score<
60
then
1
else
0
end )/count(c_id),
2)
as percentum
-
from score
group
by c_id)tmp1
on tmp1.c_id =c.c_id
-
left
join(
select c_id,sum(
case
when s_score<
70
and s_score>=
60
then
1
else
0
end )
as s60_70,
-
round(
100*sum(
case
when s_score<
70
and s_score>=
60
then
1
else
0
end )/count(c_id),
2)
as percentum
-
from score
group
by c_id)tmp2
on tmp2.c_id =c.c_id
-
left
join(
select c_id,sum(
case
when s_score<
85
and s_score>=
70
then
1
else
0
end )
as s70_85,
-
round(
100*sum(
case
when s_score<
85
and s_score>=
70
then
1
else
0
end )/count(c_id),
2)
as percentum
-
from score
group
by c_id)tmp3
on tmp3.c_id =c.c_id
-
left
join(
select c_id,sum(
case
when s_score>=
85
then
1
else
0
end )
as s85_100,
-
round(
100*sum(
case
when s_score>=
85
then
1
else
0
end )/count(c_id),
2)
as percentum
-
from score
group
by c_id)tmp4
on tmp4.c_id =c.c_id;
– 24、查询学生平均成绩及其名次:
-
select tmp.
*,
row_number()
over(
order
by tmp.avgScore
desc) Ranking
from
-
(
select student.s_id,
-
student.s_name,
-
round(
avg(score.s_score),
2)
as avgScore
-
from student
join score
-
on student.s_id
=score.s_id
-
group
by student.s_id,student.s_name)tmp
-
order
by avgScore
desc;
– 25、查询各科成绩前三名的记录
–课程id为01的前三名
-
select score.c_id,course.c_name,student.s_name,s_score
from score
-
join student
on student.s_id=score.s_id
-
join course
on score.c_id=
'01' and course.c_id=score.c_id
-
order
by s_score desc limit
3;
–课程id为02的前三名
-
select score.c_id,course.c_name,student.s_name,s_score
-
from score
-
join student
on student.s_id=score.s_id
-
join course
on score.c_id=
'02' and course.c_id=score.c_id
-
order
by s_score desc limit
3;
–课程id为03的前三名
-
select score.c_id,course.c_name,student.s_name,s_score
-
from score
-
join student
on student.s_id=score.s_id
-
join course
on score.c_id=
'03' and course.c_id=score.c_id
-
order
by s_score desc limit
3;
– 26、查询每门课程被选修的学生数:
-
select c.c_id,c.c_name,tmp.number
from course c
-
join (
select c_id,count(
1)
as number
from score
-
where score.s_score<
60
group
by score.c_id)tmp
-
on tmp.c_id=c.c_id;
– 27、查询出只有两门课程的全部学生的学号和姓名:
-
select st.s_id,st.s_name
from student st
-
join (
select s_id
from score
group
by s_id
having
count(c_id)
=
2)tmp
-
on st.s_id
=tmp.s_id;
– 28、查询男生、女生人数:
-
select tmp1.man,tmp2.
women from
-
(
select count(1)
as man
from student
where s_sex=
'男')tmp1,
-
(
select count(1) as women from student where s_sex=
'女')tmp2;
– 29、查询名字中含有"风"字的学生信息:
select * from student where s_name like '%风%';
– 30、查询同名同性学生名单,并统计同名人数:
-
select s1.s_id,s1.s_name,s1.s_sex,count(*)
as sameName
-
from student s1,student s2
-
where s1.s_name=s2.s_name
and s1.s_id<>s2.s_id
and s1.s_sex=s2.s_sex
-
group
by s1.s_id,s1.s_name,s1.s_sex;
– 31、查询1990年出生的学生名单:
select * from student where s_birth like '1990%';
– 32、查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:
-
select score.c_id,c_name,round(avg(s_score),
2)
as avgScore
from score
-
join course
on score.c_id=course.c_id
-
group
by score.c_id,c_name
order
by avgScore desc,score.c_id asc;
– 33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:
-
select score.s_id,s_name,round(avg(s_score),
2)
as avgScore
from score
-
join student
on student.s_id=score.s_id
-
group
by score.s_id,
s_name having avg(s_score) >=
85;
– 34、查询课程名称为"数学",且分数低于60的学生姓名和分数:
-
select s_name,s_score
as mathScore
from student
-
join (
select s_id,s_score
-
from score,course
-
where score.c_id=course.c_id
and c_name=
'数学')tmp
-
on tmp.s_score <
60
and student.s_id=tmp.s_id;
– 35、查询所有学生的课程及分数情况:
-
select a.s_name,
-
SUM(
case c.c_name
when
'语文'
then b.s_score
else
0
end )
as chainese,
-
SUM(
case c.c_name
when
'数学'
then b.s_score
else
0
end )
as math,
-
SUM(
case c.c_name
when
'英语'
then b.s_score
else
0
end )
as english,
-
SUM(b.s_score)
as sumScore
-
from student a
-
join score b
on a.s_id
=b.s_id
-
join course c
on b.c_id
=c.c_id
-
group
by s_name,a.s_id;
后续部分参见:
https://blog.csdn.net/Thomson617/article/details/83281254
Hive下的SQL经验总结:
-
(
1).不支持非等值连接,一般使用
left
join、
right
join 或者
inner
join替代。
-
•
SQL中对两表内联可以写成:
-
select
*
from dual a,dual b
where a.key
= b.key;
-
•Hive中应为:
-
select
*
from dual a
join dual b
on a.key
= b.key;
-
而不是传统的格式:
-
SELECT t1.a1
as c1, t2.b1
as c2
FROM t1, t2
WHERE t1.a2
= t2.b2
-
-
(
2).分号字符:不能智能识别concat(‘;’,key),只会将‘;’当做
SQL结束符号。
-
•分号是
SQL语句结束标记,在HiveQL中也是,但是在HiveQL中,对分号的识别没有那么智慧,例如:
-
•
select concat(key,concat(
';',key))
from dual;
-
•但HiveQL在解析语句时提示:
-
FAILED: Parse Error: line
0:
-1 mismatched input
'<EOF>' expecting )
in
function specification
-
•解决的办法是,使用分号的八进制的ASCII码进行转义,那么上述语句应写成:
-
•
select concat(key,concat(
'\073',key))
from dual;
-
-
(
3).不支持
INSERT
INTO 表
Values(),
UPDATE,
DELETE等操作.这样的话,就不要很复杂的锁机制来读写数据。
-
INSERT
INTO syntax
is
only available starting
in version
0.8。
INSERT
INTO就是在表或分区中追加数据。
-
-
(
4).HiveQL中String类型的字段若是空(
empty)字符串, 即长度为
0, 那么对它进行
IS
NULL的判断结果是
False,使用
left
join可以进行筛选行。
-
-
(
5).不支持 ‘
< dt
<’这种格式的范围查找,可以用dt
in(”,”)或者
between替代。
-
-
(
6).Hive不支持将数据插入现有的表或分区中,仅支持覆盖重写整个表,示例如下:
-
INSERT OVERWRITE
TABLE t1
SELECT
*
FROM t2;
-
-
(
7).
group
by的字段,必须是
select后面的字段,
select后面的字段不能比
group
by的字段多.
-
如果
select后面有聚合函数,则该
select语句中必须有
group
by语句;
-
而且
group
by后面不能使用别名;
-
有聚合函数存在就必须有
group by.
-
-
(
8).
select ,
where 及
having 之后不能跟子查询语句(一般使用
left
join、
right
join 或者
inner
join替代)
-
-
(
9).先
join(及
inner
join) 然后
left
join或
right
join
-
-
(
10).hive不支持group_concat方法,可用 concat_ws(
'|', collect_set(str)) 实现
-
-
(
11).
not
in 和
<> 不起作用,可用
left
join tmp
on tableName.id
= tmp.id
where tmp.id
is
null 替代实现
-
-
(
12).hive 中‘不等于’不管是用! 或者
<>符号实现,都会将空值即
null过滤掉,此时要用
-
where (white_level
<>
'3'
or white_level
is
null)
-
或者
where (white_level
!=
'3'
or white_level
is
null ) 来保留
null 的情况。
-
-
(
13).
union
all 后面的表不加括号,不然执行报错;
-
hive也不支持顶层的
union
all,使用子查询来解决;
-
union
all 之前不能有DISTRIBUTE
BY
| SORT
BY
|
ORDER
BY
| LIMIT 等查询条件