大数据基础:HQL 入门必练50题(1-15题)
一、表数据
student
sid,sname,sbirth,ssex
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女
————————————————
course
cid,cname,tid
01 语文 02
02 数学 01
03 英语 03
————————————————
teacher
tid,tname
01 张三
02 李四
03 王五
————————————————
score
sid,cid,sscore
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98
————————————————
二、建表
将以上表中分隔符皆改为','
后建表
create table student(sid int,sname string,sbirth string,ssex string) row format delimited fields terminated by ',';
create table course(cid int,cname string,tid int) row format delimited fields terminated by ',';
create table teacher(tid int,tname string) row format delimited fields terminated by ',';
create table score(sid int,cid int,sscore int) row format delimited fields terminated by ',';
三、习题
1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数:
select student.*,a.sscore as 01_score,b.sscore as 02_score
from student
inner join score a on student.sid=a.sid and a.cid=01
--获得仅有01课程成绩的学生信息表,inner排除空值,若left输出空值
inner join score b on student.sid=b.sid and b.cid=02
/*仅有01课程成绩的学生信息表与score表left外连接,输出带有空值的01、02课程成绩的学生信息表,
若是inner内连接,输出的是所有有01成绩的人的01和02成绩信息表*/
where a.sscore>b.sscore;
2、查询"01"课程比"02"课程成绩低的学生的信息及课程分数:
select student.*,a.sscore as 01_score,b.sscore as 02_score
from student
inner join score a on student.sid=a.sid and a.cid=01
left join score b on student.sid=b.sid and b.cid=02
where a.sscore<b.sscore;
3、查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:
select a.sid,sname,avg_score from lianxi_1.student a
inner join
(select sid,avg(sscore) avg_score from lianxi_1.score group by sid having avg_score >= 60) b
on a.sid = b.sid;--结果未保留两位小数
--round和having无法同时使用进行过滤
select a.sid,sname,avg_score from lianxi_1.student a
inner join
(select sid,round(avg(sscore),2) avg_score
from lianxi_1.score group by sid having avg_score >= 60) b
on a.sid = b.sid;--所以此处无法运行无法运行
--无法直接round精度取整,只能先round后嵌套where过滤
select a.sid,sname,b.avg_score from lianxi_1.student a
inner join
(select * from (select sid,round(avg(sscore),2) avg_score
from lianxi_1.score group by sid) t1 where t1.avg_score >= 60) b
on a.sid = b.sid;
4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩(包括有成绩的和无成绩的)
select a.sid,sname,avg_score from lianxi_1.student a
left join
(select sid,avg(sscore) avg_score from lianxi_1.score group by sid) b
on a.sid = b.sid where avg_score < 60 or avg_score is null;--结果未保留两位小数,有强迫症的可自行优化
5、查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:
select sid,count(1) course_cn,sum(sscore) sum_score from score group by sid;
--按学生编号查询选课总数、所有课程的总成绩
select a.sid id,sname,course_cn,sum_score from student a
inner join
(select sid,count(1) course_cn,sum(sscore) sum_score from score group by sid) b
on a.sid = b.sid;
--不显示空值,即无课无成绩同学不显示
select a.sid id,sname,course_cn,sum_score from student a
left join
(select sid,count(1) course_cn,sum(sscore) sum_score from score group by sid) b
on a.sid = b.sid;
--显示空值
6、查询"李"姓老师的数量:
select count(1) from teacher where tname like'李%';
7、查询学过"张三"老师授课的同学的信息:
select * from student a
inner join score b on a.sid = b.sid
inner join course c on b.cid = c.cid
inner join teacher d on c.tid = d.tid
where tname ='张三';
8、查询没学过"张三"老师授课的同学的信息:
select t1.sid,sname,sbirth,ssex from student t1
left join
(select b.sid,tname from score b
inner join course c on b.cid = c.cid
inner join teacher d on c.tid = d.tid
where tname ='张三')t2
on t1.sid = t2.sid
where t2.tname is null;
--小TIP:!!!!!!去重distinct只能放在select后面,不能放在字段中间
9、查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:
--方法一:
select t1.sid, sname,sbirth,ssex from student t1 inner join
(select a.sid from
(select * from score where cid = 1) a inner join
(select * from score where cid = 2) b
on a.sid = b.sid) t2
on t1.sid = t2.sid --此步结束会乱序
order by t1.sid;
--方法二:
select t1.sid, sname,sbirth,ssex from student t1 inner join
(
select a.sid from score a inner join
(select sid from score where cid = 1) b on a.sid = b.sid
and cid = 2
) t2
on t1.sid = t2.sid --此步结束会乱序
order by t1.sid;
select a.sid,sname,sbirth,ssex from student a
where a.sid in (select sid from score where cid = 1)
and a.sid in (select sid from score where cid = 2);
--有问题,思考,where后面只能接一个过滤条件吗?
10、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:
select t1.sid,sname,sbirth,ssex from student t1 inner join
(select a.sid from
(select sid from score where cid = 1) a left join
(select sid from score where cid = 2) b
on a.sid = b.sid where b.sid is null
) t2
on t1.sid = t2.sid --若输出结果不止一条,此步结束会乱序
order by t1.sid;
11、查询没有学全所有课程的同学的信息:
--方法一:仅显示上过课的同学,未显示一门课都没上的“08 王菊”
select a.* from student a ,
(select sid,count(1) cn from score group by sid having cn < 3) b
where a.sid = b.sid;
-- 方法二:
select a.* from student a where a.sid not in
(
select b.sid from
(select sid,count(1) cn from score group by sid
having cn = 3) b
);
--此举欠妥,这是已知只有三门的情况下才适用的,但若非有上帝视角,数据量一大此举便不可行
方法三:
select * from
(
select sid,t1.cn,
case when count(t2.cid)<t1.cn then '1'
else '0' end as cc
from
(select count(1) as cn from course ) as t1 ,score as t2
group by t2.sid,t1.cn
) as t4
left join student t3 on t4.sid=t3.sid
where t4.cc=1;
-- 未显示一门课都没上的“08 王菊”
-- 方法四:
select a.* from student a where a.sid not in
(
select t1.sid from
(select sid,count(1) cn from score group by sid) t1
inner join
(select count(1) cnn from course) t2
on t1.cn = t2.cnn
);
12、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:
select t1.sid,sname,sbirth,ssex from student t1 inner join
(select a.sid from score a
where cid in (select cid from score where sid = 1)
group by a.sid) t2
on t1.sid = t2.sid;
13、查询和"01"号的同学学习的课程完全相同的其他同学的信息:
--建表时需要将cid改为字符串string,而不是int类型才好操作
>此代码引用自https://blog.csdn.net/Thomson617/article/details/83212338
-- –备注:hive不支持group_concat方法,可用 concat_ws(’|’, collect_set(str)) 实现
select student.*,t1.course_id from student
join (select sid ,concat_ws('|', collect_set(cid)) course_id from score
group by sid having sid not in (1))t1
on student.sid = t1.sid
join (select concat_ws('|', collect_set(cid)) course_id2
from score where sid=1)t2
on t1.course_id = t2.course_id2;
14、查询没学过"张三"老师讲授的任一门课程的学生姓名:
select t1.sid,sname,sbirth,ssex from student t1
left join
(select b.sid,tname from score b
inner join course c on b.cid = c.cid
inner join teacher d on c.tid = d.tid
where tname ='张三')t2
on t1.sid = t2.sid
where t2.tname is null;
15、查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:
-- 代码思路:
--1.分数小于60(不及格)的人学号
select sid,sname,avg_score
select sid from score where sscore < 60;
--2.两门及其以上不及格课程的同学的学号
select a.sid,count(1) cn from
(select sid from score where sscore < 60) a
group by a.sid having cn > 1;
--3.求平均成绩和学号
select sid,avg(sscore) avg_score from score group by sid;
--最终代码:
select t1.sid,sname,avg_score from student t1 inner join
(select sid,avg(sscore) avg_score from score group by sid) t2
on t1.sid = t2.sid
inner join
(select a.sid,count(1) cn from
(select sid from score where sscore < 60) a
group by a.sid having cn > 1) t3
on t2.sid = t3.sid;
总结:
以上皆为本人练习结果,若有偏差请多指教,大家一起交流学习!
后续跳转–
大数据基础:HQL 入门必练50题(15-30题)
https://blog.csdn.net/m0_56919489/article/details/117590061