<Zhuuu_ZZ>Hive小项目之sql练习50题

5 篇文章 0 订阅

Hive小项目之sql练习50题

一 前言

本项目是学习Hive过程中遇到的一个练习集锦,适合初学Hive的朋友们。如有不正确的地方或者更好的建议麻烦动动小手留下宝贵的意见,谢谢!!!

二 数据准备

在这里插入图片描述
有需要数据的小伙伴请留下邮箱哦

三 创建表格与导入数据

创建四张外部表即本次项目的近源表用以存放数据

1)

create external table exstudents(
sid string,
sname string,
date string,
gender string)
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as textfile
location '/hive/exstudents';

2)

create external table excourse(
cid string,
cname string,
tid string)
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as textfile
location '/hive/excourse';

3)

create external table exteacher(
tid string,
tname string)
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as textfile
location '/hive/exteacher';

4)

create external table exscore(
sid string,
cid string,
score int)
row format delimited
fields terminated by '\t'
lines terminated by '\n'
stored as textfile
location '/hive/exscore';

上传数据:

load data local inpath '/root/hive/course.txt' into table excourse;
load data local inpath '/root/hive/student.txt' into table exstudents;
load data local inpath '/root/hive/teacher.txt' into table exteacher;
load data local inpath '/root/hive/score.txt' into table exscore;

创建内表

create table student as select * from exstudents;
create table course as select * from excourse;
create table teacher as select * from exteacher;
create table score as select * from exscore;

四 项目分析

(1) 查询"01"课程比"02"课程成绩高的学生的信息及课程分数

select s.*,a.score1,a.score2 from student s
join
(select s.sid,s.score score1,s1.score score2 from score s
join score s1 on s.sid=s1.sid
where s.cid='01' and s1.cid='02' and s.score>s1.score) a
on s.sid=a.sid;

//或
select s3.sid,sname,sdate,sage,s1.score,s2.score from score s1
join score s2 on s1.sid=s2.sid and s1.cid=01 and s2.cid=02
join student  s3 on s3.sid=s2.sid
where s1.score>s2.score;//hive中join只能等值连接

(2) 查询"01"课程比"02"课程成绩低的学生的信息及课程分数:

select s.*,a.score1,a.score2 from student s
join
(select s.sid,s.score score1,s1.score score2 from score s
inner join score s1 on s.sid=s1.sid
where s.cid='01' and s1.cid='02' and s.score<s1.score) a
on s.sid=a.sid;

//或
select s3.sid,sname,sdate,sage,s1.score,s2.score from score s1
join score s2 on s1.sid=s2.sid and s1.cid=01 and s2.cid=02
join student  s3 on s3.sid=s2.sid
where s1.score<s2.score;

(3) 查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:

select s.sid,sname,a.avg from student s
join
(select sid,avg(score) as avg from score group by sid having avg>=60) a
on s.sid=a.sid;

(4) 查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:– (包括有成绩的和无成绩的)

select s.sid,sname,if(a.avg is null,0,a.avg) from student s
left join
(select sid,avg(score) as avg from score group by sid) a
on s.sid=a.sid
where if(a.avg is null,0,a.avg)<60;

//或
select s1.sid,sname,avg(score) avg from student s
left join score s1
on s.sid=s1.sid
group by s1.sid,sname
having avg<60 or avg is null;

(5) 查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:

select s.sid,sname,if(a.num is null,0,a.num),if(a.sum is null,0,a.sum) from student s
left join
(select sid,count(1) num,sum(score) as sum from score group by sid) a
on s.sid=a.sid;

//或
select student.sid,sname,count(score),sum(score) from score
right join student on score.sid=student.sid
group by student.sid,sname;

(6) 查询"李"姓老师的数量:

select count(1) from teacher where tname like '李%';

(7) 查询学过"张三"老师授课的同学的信息:

select stu.sid,sname,date,gender from student stu
join score on stu.sid=score.sid
join course on score.cid=course.cid
join teacher on course.tid=teacher.tid
where tname='张三';

with
t1 as (select tid from teacher where tname='张三'),
t2 as (select cid from course c,t1 where c.tid=t1.tid),
t3 as (select distinct sid from score  join t2 on score.cid=t2.cid )
select s.sid,sname,date,gender from student s join t3
on s.sid=t3.sid ;

(8) 查询没学过"张三"老师授课的同学的信息:

with
t1 as (select stu.sid,sname,date,gender from student stu
join score on stu.sid=score.sid
join course on score.cid=course.cid
join teacher on course.tid=teacher.tid
where tname='张三')
select stu.sid,stu.sname,stu.date,stu.gender from student stu left join
t1 on stu.sid=t1.sid where t1.sname is null;

//或
with
t1 as (select tid from teacher where tname='张三'),
t2 as (select cid from course c,t1 where c.tid=t1.tid),
t3 as (select distinct sid,score from score left join t2 on score.cid=t2.cid where t2.cid is not null)
select s.sid,sname,date,gender from student s left join t3
on s.sid=t3.sid where t3.score is null;

(9) 查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:

select s.sid,s.sname,s.date,s.gender from student s
join score s1 on s.sid=s1.sid
join score s2 on s2.sid=s1.sid
where s1.cid='01'
and s2.cid='02';

(10)查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:

with
t1 as (select sid from score where cid='01'),
t2 as (select sid from score where cid='02'),
t3 as (select t1.sid from t1 left join t2 on t1.sid=t2.sid where t2.sid is null)
select s.sid,sname,date,gender from student s join t3 on s.sid=t3.sid;

(11)查询没有学全所有课程的同学的信息:

with
t1 as (select count(1) as num from course),
t2 as (selekct sid,count(1) as num from score group by sid),
t3 as (select sid from t2,t1 where t2.num=t1.num)
select s.sid,sname,date,gender from student s left join t3 on s.sid=t3.sid where t3.sid is null;

//或
with
r1 as (select count(1) c1 from course),
r2 as (select sid,count(1) c2 from score group by sid )
select student.sid,sname,sdate,sage from student,r1
join r2 on student.sid=r2.sid
where c2<c1;

(12)查询至少有一门课与学号为"01"的同学所学相同的同学的信息:

select a.sid,sname,date,gender from student
join
(select sid from score s
where s.cid in (select cid from score where sid=01)
group by s.sid) a
on student.sid=a.sid
where a.sid<>01;


//或
with
t1 as (select cid from score where sid='01'),
t2 as (select distinct sid from score s left join t1 on s.cid=t1.cid where t1.cid is not null)
select s.sid,sname,date,gender from student s  join t2 on s.sid=t2.sid and s.sid!='01';

(13)查询和"01"号的同学学习的课程完全相同的其他同学的信息:

with
r1 as (select cid from score where sid=01),
r2 as (select count(1) c1 from r1),
r3 as (select sid,count(1) c2 from score join r1 on score.cid=r1.cid group by sid),
r4 as (select sid,count(1) c3 from score group by sid),
r5 as (select r3.sid from r3,r2 join r4 on r3.sid=r4.sid where c2==c1 and c1==c3)
select r5.sid,sname,sdate,sage from r5
join student on r5.sid=student.sid
where r5.sid!=01;

(14)查询没学过"张三"老师讲授的任一门课程的学生姓名:

select student.sname from student left join
(select sid from score join course on score.cid=course.cid 
 join teacher on course.tid=teacher.tid and teacher.tname='张三' ) a
on student.sid=a.sid where a.sid is null ;

(15)查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:

with
t1 as (select sid,count(1) num from score s where score<60 group by sid having num>=2),
t2 as (select s.sid,avg(score) avg from score s join t1 on s.sid=t1.sid group by s.sid)
select sname,s.sid,avg from student s join t2 on s.sid=t2.sid;

//或
select score.sid,sname,avg(score) from student
join score on student.sid=score.sid
where score<60
group by score.sid,sname
having count(1)>=2;

(16)检索"01"课程分数小于60,按分数降序排列的学生信息:

select s.sid,sname,date,gender,a.score from student s
join (select sid,score from score where cid='01' and score<60) as a on s.sid=a.sid
order by a.score desc;

(17)按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:

with
r1 as (select sid,avg(score) avg from score group by sid)
select score.sid,cid,score,avg from score 
join r1 on score.sid=r1.sid
order by avg desc;

(18)查询各科成绩最高分、最低分和平均分:

以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率

with 
t1 as (select cid,sid  from score group by cid,sid)
select score.cid,cname,max(score),min(score),avg(score),
sum(case when score>=60 then 1 else 0 end)/count(t1.sid)
 from score join course on score.cid=course.cid join t1 on t1.cid=score.cid group by score.cid,cname ;

(19)按各科成绩进行排序,并显示排名:

select sid,cid,score,rank() over(partition by cid order by score) from score;

(20)查询学生的总成绩并进行排名

//与执行顺序有关,先执行from->group by->函数计算->别名指定。。。
select sid,sum(score) sum,row_number() over(order by sum(score) desc) r from score 
group by sid 

//或
with
t1 as (select sid,sum(score) as sum from score group by sid )
select sid,sum,row_number() over(order by sum desc) from t1;

(21)查询不同老师所教不同课程平均分从高到低显示:

select c.tid,c.cid,avg(s.score) as avg from score s join course c
on s.cid=c.cid group by c.tid,c.cid order by avg desc;

(22)查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:

with
t1 as (select cid,sid,score,
row_number() over(partition by cid order by score desc) r from score )
select cid,student.sid,sname,date,gender ,score from t1 
join student on t1.sid=student.sid where r between 2 and 3;

(23)统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比

with
t1 as (select cid,count(1) c1 from score group by cid),
t2 as (select score.cid,cname,c1,
sum(case when score>85 and score<=100 then 1 else 0 end) `[100-85]`,
sum(case when score>70 and score<=85 then 1 else 0 end) `[85-70]`,
sum(case when score>60 and score<=70 then 1 else 0 end) `[70-60]`,
sum(case when score>0 and score<=60 then 1 else 0 end) `[0-60]`
from score join t1 on score.cid=t1.cid
join course on score.cid=course.cid
group by score.cid,cname,c1)
select cid,cname,`[100-85]`,`[100-85]`/c1 ,`[85-70]`,`[85-70]`/c1,`[70-60]`,`[70-60]`/c1,`[0-60]`,`[0-60]`/c1 from t2;

(24)查询学生平均成绩及其名次:

with
t1 as (select sid,avg(score) as avg from score group by sid)
select sid,avg,row_number() over(order by avg desc) from t1;

(25)查询各科成绩前三名的记录

with
t1 as (select cid,sid,score,row_number() over(partition by cid order by score desc) as rank from score)
select * from t1 where rank<=3;

(26)查询每门课程被选修的学生数:

select cid,count(1) from score
group by cid;

(27)查询出只有两门课程的全部学生的学号和姓名:

with
t1 as (select sid,count(1) from score group by sid having count(1)=2)
select s.sid,sname from student s join t1 on s.sid=t1.sid;

(28)查询男生、女生人数:

select sum(case when gender='男' then 1 else 0 end) man,
sum(case when gender='女' then 1 else 0 end) woman
from student;

//或
select gender,count(1) from student group by gender;

(29)查询名字中含有"风"字的学生信息:

select * from student where sname like '%风%';

(30)查询同名同性学生名单,并统计同名人数:

select sname,count(1) from student
group by sname
having count(1)>=2;

(31)查询1990年出生的学生名单:

select sname,date from student
where date like '1990%';

(32)查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:

select cid,avg(score) avg
from score
group by cid
order by avg desc,cid;

(33)查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:

with
t1 as (select sid,avg(score) avg from score group by sid having avg>=85)
select s.sid,sname,avg from student s,t1
where s.sid=t1.sid;

(34)查询课程名称为"数学",且分数低于60的学生姓名和分数:

with
t1 as (select cid from course where cname='数学'),
t2 as (select sid,score from score s,t1 where s.cid=t1.cid and score<60)
select sname,score from student s,t2 where s.sid=t2.sid;

//或
with
t1 as (select cid from course where cname='数学')
select sname,score from score join t1 on score.cid=t1.cid
join student on score.sid=student.sid
where score<60;

(35)查询所有学生的课程及分数情况:

select sid,
sum(case when cid='01' then score else 0 end) chinese,
sum(case when cid='02' then score else 0 end) math,
sum(case when cid='03' then score else 0 end) english
from score
group by sid;

(36)查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:

with
t1 as (select sid,cid,score from score where score>70 )
select sname,cname,score from t1
join student s on s.sid=t1.sid
join course c on c.cid=t1.cid;

(37)查询课程不及格的学生:

select distinct s.sid from student s
join score s1 on s1.sid=s.sid
where score<60;

(38)查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:

with
t1 as (select sid from score where cid='01' and score>=80)
select s.sid,sname from student s,t1 where s.sid=t1.sid;

(39)求每门课程的学生人数:

select cid,count(1) from score group by cid;

(40)查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩:

with
t1 as (select score.cid,sid,score from score
join course on score.cid=course.cid
join teacher on course.tid=teacher.tid
where tname='张三'),
t2 as (select cid,sid,score,rank() over(partition by cid order by score desc) r from t1)
select cid,t2.sid,score ,sname,sdate,gender from t2 
join student on t2.sid=student.sid
where r=1;

(41)查询不同课程成绩相同的同一学生的学生编号、课程编号、学生成绩:

with
t1 as (select *,count(*) over(partition by sid, score) as num from score)
select sid,cid,score from t1 where num > 1;

//或
select distinct s1.sid,s1.cid,s1.score from score s1
join score s2 on s1.sid=s2.sid
and s1.score=s2.score
where s1.cid!=s2.cid;

(42)查询每门课程成绩最好的前三名:

with
t1 as (select cid,sid,score,row_number() over(partition by cid order by score desc) as rank from score)
select * from t1 where rank<=3;

(43)统计每门课程的学生选修人数(超过5人的课程才统计):

select cid,count(1) num from score group by cid having num>=5 order by num desc,cid;

(44)检索至少选修两门课程的学生学号:

select sid,count(cid) num from score group by sid having num>=2;

(45)查询选修了全部课程的学生信息:

with
t1 as (select count(1) num from course),
t2 as (select sid,count(distinct cid) num2 from score group by sid),
t3 as (select sid from t2,t1 where num2=num)
select * from student,t3
where student.sid=t3.sid;

(46)查询各学生的年龄(周岁):

select *,(year(current_date())-year(sdate)) as age from student;

(47)查询本周过生日的学生:

with
t1 as (select sid, concat_ws("-",cast (year(current_date) as string),cast(month(sdate) as string),cast(day(sdate) as string)) cw from student)
select sid from t1
where weekofyear(cw)=weekofyear(current_date());

(48)查询下周过生日的学生:

with
t1 as (select sid, concat_ws("-",cast (year(current_date) as string),cast(month(sdate) as string),cast(day(sdate) as string)) cw from student)
select sid from t1
where weekofyear(cw)=weekofyear(current_date())+1;

(49)查询本月过生日的学生:

select *
from student
where month(date)=month(current_date());

(50)查询12月份过生日的学生:

select *
from student
where month(date)=12;
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值