创建数据库:
建表:
-- Tables used by all exercises below. Every table is stored as delimited text
-- with a tab between fields, so the data files must be tab-separated too.
-- The field delimiter must be written with plain ASCII single quotes.

-- One row per student; birthday is kept as a 'yyyy-MM-dd' string.
create table student (
    id       string,
    name     string,
    birthday string,
    sex      string
)
row format delimited fields terminated by '\t';

-- One row per course; tid references teacher.id.
create table course (
    id   string,
    name string,
    tid  string
)
row format delimited fields terminated by '\t';

-- One row per teacher.
create table teacher (
    id   string,
    name string
)
row format delimited fields terminated by '\t';

-- One row per (student, course) result; sid references student.id and
-- cid references course.id.
create table score (
    sid   string,
    cid   string,
    score int
)
row format delimited fields terminated by '\t';
数据:
vi /home/bigdata/data/spark/sql/student.csv
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女
vi /home/bigdata/data/spark/sql/course.csv
01 语文 02
02 数学 01
03 英语 03
vi /home/bigdata/data/spark/sql/teacher.csv
01 张三
02 李四
03 王五
vi /home/bigdata/data/spark/sql/score.csv
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98
导入Hive表
-- Load the four local files into their tables.
-- The paths point at the directory where the files are created in the data
-- section of these notes; adjust them if the files live somewhere else.
load data local inpath '/home/bigdata/data/spark/sql/student.csv' into table student;
load data local inpath '/home/bigdata/data/spark/sql/course.csv' into table course;
load data local inpath '/home/bigdata/data/spark/sql/teacher.csv' into table teacher;
load data local inpath '/home/bigdata/data/spark/sql/score.csv' into table score;
练习题:
1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数:
-- Q1: students whose course-01 score is higher than their course-02 score.
-- Each student is paired with their own 01 row and 02 row, so the two scores
-- can be compared directly without a second self-join.
-- The aliases start with a digit, so they are back-quoted.
select
    stu.*,
    s01.score as `01score`,
    s02.score as `02score`
from student stu
join score s01
    on s01.sid = stu.id
   and s01.cid = '01'
join score s02
    on s02.sid = stu.id
   and s02.cid = '02'
where s01.score > s02.score;
2、查询"01"课程比"02"课程成绩低的学生的信息及课程分数:
-- Q2: students whose course-01 score is lower than their course-02 score.
-- Each student is paired with their own 01 row and 02 row, so the two scores
-- can be compared directly without a second self-join.
-- The aliases start with a digit, so they are back-quoted.
select
    stu.*,
    s01.score as `01score`,
    s02.score as `02score`
from student stu
join score s01
    on s01.sid = stu.id
   and s01.cid = '01'
join score s02
    on s02.sid = stu.id
   and s02.cid = '02'
where s01.score < s02.score;
3、查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:
-- Q3: id, name and average score of students averaging 60 or more.
-- Only students that have at least one score row appear (inner join).
select
    stu.id        as id,
    stu.name      as name,
    avg(sc.score) as scoreavg
from score sc
join student stu
    on stu.id = sc.sid
group by
    stu.id,
    stu.name
having avg(sc.score) >= 60;
4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:
-- Q4: id, name and average score of students averaging below 60.
-- Only students that have at least one score row appear (inner join).
select
    stu.id        as id,
    stu.name      as name,
    avg(sc.score) as scoreavg
from score sc
join student stu
    on stu.id = sc.sid
group by
    stu.id,
    stu.name
having avg(sc.score) < 60;
5、查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:
-- Q5: every student with the number of courses taken and the total score.
-- The left join keeps students without any score row. For them the score
-- columns are NULL, so the count is taken on s1.cid (NULLs are not counted,
-- giving 0 rather than 1) and the NULL sum is turned into 0.
select
    stu.id                    as id,
    stu.name                  as name,
    count(s1.cid)             as `count`,
    coalesce(sum(s1.score), 0) as sumscore
from student stu
left join score s1
    on s1.sid = stu.id
group by
    stu.name,
    stu.id;
6、查询"李"姓老师的数量:
-- Q6: number of teachers whose name starts with 李.
-- The pattern is a plain single-quoted string literal.
select count(1) as `count`
from teacher t
where t.name like '李%';
7、查询学过"张三"老师授课的同学的信息:
-- Q7: students who took at least one course taught by 张三.
-- The IN subquery must be parenthesised; the teacher -> course -> score chain
-- is expressed with joins inside one subquery instead of nested IN clauses.
select stu.*
from student stu
where stu.id in (
    select sc.sid
    from score sc
    join course c
        on c.id = sc.cid
    join teacher t
        on t.id = c.tid
    where t.name = '张三'
);
8、查询没学过"张三"老师授课的同学的信息:
-- Q8: students who never took a course taught by 张三.
-- Anti-join: collect the ids of students taught by 张三, left join them to
-- student, and keep the rows that found no match.
select stu.*
from student stu
left join (
    select distinct sc.sid as sid
    from score sc
    join course c
        on c.id = sc.cid
    join teacher t
        on t.id = c.tid
    where t.name = '张三'
) taught
    on taught.sid = stu.id
where taught.sid is null;
9、查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:
-- Q9: students who took both course 01 and course 02.
-- The self-join pairs a student's 01 row with the same student's 02 row.
select stu.*
from student stu
where stu.id in (
    select s1.sid
    from score s1
    join score s2
        on s2.sid = s1.sid
    where s1.cid = '01'
      and s2.cid = '02'
);
10、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:
-- Q10: students who took course 01 but not course 02.
-- Anti-join against the course-02 takers, then restrict to course-01 takers.
-- ("took 01 and not both" is the same set as "took 01 and not 02".)
select stu.*
from student stu
left join (
    select distinct sid
    from score
    where cid = '02'
) took_02
    on took_02.sid = stu.id
where took_02.sid is null
  and stu.id in (
      select sid
      from score
      where cid = '01'
  );
11、查询没有学全所有课程的同学的信息:
-- Q11: students who have not taken every course.
-- Starting from student with a left join keeps students that have no score
-- row at all (student 08 in the sample data); they have taken 0 courses and
-- therefore belong in the result. count(sc.cid) ignores the NULLs produced
-- by the left join.
select
    stu.id,
    stu.name,
    stu.birthday,
    stu.sex
from student stu
left join score sc
    on sc.sid = stu.id
cross join (
    -- total number of courses on offer
    select count(*) as course_cnt
    from course
) total
group by
    stu.id,
    stu.name,
    stu.birthday,
    stu.sex,
    total.course_cnt
having count(sc.cid) < total.course_cnt;
12、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:
-- Q12: students who share at least one course with student 01.
-- The inner filter compares each score row's course with the set of courses
-- taken by student 01. Student 01 is not filtered out, so it is part of the
-- result as well.
select stu.*
from student stu
join (
    select distinct s2.sid as sid
    from score s2
    where s2.cid in (
        select s1.cid
        from score s1
        where s1.sid = '01'
    )
) shared
    on shared.sid = stu.id;
13、查询和"01"号的同学学习的课程完全相同的其他同学的信息:
-- Q13: other students whose set of courses is exactly the same as student 01's.
-- Two conditions together give set equality:
--   1. the student has no course outside of 01's courses (anti-join on "extra");
--   2. the student has as many score rows as 01 has.
select stu.*
from student stu
left join (
    -- students with at least one course that 01 did not take
    select s1.sid as sid
    from score s1
    left join (
        select cid
        from score
        where sid = '01'
    ) ref
        on ref.cid = s1.cid
    where ref.cid is null
) extra
    on extra.sid = stu.id
where extra.sid is null
  and stu.id in (
      select s1.sid
      from score s1
      cross join (
          select count(*) as cnt
          from score
          where sid = '01'
      ) ref_cnt
      group by
          s1.sid,
          ref_cnt.cnt
      having count(*) = ref_cnt.cnt
  )
  and stu.id != '01';
14、查询没学过"张三"老师讲授的任一门课程的学生姓名:
-- Q14: names of students who took none of the courses taught by 张三.
-- Anti-join against the students that have a score row in one of 张三's
-- courses. Only the name is returned, as the question asks.
select stu.name
from student stu
left join (
    select distinct sc.sid as sid
    from score sc
    join course c
        on c.id = sc.cid
    join teacher t
        on t.id = c.tid
    where t.name = '张三'
) taught
    on taught.sid = stu.id
where taught.sid is null;
15、查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:
-- Q15: id, name and average score of students who failed (score < 60) two or
-- more courses.
-- The join back to score must be restricted to the student's own rows;
-- without the ON condition every student is averaged over the whole table.
select
    stu.id                  as sid,
    stu.name                as name,
    round(avg(s2.score), 2) as `avg`
from student stu
join (
    select s1.sid as sid
    from score s1
    where s1.score < 60
    group by s1.sid
    having count(1) >= 2
) failed
    on failed.sid = stu.id
join score s2
    on s2.sid = stu.id
group by
    stu.id,
    stu.name;
16、检索"01"课程分数小于60,按分数降序排列的学生信息:
-- Q16: students with a course-01 score below 60, highest score first.
-- An ORDER BY inside a joined subquery does not order the final result, so
-- the sort is applied to the outer query. The score is returned as well so
-- the ordering is visible.
select
    stu.*,
    sc.score as score
from student stu
join score sc
    on sc.sid = stu.id
where sc.cid = '01'
  and sc.score < 60
order by score desc;
17、按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:
-- Q17: per student, the score in each course plus the average, ordered by
-- average from high to low.
-- Conditional aggregation pivots the three courses in one pass over score;
-- a course the student did not take shows up as NULL.
-- Assumes at most one score row per (sid, cid), as in the sample data.
select
    a.sid,
    max(case when a.cid = '01' then a.score end) as chinese,
    max(case when a.cid = '02' then a.score end) as math,
    max(case when a.cid = '03' then a.score end) as english,
    round(avg(a.score), 2)                       as avgScore
from score a
group by a.sid
order by avgScore desc;
18、查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率:
-- Q18: per course: id, name, highest, lowest and average score, and the share
-- of results in each band.
-- Bands: pass >= 60, medium [70, 80), good [80, 90), excellent >= 90.
-- Conditional aggregation keeps every course in the result even when a band
-- is empty (its ratio is then 0); joining one subquery per band would drop
-- such courses.
select
    s1.cid                  as cid,
    c1.name                 as cname,
    max(s1.score)           as `max`,
    min(s1.score)           as `min`,
    round(avg(s1.score), 2) as `avg`,
    -- pass rate
    round(sum(case when s1.score >= 60 then 1 else 0 end) / count(*), 2)                   as ratio,
    -- medium rate
    round(sum(case when s1.score >= 70 and s1.score < 80 then 1 else 0 end) / count(*), 2) as ratio1,
    -- good rate
    round(sum(case when s1.score >= 80 and s1.score < 90 then 1 else 0 end) / count(*), 2) as ratio2,
    -- excellent rate
    round(sum(case when s1.score >= 90 then 1 else 0 end) / count(*), 2)                   as ratio3
from score s1
join course c1
    on c1.id = s1.cid
group by
    s1.cid,
    c1.name;
19、按各科成绩进行排序,并显示排名:
-- Q19: every score row with the student's name and its position within the
-- course, best score first.
-- row_number() gives tied scores distinct consecutive positions.
select
    stu.name as name,
    s1.*,
    row_number() over (partition by s1.cid order by s1.score desc) as `rank`
from score s1
join student stu
    on stu.id = s1.sid;
20、查询学生的总成绩并进行排名:
-- Q20: total score per student with its position, highest total first.
-- Totals are grouped by student id (plus name) so that two students sharing
-- a name are not merged into one total.
select
    b.sname,
    b.`sum`,
    row_number() over (order by b.`sum` desc) as `rank`
from (
    select
        stu.id        as sid,
        stu.name      as sname,
        sum(s1.score) as `sum`
    from score s1
    join student stu
        on stu.id = s1.sid
    group by
        stu.id,
        stu.name
) b;
21、查询不同老师所教不同课程平均分从高到低显示:
-- Q21: average score per teacher and course, highest average first.
-- tname is the teacher's name and cname the course's name.
select
    t1.name       as tname,
    c1.name       as cname,
    avg(s1.score) as `avg`
from score s1
join course c1
    on c1.id = s1.cid
join teacher t1
    on t1.id = c1.tid
group by
    t1.name,
    c1.name
order by `avg` desc;
22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:
-- Q22: students placed 2nd and 3rd in each course, with the course and score.
-- The course id and score are carried out of the subquery so each result row
-- says which course and which score the position refers to.
-- row_number() gives tied scores distinct consecutive positions.
select *
from (
    select
        stu.*,
        s1.cid   as cid,
        s1.score as score,
        row_number() over (partition by s1.cid order by s1.score desc) as `rank`
    from score s1
    join student stu
        on s1.sid = stu.id
) b
where b.`rank` between 2 and 3;
23、统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比
同18
24、查询学生平均成绩及其名次:
-- Q24: average score per student with its position, highest average first.
select
    b.sid   as sid,
    b.sname as name,
    b.`avg` as `avg`,
    row_number() over (order by b.`avg` desc) as `rank`
from (
    select
        s1.sid        as sid,
        stu.name      as sname,
        avg(s1.score) as `avg`
    from score s1
    join student stu
        on stu.id = s1.sid
    group by
        s1.sid,
        stu.name
) b;
25、查询各科成绩前三名的记录
-- Q25: the three best score rows of every course, with the student's details.
-- The window must sort by score descending; an ascending sort would return
-- the three lowest scores instead.
select
    b.sid    as sid,
    b.cid    as cid,
    b.score  as score,
    b.`rank` as `rank`,
    stu.*
from (
    select
        s1.sid   as sid,
        s1.cid   as cid,
        s1.score as score,
        row_number() over (partition by s1.cid order by s1.score desc) as `rank`
    from score s1
) b
join student stu
    on stu.id = b.sid
where b.`rank` between 1 and 3;
26、查询每门课程被选修的学生数:
-- Q26: number of students enrolled in each course.
-- Courses without any score row do not appear (inner join from score).
select
    c1.name  as name,
    s1.cid   as cid,
    count(*) as `count`
from score s1
join course c1
    on c1.id = s1.cid
group by
    s1.cid,
    c1.name;
27、查询出只有两门课程的全部学生的学号和姓名:
-- Q27: id and name of students who took exactly two courses.
select
    b.sid  as sid,
    b.name as name
from (
    select
        s1.sid   as sid,
        stu.name as name,
        count(*) as cnt
    from score s1
    join student stu
        on stu.id = s1.sid
    group by
        s1.sid,
        stu.name
) b
where b.cnt = 2;
28、查询男生、女生人数:
-- Q28: number of students per sex.
select
    stu.sex,
    count(*)
from student stu
group by stu.sex;
29、查询名字中含有"风"字的学生信息:
-- Q29: students whose name contains the character 风.
select *
from student stu
where stu.name like '%风%';
30、查询同名同性学生名单,并统计同名人数:
-- Q30: names carried by more than one student, with the number of students.
-- NOTE(review): the heading also mentions sex; this query groups by name only.
-- Add stu.sex to the grouping if "same name and same sex" is what is wanted.
select
    b.name as samename,
    b.cnt  as `count`
from (
    select
        stu.name as name,
        count(*) as cnt
    from student stu
    group by stu.name
) b
where b.cnt > 1;
31、查询1990年出生的学生名单:
-- Q31: students born in 1990.
-- birthday is a 'yyyy-MM-dd' string, so its first four characters are the
-- year; substr positions are 1-based.
select stu.*
from student stu
where substr(stu.birthday, 1, 4) = '1990';
32、查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:
-- Q32: average score per course, highest average first; ties are broken by
-- course id ascending.
select
    sc.cid                  as cid,
    round(avg(sc.score), 2) as `avg`
from score sc
group by sc.cid
order by
    `avg` desc,
    cid asc;
33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:
-- Q33: id, name and average score of students averaging 85 or more.
select
    stu.id        as id,
    stu.name,
    avg(sc.score) as `avg`
from score sc
join student stu
    on stu.id = sc.sid
group by
    stu.id,
    stu.name
having avg(sc.score) >= 85;
34、查询课程名称为"数学",且分数低于60的学生姓名和分数:
-- Q34: name and score of students who scored below 60 in the course 数学.
-- The filter must be "< 60"; ">= 60" returns the students who passed.
select
    stu.name as name,
    s1.score as score
from course c1
join score s1
    on s1.cid = c1.id
join student stu
    on stu.id = s1.sid
where c1.name = '数学'
  and s1.score < 60;
35、查询所有学生的课程及分数情况:
-- Q35: courses and scores of all students.
-- A left join keeps students that have no score row; their cid and score
-- are NULL.
select
    stu.name as name,
    stu.id   as id,
    s1.cid   as cid,
    s1.score as score
from student stu
left join score s1
    on s1.sid = stu.id;
36、查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:
-- Q36: student name, course name and score for every result above 70.
select
    stu.name as sname,
    c1.name  as cname,
    sc.score as score
from score sc
join course c1
    on c1.id = sc.cid
join student stu
    on stu.id = sc.sid
where sc.score > 70;
37、查询课程不及格的学生:
-- Q37: students with a failed course (score < 60), one row per failed course
-- with the course name.
select
    stu.*,
    c1.name as cname
from student stu
join score s1
    on s1.sid = stu.id
join course c1
    on c1.id = s1.cid
where s1.score < 60;
38、查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:
-- Q38: name and id of students who scored above 80 in course 01.
select
    stu.name as name,
    stu.id   as id
from score s1
join student stu
    on stu.id = s1.sid
where s1.cid = '01'
  and s1.score > 80;
39、求每门课程的学生人数:
-- Q39: number of score rows (students) per course.
select
    s1.cid as cid,
    count(*)
from score s1
group by s1.cid;
40、查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩:
-- Q40: among students of 张三's courses, the one(s) with the highest score.
-- The window sorts by score descending so that rank 1 is the best score;
-- rank() gives tied top scores the same rank, so all of them are returned.
select *
from (
    select
        stu.*,
        s1.score,
        rank() over (order by s1.score desc) as `rank`
    from teacher t1
    join course c1
        on t1.id = c1.tid
    join score s1
        on s1.cid = c1.id
    join student stu
        on stu.id = s1.sid
    where t1.name = '张三'
) b
where b.`rank` = 1;
41、查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩:
-- Q41: score rows whose score value occurs in more than one course.
-- "shared" holds the score values found in at least two different courses;
-- joining it back to score lists every row carrying such a value.
-- NOTE(review): values are compared across all students, not within one
-- student; confirm this is the intended reading of the question.
select
    s2.sid       as sid,
    s2.cid       as cid,
    shared.score as score
from (
    select s1.score as score
    from score s1
    group by s1.score
    having count(distinct s1.cid) > 1
) shared
join score s2
    on s2.score = shared.score;
42、查询每门课程成绩最好的前三名:
-- Q42: student name, course name and position for the three best results of
-- each course. row_number() gives tied scores distinct positions.
select
    stu.name,
    c1.name       as cname,
    placed.`rank` as `rank`
from (
    select
        sc.sid as sid,
        sc.cid as cid,
        row_number() over (partition by sc.cid order by sc.score desc) as `rank`
    from score sc
) placed
join student stu
    on stu.id = placed.sid
join course c1
    on c1.id = placed.cid
where placed.`rank` <= 3;
43、统计每门课程的学生选修人数(超过5人的课程才统计):
要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列
-- Q43: course id and headcount for courses with more than 5 students,
-- largest headcount first; ties are ordered by course id ascending.
select
    s1.cid   as cid,
    count(*) as `count`
from score s1
group by s1.cid
having count(*) > 5
order by
    `count` desc,
    cid;
44、检索至少选修两门课程的学生学号:
-- Q44: ids of students who took at least two courses.
select s1.sid
from score s1
group by s1.sid
having count(*) >= 2;
45、查询选修了全部课程的学生信息:
-- Q45: students who took every course.
-- The number of courses is a single-row subquery; the cross join attaches it
-- to every score row so each student's row count can be compared with it.
select stu.*
from student stu
join (
    select s1.sid as sid
    from score s1
    cross join (
        select count(*) as cnt
        from course c1
    ) total
    group by
        s1.sid,
        total.cnt
    having count(*) = total.cnt
) complete
    on complete.sid = stu.id;
46、查询各学生的年龄(周岁):
按照出生日期来算:若当前月日 < 出生日期的月日(今年生日还没到),则年龄减一
-- Q46: age of every student in completed years.
-- birthday is a 'yyyy-MM-dd' string, so characters 6-10 are 'MM-dd'.
-- If this year's birthday has been reached (birthday MM-dd <= today's MM-dd)
-- the age is the difference of the years, otherwise one less. "<=" matters:
-- on the birthday itself the new age already applies.
-- The id and name are returned so each age can be attributed to a student.
select
    stu.id,
    stu.name,
    case
        when substr(stu.birthday, 6, 5) <= substr(cast(current_date as string), 6, 5)
            then year(current_date) - year(stu.birthday)
        else year(current_date) - year(stu.birthday) - 1
    end as age
from student stu;
47、查询本周过生日的学生:
-- Q47: students whose birthday falls in the current week (Sunday..Saturday).
-- dayofweek() returns 1 for Sunday, so "1 - dayofweek" steps back to this
-- week's Sunday and "7 - dayofweek" steps forward to its Saturday.
-- Birthdays are compared as 'MM-dd' strings. When the week spans New Year
-- the start (e.g. '12-29') sorts after the end (e.g. '01-04'), so a plain
-- BETWEEN would match nothing; that case is handled by the second branch.
select stu.name
from student stu
cross join (
    select
        substr(cast(date_add(current_date, 1 - dayofweek(current_date)) as string), 6, 5) as week_start,
        substr(cast(date_add(current_date, 7 - dayofweek(current_date)) as string), 6, 5) as week_end
) w
where (
          w.week_start <= w.week_end
      and substr(stu.birthday, 6, 5) between w.week_start and w.week_end
      )
   or (
          w.week_start > w.week_end
      and (
              substr(stu.birthday, 6, 5) >= w.week_start
           or substr(stu.birthday, 6, 5) <= w.week_end
          )
      );
48、查询下周过生日的学生:
-- Q48: students whose birthday falls in next week (Sunday..Saturday).
-- dayofweek() returns 1 for Sunday, so "8 - dayofweek" lands on next week's
-- Sunday and "14 - dayofweek" on next week's Saturday.
-- Birthdays are compared as 'MM-dd' strings. When that week spans New Year
-- the start (e.g. '12-29') sorts after the end (e.g. '01-04'), so a plain
-- BETWEEN would match nothing; that case is handled by the second branch.
select stu.name
from student stu
cross join (
    select
        substr(cast(date_add(current_date, 8 - dayofweek(current_date)) as string), 6, 5)  as week_start,
        substr(cast(date_add(current_date, 14 - dayofweek(current_date)) as string), 6, 5) as week_end
) w
where (
          w.week_start <= w.week_end
      and substr(stu.birthday, 6, 5) between w.week_start and w.week_end
      )
   or (
          w.week_start > w.week_end
      and (
              substr(stu.birthday, 6, 5) >= w.week_start
           or substr(stu.birthday, 6, 5) <= w.week_end
          )
      );
49、查询本月过生日的学生:
-- Q49: names of students whose birthday is in the current month.
select stu.name
from student stu
where month(current_date) = month(stu.birthday);
50、查询12月份过生日的学生:
-- Q50: names of students whose birthday is in December.
select stu.name
from student stu
where 12 = month(stu.birthday);