50道Hive SQL练习题

建表

create table student(s_id string,s_name string,s_birth string,s_sex string) row format delimited fields terminated by '\t';
create table course(c_id string,c_name string,t_id string) row format delimited fields terminated by '\t';
create table teacher(t_id string,t_name string) row format delimited fields terminated by '\t';
create table score(s_id string,c_id string,s_score int) row format delimited fields terminated by '\t';

生成数据

vi /export/data/hivedatas/student.csv

01 赵雷 1990-01-01 男

02 钱电 1990-12-21 男

03 孙风 1990-05-20 男

04 李云 1990-08-06 男

05 周梅 1991-12-01 女

06 吴兰 1992-03-01 女

07 郑竹 1989-07-01 女

08 王菊 1990-01-20 女

vi /export/data/hivedatas/course.csv

01 语文 02

02 数学 01

03 英语 03

vi /export/data/hivedatas/teacher.csv

01 张三

02 李四

03 王五

vi /export/data/hivedatas/score.csv

01 01 80

01 02 90

01 03 99

02 01 70

02 02 60

02 03 80

03 01 80

03 02 80

03 03 80

04 01 50

04 02 30

04 03 20

05 01 76

05 02 87

06 01 31

06 03 34

07 02 89

07 03 98

导数据到hive

load data local inpath '/export/data/hivedatas/student.csv' into table student;
load data local inpath '/export/data/hivedatas/course.csv' into table course;
load data local inpath '/export/data/hivedatas/teacher.csv' into table teacher;
load data local inpath '/export/data/hivedatas/score.csv' into table score;

–注:–hive查询语法

SELECT [ALL | DISTINCT] select_expr, select_expr, ...    FROM table_reference    [WHERE where_condition]    [GROUP BY col_list [HAVING condition]]    [CLUSTER BY col_list      | [DISTRIBUTE BY col_list] [SORT BY| ORDER BY col_list]    ]    [LIMIT number]

– 1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数:

select student.*,a.s_score as 01_score,b.s_score as 02_score

from student  

join score a on student.s_id=a.s_id and a.c_id='01'  

left join score b on student.s_id=b.s_id and b.c_id='02'

where  a.s_score>b.s_score;

–答案2

select student.*,a.s_score as 01_score,b.s_score as 02_score

from student

join score a on  a.c_id='01'

join score b on  b.c_id='02'

where  a.s_id=student.s_id and b.s_id=student.s_id and a.s_score>b.s_score;

– 2、查询"01"课程比"02"课程成绩低的学生的信息及课程分数:

select student.*,a.s_score as 01_score,b.s_score as 02_score

from student

join score a on student.s_id=a.s_id and a.c_id='01'

left join score b on student.s_id=b.s_id and b.c_id='02'

where a.s_score<b.s_score;

–答案2

select student.*,a.s_score as 01_score,b.s_score as 02_score

from student

join score a on  a.c_id='01'

join score b on  b.c_id='02'

where  a.s_id=student.s_id and b.s_id=student.s_id and a.s_score<b.s_score;

– 3、查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:

select  student.s_id,student.s_name,tmp.平均成绩

from

student  

join (   select score.s_id,round(avg(score.s_score),1) as 平均成绩   from score group by s_id) as tmp  

on tmp.平均成绩>=60

where student.s_id = tmp.s_id

–答案2

select  student.s_id,student.s_name,round(avg (score.s_score),1) as 平均成绩

from student

join score

on student.s_id = score.s_id

group by student.s_id,student.s_name

having avg (score.s_score) >= 60;

– 4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:
– (包括有成绩的和无成绩的)

select  student.s_id,student.s_name,tmp.avgScore

from student

join (select score.s_id,round(avg(score.s_score),1) as avgScore from score group by s_id) as tmp

on tmp.avgScore < 60 where student.s_id=tmp.s_id

union all

select  s2.s_id,s2.s_name,0 as avgScore from student s2

where s2.s_id not in   (select distinct sc2.s_id from score sc2);

–答案2

select  score.s_id,student.s_name,round(avg (score.s_score),1) as avgScore

from studentinner

join score

on student.s_id=score.s_id

group by score.s_id,student.s_name

having avg (score.s_score) < 60

union all

select  s2.s_id,s2.s_name,0 as avgScore

from student s2

where s2.s_id not in   (select distinct sc2.s_id from score sc2);

– 5、查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:

select student.s_id,student.s_name,(count(score.c_id) )as total_count,sum(score.s_score)as total_score

from student left join score

on student.s_id=score.s_id 

group by student.s_id,student.s_name ;

– 6、查询"李"姓老师的数量:

select t_name,count(1) from teacher where t_name like '李%' group by t_name;

– 7、查询学过"张三"老师授课的同学的信息:

select student.*

from student 

join score on student.s_id =score.s_id 

join course on course.c_id=score.c_id

join teacher on course.t_id=teacher.t_id and t_name='张三';

– 8、查询没学过"张三"老师授课的同学的信息:

select student.*

from student

left join (select s_id from score join course on course.c_id=score.c_id join teacher on course.t_id=teacher.t_id and t_name='张三')tmp

on student.s_id =tmp.s_id

where tmp.s_id is null;

– 9、查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:

select *

from student

join (select s_id from score where c_id =1 )tmp1

on student.s_id=tmp1.s_id

join (select s_id from score where c_id =2 )tmp2

on student.s_id=tmp2.s_id;

– 10、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:

select student.*

from student

join (select s_id from score where c_id =1 )tmp1

on student.s_id=tmp1.s_idleft

join (select s_id from score where c_id =2 )tmp2

on student.s_id =tmp2.s_id

where tmp2.s_id is null;

– 11、查询没有学全所有课程的同学的信息:

–先查询出课程的总数量

select count(1) from course;

–再查询所需结果

select student.*

from studentleft join( select s_id from score group by s_id having count(c_id)=3)tmp

on student.s_id=tmp.s_id

where tmp.s_id is null;

–方法二(一步到位):

select student.*

from student

join (select count(c_id)num1 from course)tmp1

left join( select s_id,count(c_id)num2 from score group by s_id) tmp2

on student.s_id=tmp2.s_id and tmp1.num1=tmp2.num2

where tmp2.s_id is null;

– 12、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:

select student.*

from student

join (select c_id from score where score.s_id=01)tmp1

join (select s_id,c_id from score)tmp2

on tmp1.c_id =tmp2.c_id and student.s_id =tmp2.s_id

where student.s_id not in('01')

group by student.s_id,s_name,s_birth,s_sex;

– 13、查询和"01"号的同学学习的课程完全相同的其他同学的信息:
–备注:hive不支持group_concat方法,可用 concat_ws(’|’, collect_set(str)) 实现

select student.*,tmp1.course_id

from student 

join (select s_id ,concat_ws('|', collect_set(c_id)) course_id from score group by s_id having s_id not in (1))tmp1

on student.s_id = tmp1.s_id

join (select concat_ws('|', collect_set(c_id)) course_id2 from score where s_id=1)tmp2

on tmp1.course_id = tmp2.course_id2;

– 14、查询没学过"张三"老师讲授的任一门课程的学生姓名:

select student.*

from student

left join (select s_id from score join (select c_id from course join teacher on course.t_id=teacher.t_id and t_name='张三')tmp2 on score.c_id=tmp2.c_id )tmp

on student.s_id = tmp.s_id 

where tmp.s_id is null;

– 15、查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:

select student.s_id,student.s_name,tmp.avg_score

from studentinner

join (select s_id from score where s_score<60 group by score.s_id having count(s_id)>1)tmp2on student.s_id = tmp2.s_id

left join ( select s_id,round(AVG (score.s_score)) avg_score from score group by s_id)tmp

on tmp.s_id=student.s_id;

– 16、检索"01"课程分数小于60,按分数降序排列的学生信息:

select student.*,s_score

from student,score

where student.s_id=score.s_id and s_score<60 and c_id='01'

order by s_score desc;

– 17、按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:

select a.s_id,tmp1.s_score as chinese,tmp2.s_score as math,tmp3.s_score as english, round(avg (a.s_score),2) as avgScore

from score a

left join (select s_id,s_score from score s1 where c_id='01')tmp1

on tmp1.s_id=a.s_id

left join (select s_id,s_score from score s2 where c_id='02')tmp2 on tmp2.s_id=a.s_idleft join (select s_id,s_score from score s3 where c_id='03')tmp3

on tmp3.s_id=a.s_id

group by a.s_id,tmp1.s_score,tmp2.s_score,tmp3.s_score

order by avgScore desc;

– 18.查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率:
–及格为>=60,中等为:70-80,优良为:80-90,优秀为:>=90

select course.c_id,course.c_name,tmp.maxScore,tmp.minScore,tmp.avgScore,tmp.passRate,tmp.moderate,tmp.goodRate,tmp.excellentRates

from course

join(

select c_id,max(s_score) as maxScore,min(s_score)as minScore,

round(avg(s_score),2) avgScore,

round(sum(case when s_score>=60 then 1 else 0 end)/count(c_id),2)passRate,

round(sum(case when s_score>=60 and s_score<70 then 1 else 0 end)/count(c_id),2) moderate,

round(sum(case when s_score>=70 and s_score<80 then 1 else 0 end)/count(c_id),2) goodRate,

round(sum(case when s_score>=80 and s_score<90 then 1 else 0 end)/count(c_id),2) excellentRates

from score group by c_id) tmp

on tmp.c_id=course.c_id;

– 19、按各科成绩进行排序,并显示排名:
– row_number() over()分组排序功能(mysql没有该方法)

select s1.*,row_number()over(order by s1.s_score desc) Ranking

from score s1

where s1.c_id='01'

order by noRanking asc

union all

select s2.*,row_number()over(order by s2.s_score desc) Ranking

from score s2

where s2.c_id='02'

order by noRanking asc

union all

select s3.*,row_number()over(order by s3.s_score desc) Ranking

from score s3

where s3.c_id='03'

order by noRanking asc;

– 20、查询学生的总成绩并进行排名:

select score.s_id,s_name,sum(s_score) sumscore,row_number()over(order by sum(s_score) desc) Ranking

from score ,student

where score.s_id=student.s_id

group by score.s_id,s_name

order by sumscore desc;

– 21、查询不同老师所教不同课程平均分从高到低显示:
– 方法1

select course.c_id,course.t_id,t_name,round(avg(s_score),2)as avgscore

from course

join teacher

on teacher.t_id=course.t_id

join score

on course.c_id=score.c_id  

group by course.c_id,course.t_id,t_name 

order by avgscore desc;

– 方法2

select course.c_id,course.t_id,t_name,round(avg(s_score),2)as avgscore

from course,teacher,score

where teacher.t_id=course.t_id and course.c_id=score.c_id    

group by course.c_id,course.t_id,t_name 

order by avgscore desc;

– 22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:

select tmp1.*

from (select * from score where c_id='01' order by s_score desc limit 3) tmp1 order by s_score asc limit 2

union all

select tmp2.* from (select * from score where c_id='02' order by s_score desc limit 3) tmp2 order by s_score asc limit 2

union all

select tmp3.* from (select * from score where c_id='03' order by s_score desc limit 3)tmp3    order by s_score asc limit 2;

– 23、统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比

select c.c_id,c.c_name,tmp1.s0_60, tmp1.percentum,tmp2.s60_70, tmp2.percentum,tmp3.s70_85, tmp3.percentum,tmp4.s85_100, tmp4.percentum

from course c

join(

select c_id,sum(case when s_score<60 then 1 else 0 end )as s0_60, round(100*sum(case when s_score<60 then 1 else 0 end )/count(c_id),2)as percentum

from score group by c_id)tmp1 on tmp1.c_id =c.c_id

left join (

select c_id,sum(case when s_score<70 and s_score>=60 then 1 else 0 end )as s60_70, round(100*sum(case when s_score<70 and s_score>=60 then 1 else 0 end )/count(c_id),2)as percentum

from score group by c_id) tmp2 on tmp2.c_id =c.c_idleft

join(

select c_id,sum(case when s_score<85 and s_score>=70 then 1 else 0 end )as s70_85, round(100*sum(case when s_score<85 and s_score>=70 then 1 else 0 end )/count(c_id),2)as percentum

from score group by c_id) tmp3

on tmp3.c_id =c.c_idleft

join (

select c_id,sum(case when s_score>=85 then 1 else 0 end )as s85_100, round(100*sum(case when s_score>=85 then 1 else 0 end )/count(c_id),2)as percentum  

from score group by c_id)  tmp4 

 

on tmp4.c_id =c.c_id;

– 24、查询学生平均成绩及其名次:

select tmp.*,row_number()over(order by tmp.avgScore desc) Ranking

from (

select student.s_id, student.s_name, round(avg(score.s_score),2) as avgScore

from student join score on student.s_id=score.s_id group by student.s_id,student.s_name) tmp

order by avgScore desc;

– 25、查询各科成绩前三名的记录

–课程id为01的前三名

select score.c_id,course.c_name,student.s_name,s_score

from score

join student on student.s_id=score.s_id

join course on score.c_id='01' and course.c_id=score.c_id

order by s_score desc limit 3;  

–课程id为02的前三名

select score.c_id,course.c_name,student.s_name,s_score

from score

join student on student.s_id=score.s_id

join course on score.c_id='02' and course.c_id=score.c_id

order by s_score desc limit 3; 

–课程id为03的前三名

select score.c_id,course.c_name,student.s_name,s_score

from scorejoin student on student.s_id=score.s_id

join course on score.c_id='03' and course.c_id=score.c_id

order by s_score desc limit 3;

– 26、查询每门课程被选修的学生数:

select c.c_id,c.c_name,tmp.number

from course c

join (

select c_id,count(1) as number from score where score.s_score<60 group by score.c_id

)tmp    

on tmp.c_id=c.c_id;

– 27、查询出只有两门课程的全部学生的学号和姓名:

select st.s_id,st.s_name

from student st

join (select s_id from score group by s_id having count(c_id) =2) tmp    

on st.s_id=tmp.s_id;

– 28、查询男生、女生人数:

select tmp1.man,tmp2.women

from

(select count(1) as man from student where s_sex='男')tmp1,   

(select count(1) as women from student where s_sex='女')tmp2;

– 29、查询名字中含有"风"字的学生信息:

select * from student where s_name like '%风%';

– 30、查询同名同性学生名单,并统计同名人数:

select s1.s_id,s1.s_name,s1.s_sex,count(*) as sameName

from student s1,student s2

where s1.s_name=s2.s_name and s1.s_id<>s2.s_id and s1.s_sex=s2.s_sex

group by s1.s_id,s1.s_name,s1.s_sex;

– 31、查询1990年出生的学生名单:

select * from student where s_birth like '1990%';

– 32、查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:

select score.c_id,c_name,round(avg(s_score),2) as avgScore

from score

join course

on score.c_id=course.c_id  

group by score.c_id,c_name 

order by avgScore desc,score.c_id asc;

– 33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:

select score.s_id,s_name,round(avg(s_score),2)as avgScore

from score

join student

on student.s_id=score.s_id    

group by score.s_id,s_name 

having avg(s_score) >= 85;

– 34、查询课程名称为"数学",且分数低于60的学生姓名和分数:

select s_name,s_score as mathScore

from student

join (select s_id,s_score from score,course where score.c_id=course.c_id and c_name='数学')tmp    

on tmp.s_score < 60 and student.s_id=tmp.s_id;

– 35、查询所有学生的课程及分数情况:

select a.s_name,

SUM(case c.c_name when '语文' then b.s_score else 0 end ) as chainese,

SUM(case c.c_name when '数学' then b.s_score else 0 end ) as math,

SUM(case c.c_name when '英语' then b.s_score else 0 end ) as english,

SUM(b.s_score) as sumScore

from student a

join score b on a.s_id=b.s_id

join course c on b.c_id=c.c_id    

group by s_name,a.s_id;

– 36、查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:

select student.s_id,s_name,c_name,s_score

from student

join (

select sc.*

from score sc

left join(select s_id from score where s_score < 70 group by s_id)tmp

on sc.s_id=tmp.s_id

where tmp.s_id is null

)tmp2

on student.s_id=tmp2.s_id

join course

on tmp2.c_id=course.c_id

order by s_id;

**-- 查询全部及格的信息**

select sc.*

from score sc

left join(select s_id from score where s_score < 60 group by s_id)tmp

on sc.s_id=tmp.s_id

where tmp.s_id is null;

**-- 或(效率低)**

select sc.*

from score sc

where sc.s_id not in (select s_id from score where s_score < 60 group by s_id);

– 37、查询课程不及格的学生:

select s_name,c_name as courseName,tmp.s_score

from student

join (select s_id,s_score,c_name from score,course where score.c_id=course.c_id and s_score < 60)tmp

on student.s_id=tmp.s_id;

–38、查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:

select student.s_id,s_name,s_score as score_01

from student

join score

on student.s_id=score.s_id

where c_id='01' and s_score >= 80;

– 39、求每门课程的学生人数:

select course.c_id,course.c_name,count(1)as selectNum

from course

join score

on course.c_id=score.c_id

group by course.c_id,course.c_name;

– 40、查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩:

select student.*,tmp3.c_name,tmp3.maxScore

from (

select s_id,c_name,max(s_score)as maxScore

from score

join (select course.c_id,c_name from course join (select t_id,t_name from teacher where t_name='张三')tmp on course.t_id=tmp.t_id) tmp2

on score.c_id=tmp2.c_id

group by score.s_id,c_name

order by maxScore desc limit 1

)tmp3

join student on student.s_id=tmp3.s_id;

– 41、查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩:

select distinct a.s_id,a.c_id,a.s_score

from score a,score b    

where a.c_id <> b.c_id and a.s_score=b.s_score;

– 42、查询每门课程成绩最好的前三名:

select tmp1.*

from (select *,row_number()over(order by s_score desc) ranking from score where c_id ='01') tmp1

where tmp1.ranking <= 3

union all

select tmp2.* from (select *,row_number()over(order by s_score desc) ranking from score where c_id ='02') tmp2 

where tmp2.ranking <= 3

union all

select tmp3.* from (select *,row_number()over(order by s_score desc) ranking from score where c_id ='03') tmp3

where tmp3.ranking <= 3;

– 43、统计每门课程的学生选修人数(超过5人的课程才统计):
– 要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列

select distinct course.c_id,tmp.num

from course

join (select c_id,count(1) as num from score group by c_id) tmp    

where tmp.num>=5 

order by tmp.num desc ,course.c_id asc;

– 44、检索至少选修两门课程的学生学号:

select s_id,count(c_id) as totalCourse

from score

group by s_id

having count(c_id) >= 2;

– 45、查询选修了全部课程的学生信息:

select student.* from student, (select s_id,count(c_id) as totalCourse from score group by s_id)tmp

where student.s_id=tmp.s_id and totalCourse=3;

–46、查询各学生的年龄(周岁):
– 按照出生日期来算,当前月日 < 出生年月的月日则,年龄减一
方法一

select

s_name,

s_birth,

(

year(CURRENT_DATE)-

year(s_birth)-

(

case when

month(CURRENT_DATE) < month(s_birth) then 1

when

month(CURRENT_DATE) = month(s_birth) and day(CURRENT_DATE) < day(s_birth) then 1

else 0 end)

) as age    

from student;

方法二:

select s_name,s_birth,

floor((datediff(current_date,s_birth) - floor((year(current_date) - year(s_birth))/4))/365) as age

from student;

– 47、查询本周过生日的学生:
–方法1

select * from student where weekofyear(CURRENT_DATE)+1 =weekofyear(s_birth);

–方法2

select s_name,s_sex,s_birth from student where substring(s_birth,6,2)='10' and substring(s_birth,9,2)=14;

– 48、查询下周过生日的学生:
–方法1

select * from student where weekofyear(CURRENT_DATE)+1 =weekofyear(s_birth);

–方法2

select s_name,s_sex,s_birth

from student

where substring(s_birth,6,2)='10' and substring(s_birth,9,2)>=15  and substring(s_birth,9,2)<=21;

– 49、查询本月过生日的学生:
–方法1

select * from student where MONTH(CURRENT_DATE) =MONTH(s_birth);

–方法2

select s_name,s_sex,s_birth from student where substring(s_birth,6,2)='10';

– 50、查询12月份过生日的学生:

select s_name,s_sex,s_birth from student where substring(s_birth,6,2)='12';

1.上传tar包 2.解压 tar -zxvf hive-1.2.1.tar.gz 3.安装mysql数据库 推荐yum 在线安装 4.配置hive (a)配置HIVE_HOME环境变量 vi conf/hive-env.sh 配置其中的$hadoop_home (b)配置元数据库信息 vi hive-site.xml 添加如下内容: javax.jdo.option.ConnectionURL jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true JDBC connect string for a JDBC metastore javax.jdo.option.ConnectionDriverName com.mysql.jdbc.Driver Driver class name for a JDBC metastore javax.jdo.option.ConnectionUserName root username to use against metastore database javax.jdo.option.ConnectionPassword hadoop password to use against metastore database 5.安装hive和mysq完成后,将mysql的连接jar包拷贝到$HIVE_HOME/lib目录下 如果出现没有权限的问题,在mysql授权(在安装mysql的机器上执行) mysql -uroot -p #(执行下面的语句 *.*:所有库下的所有表 %:任何IP地址或主机都可以连接) GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'root' WITH GRANT OPTION; FLUSH PRIVILEGES; 6. Jline包版本不一致的问题,需要拷贝hive的lib目录中jline.2.12.jar的jar包替换掉hadoop中的 /home/hadoop/app/hadoop-2.6.4/share/hadoop/yarn/lib/jline-0.9.94.jar 启动hive bin/hive ---------------------------------------------------------------------------------------------------- Hive几种使用方式: 1.Hive交互shell bin/hive 2.Hive JDBC服务(参考java jdbc连接mysql) 3.hive启动为一个服务器,来对外提供服务 bin/hiveserver2 nohup bin/hiveserver2 1>/var/log/hiveserver.log 2>/var/log/hiveserver.err & 启动成功后,可以在别的节点上用beeline去连接 bin/beeline -u jdbc:hive2://mini1:10000 -n root 或者 bin/beeline ! connect jdbc:hive2://mini1:10000 4.Hive命令 hive -e ‘sql’ bin/hive -e 'select * from t_test'
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值