创建表(在Linux中打开hive)
三分数据:students.txt,score.txt,cource.txt
students.txt
cource.txt
score.txt
create table students(
id bigint ,
name string ,
age int ,
gender string ,
clazz string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
create table score(
id bigint ,
score_id bigint ,
score int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据
create table subject (
subject_id bigint,
subject_name string
)row format delimited fields terminated by ',';
hdfs dfs -put /usr/local/data/cource.txt /user/hive/warehouse/test.db/subject
2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩)
select s1.id,s1.name,s1.clazz,s2.score,s3.subject_id,s3.subject_name
from students s1
left join score s2 on s1.id=s2.id
left join subject s3 on s2.score_id=s3.subject_id
limit 10;
3、查询学生总分(输出:学号,姓名,班级,总分)
//先聚合再联查
select s1.id,s1.name,s1.clazz,s2.sum_score from students s1
left join (select id,sum(score) sum_score from score group by id) s2
on s1.id=s2.id limit 10;
//先联查再聚合
select s1.id,s1.name,s1.clazz,sum(s2.score) from students s1
left join score s2 on s1.id=s2.id
group by s1.id,s1.name,s1.clazz limit 10;
4.查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分)
select s1.id,s1.name,s1.clazz,s2.sum_score from students s1 left join
(select id,sum(score) sum_score from score group by id) s2 on s1.id=s2.id order by s2.sum_score desc limit 3;
5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分)
select s1.id,s1.name,s1.clazz,s2.sum_score from students s1 left join
(select id,sum(score) sum_score from score group by id) s2 on s1.id=s2.id
where s1.clazz='文科一班' order by s2.sum_score desc limit 10;
6、查询每个班级学生总分的平均成绩(输出:班级,平均分)
select ss1.clazz,avg(sc) avg_score from (select s1.clazz,s2.sc from students s1
left join (select id,sum(score) sc from score group by id) s2 on s1.id=s2.id) ss1 group by ss1.clazz;
7、查询每个班级的最高总分(输出:班级,总分)
select ss1.clazz,max(sc) avg_score from (select s1.clazz,s2.sc from students s1
left join (select id,sum(score) sc from score group by id) s2 on s1.id=s2.id) ss1 group by ss1.clazz;
8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分)
select id,name,clazz,sum_score,rk
from(select id,name,clazz,sum_score,row_number() over(partition by clazz order by sum_score desc) rk
from(select t1.id,t1.name,t1.clazz,sum(t2.score) sum_score
from students t1
left join score t2 on t1.id = t2.id
group by t1.id,t1.name,t1.clazz) t1
) tt1 where tt1.rk <=3;