Hive简单分析练习

创建表(在Linux中打开hive)

三分数据:students.txt,score.txt,cource.txt
students.txt
cource.txt
score.txt

create table students(
    id bigint ,
    name string ,
    age int ,
    gender string ,
    clazz string 
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; 
create table score(
    id bigint ,
    score_id bigint  ,
    score int 
)  ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';  

1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据

create table subject (
    subject_id bigint,
    subject_name string
)row format delimited fields terminated by ',';
hdfs dfs -put /usr/local/data/cource.txt /user/hive/warehouse/test.db/subject

2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩)

select s1.id,s1.name,s1.clazz,s2.score,s3.subject_id,s3.subject_name 
from students s1 
left join score s2 on s1.id=s2.id 
left join subject s3 on s2.score_id=s3.subject_id 
limit 10;

3、查询学生总分(输出:学号,姓名,班级,总分)

//先聚合再联查
select s1.id,s1.name,s1.clazz,s2.sum_score from students s1 
left join (select id,sum(score) sum_score from score group by id) s2 
on s1.id=s2.id limit 10;

//先联查再聚合
select s1.id,s1.name,s1.clazz,sum(s2.score) from students s1
left join score s2 on s1.id=s2.id
group by s1.id,s1.name,s1.clazz limit 10;

4.查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分)

select s1.id,s1.name,s1.clazz,s2.sum_score from students s1 left join 
(select id,sum(score) sum_score from score group by id) s2 on s1.id=s2.id order by s2.sum_score desc limit 3;

5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分)

select s1.id,s1.name,s1.clazz,s2.sum_score from students s1 left join 
(select id,sum(score) sum_score from score group by id) s2 on s1.id=s2.id 
where s1.clazz='文科一班' order by s2.sum_score desc limit 10;

6、查询每个班级学生总分的平均成绩(输出:班级,平均分)

select ss1.clazz,avg(sc) avg_score from (select s1.clazz,s2.sc from students s1 
left join (select id,sum(score) sc from score group by id) s2 on s1.id=s2.id) ss1 group by ss1.clazz;

7、查询每个班级的最高总分(输出:班级,总分)

select ss1.clazz,max(sc) avg_score from (select s1.clazz,s2.sc from students s1 
left join (select id,sum(score) sc from score group by id) s2 on s1.id=s2.id) ss1 group by ss1.clazz;

8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分)

select  id,name,clazz,sum_score,rk
from(select  id,name,clazz,sum_score,row_number() over(partition by clazz order by sum_score desc) rk
        from(select  t1.id,t1.name,t1.clazz,sum(t2.score) sum_score
             from students t1
             left join score t2 on t1.id = t2.id
             group by t1.id,t1.name,t1.clazz) t1
) tt1 where tt1.rk <=3;
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值