hive练习50题(1-10)

create database if not exists db_hive2;
use db_hive2;
-- 上传数据
-- [root@master datas]# hdfs dfs -mkdir -p /datas/student
-- [root@master datas]# hdfs dfs -mkdir -p /datas/course
-- [root@master datas]# hdfs dfs -mkdir -p /datas/teacher
-- [root@master datas]# hdfs dfs -mkdir -p /datas/score
-- hdfs dfs -put /opt/data/student.csv /tmp/hive_test/data/student
-- hdfs dfs -put /opt/data/course.csv /tmp/hive_test/data/course
-- hdfs dfs -put /opt/data/teacher.csv /tmp/hive_test/data/teacher
-- hdfs dfs -put /opt/data/score.csv /tmp/hive_test/data/score
-- 创建外部表导入数据
create external table if not exists ext_student(
    s_id int,
    s_name string,
    s_birth date,
    s_sex string
)row format delimited fields terminated by ','
stored as textfile
location '/datas/student';

create external table if not exists ext_course (
	c_id int,
	c_name string,
	t_id int
)
row format delimited fields terminated by ','
stored as textfile
location '/datas/course';

create external table if not exists ext_teacher (
	t_id int,
	t_name string
)
row format delimited fields terminated by ','
stored as textfile
location '/datas/teacher';

create external table if not exists ext_score (
	s_id int,
	c_id int,
	s_score int
)
row format delimited fields terminated by ','
stored as textfile
location '/datas/score';
show tables ;
-- 数据清洗
-- 创建内部表:将未填充性别的人员设置为女
select * from ext_student;
create table if not exists student as
    select s_id,s_name,s_birth,if(s_sex='','女',s_sex) s_sex
from ext_student;
select * from student;
-- 创建内部表 teacher,数据正常无需清洗,直接导入原始数据
create table if not exists teacher as
    select * from ext_teacher;
select * from teacher;
-- 创建内部表 course,将没有担任课程的教员编号填充到错误的教员编号
select * from ext_course;
set mapreduce.input.fileinputformat.split.maxsize=10240000;
create table if not exists course as
with
-- t1:正确的课程信息及教员编号:c_id,c_name,t_id
t1 as (select c_id,c_name,t_id from ext_course c where exists(select * from teacher t where t.t_id = c.t_id)),
-- t2:教员编号错误的课程信息:c_id,c_name
t2 as (select c.c_id,c.c_name from ext_course c where not exists(select * from t1 where t1.t_id = c.t_id)),
-- t3:未担任课程的教员编号:t_id
t3 as (select t.t_id from teacher t where not exists(select * from ext_course c where c.t_id = t.t_id)),
-- t4:t2和t3拼接(交叉连接),将未担任课程的教员编号拼到教员编号错误的课程信息后,再与t1合并,即为正确的数据
t4 as (select c_id,c_name,t_id from t1 union all select c_id,c_name,t_id from t2,t3)
-- union后顺序会乱,需要按照课程id排下序
select * from t4 order by c_id;
select * from course;
-- 创建内部表 score,暂不分区
create external table if not exists score (
	s_id int,
	c_id int,
	s_score int
)
row format delimited fields terminated by '\t'
stored as textfile;
load data local inpath '/opt/module/hive/datas/score.csv' into table score;
select * from score;
-----------------------------------------------
-- 1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数
-----------------------------------------------
with
sc1 as (select * from score where c_id=1),
sc2 as (select * from score where c_id=2)
select s.*,sc1.s_score score1,sc2.s_score score2
from student s
inner join sc1 on s.s_id=sc1.s_id
inner join sc2 on s.s_id=sc2.s_id
where sc1.s_score>sc2.s_score;



select
s_id,
   sum(if(c_id ='1',source.score,0)),
   sum(if(c_id ='2',source.score,0))
from source
group by  s_id;
--通过这个代码将成绩列转行,后面再进行比较

-----------------------------------------------
-- 2、查询"01"课程比"02"课程成绩低的学生的信息及课程分数
-----------------------------------------------
with
sc1 as (select * from score where c_id=1),
sc2 as (select * from score where c_id=2)
select s.*,sc1.s_score score1,sc2.s_score score2
from student s
inner join sc1 on sc1.s_id=s.s_id
inner join sc2 on sc2.s_id=s.s_id
where sc1.s_score<sc2.s_score;
-----------------------------------------------
-- 3、查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩
-----------------------------------------------
select s_id,avg(s_score) avg
from score group by s_id having avg>=60;
with
t1 as (select s_id,avg(s_score) avg from score group by s_id having avg>=60)
select s.s_id,s.s_name,t1.avg
from student s inner join t1 on s.s_id=t1.s_id;
-----------------------------------------------
-- 4、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩
-- (包括有成绩的和无成绩的)
-----------------------------------------------
with
t1 as ( select s_id,avg(s_score) avg from score group by s_id )
select s.s_id,s.s_name,if(t1.avg is null ,0,round(avg,2)) avg_score
from student s left join t1 on s.s_id=t1.s_id
where avg is null or avg<60;
-----------------------------------------------
-- 5、查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩
-----------------------------------------------
with
t1 as (select s_id,count(c_id) count,sum(s_score) sum
from score group by s_id)
select s.s_id,s.s_name,if(count is null ,0,count),if(sum is null ,0,sum)
from student s left join t1 on s.s_id=t1.s_id;
-----------------------------------------------
-- 6、查询"李"姓老师的数量
-----------------------------------------------
select * from teacher;
select count(*) from teacher where t_name like '李%';
-----------------------------------------------
-- 7、查询学过"张三"老师授课的同学的信息
-----------------------------------------------
select * from teacher;
select * from course;
select * from score;
with
t1 as ( select c_id  from course
where t_id=(select t_id from teacher where t_name='张三') ),
t2 as (select s_id from score,t1 where score.c_id=t1.c_id)
select student.* from student,t2
where student.s_id=t2.s_id;
-----------------------------------------------
-- 8、查询没学过"张三"老师授课的同学的信息
-----------------------------------------------
with
t1 as (select c_id from course where t_id=(select t_id from teacher where t_name='张三')),
t2 as (select s_id from score,t1 where score.c_id=t1.c_id)
select * from student
where not exists (select * from t2 where student.s_id=t2.s_id);

-----------------------------------------------
-- 9、查询学过编号为"01"并且!!也!!学过编号为"02"的课程的同学的信息
-- 注意这个也
-----------------------------------------------
select * from score;
with
t1 as (select s_id from score where c_id in (1,2) group by s_id having count(*)=2)
select  student.* from student,t1
where student.s_id=t1.s_id;
-----------------------------------------------
-- 10、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息
-----------------------------------------------
select * from score;
with
t1 as (select s_id from score where c_id=1),
t2 as (select s_id from score where c_id=2),
t3 as (select t1.s_id as s_id from t1 left join t2 on t1.s_id=t2.s_id where t2.s_id is null )
select * from student s
where exists (select * from t3 where s.s_id=t3.s_id);

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值