Hive常见的练习题
1. 测试数据
s_id s_name s_birth s_sex
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女
c_id c_course t_id
01 语文 02
02 数学 01
03 英语 03
t_id t_name
01 张三
02 李四
03 王五
s_id c_id s_score
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98
2. 建表语句
create table if not exists student (
s_id int,
s_name string,
s_birth string,
s_sex string
)
row format delimited
fields terminated by ' ';
load data local inpath '/home/hadoop/hiveTable/student' into table student;
create table if not exists course (
c_id int,
c_course string,
t_id int
)
row format delimited
fields terminated by '\t';
load data inpath '/inputData/hiveTable/course' into table course;
create table if not exists teacher (
t_id int,
t_name string
)
row format delimited
fields terminated by '\t';
load data inpath '/inputData/hiveTable/teacher' into table teacher;
create table if not exists score (
s_id int,
c_id int,
s_score int
)
row format delimited
fields terminated by '\t';
load data local inpath '/home/hadoop/hiveTable/score' into table score;
3. 练习题
select s.s_id, s.s_name, s.s_birth, s.s_sex, s1.s_score, s2.s_score
from (select s_id, s_score from score where c_id = 01) s1
join (select s_id, s_score from score where c_id = 02) s2
on s1.s_id = s2.s_id
left join student s on s.s_id = s1.s_id
where s1.s_score > s2.s_score;
select s.s_id, s.s_name, s.s_birth, s.s_sex, s1.s_score, s2.s_score
from (select s_id, s_score from score where c_id = 01) s1
join (select s_id, s_score from score where c_id = 02) s2 on s1.s_id = s2.s_id
left join student s
on s.s_id = s1.s_id
where s1.s_score < s2.s_score;
select sc.s_id, s.s_name, avg(sc.s_score)
from score sc
left join student s on s.s_id = sc.s_id
group by sc.s_id, s.s_name
having avg(sc.s_score) >= 60;
select t.s_id, t.s_name, t.avgscore
from (select s.s_id, s.s_name, case when avg(sc.s_score) is null then 0 else avg(sc.s_score) end avgscore
from student s
left join score sc on s.s_id = sc.s_id
group by s.s_id, s.s_name) t
where t.avgscore < 60;
select s.s_id, s.s_name, count(sc.s_score) countscore, sum(sc.s_score) total
from student s
left join score sc on s.s_id = sc.s_id
group by s.s_id, s.s_name;
select count(1) countteacher
from teacher t
where t.t_name like '李%';
select s.s_id, s.s_name, s.s_birth, s.s_sex
from student s
join score sc on s.s_id = sc.s_id
join course c on sc.c_id = c.c_id
join teacher t on t.t_id = c.t_id
where t.t_name = '张三';
select s1.s_id, s1.s_name, s1.s_birth, s1.s_sex
from student s1
where s_id not in (select s.s_id
from student s
join score sc on s.s_id = sc.s_id
join course c on sc.c_id = c.c_id
join teacher t on t.t_id = c.t_id
where t.t_name = '张三');
select s1.s_id, s1.s_name, s1.s_birth, s1.s_sex
from student s1
where not exists (select s.s_id
from student s
join score sc on s.s_id = sc.s_id
join course c on sc.c_id = c.c_id
join teacher t on t.t_id = c.t_id
where t.t_name = '张三' and s.s_id = s1.s_id);
select s.s_id, s.s_name, s.s_birth, s.s_sex
from (select s_id from score where c_id = '01') sc1
join (select s_id from score where c_id = '02') sc2 on sc1.s_id = sc2.s_id
left join student s on s.s_id = sc1.s_id;
select s.s_id, s.s_name, s.s_birth, s.s_sex
from (select s_id from score where c_id = '01') sc1
left join (select s_id from score where c_id = '02') sc2 on sc1.s_id = sc2.s_id
left join student s on s.s_id = sc1.s_id
where sc2.s_id is null;
select s.s_id, max(s.s_name), max(s.s_birth), max(s.s_sex)
from student s
join course c
left join score sc on sc.s_id = s.s_id and c.c_id = sc.c_id
where sc.s_score is null
group by s.s_id;
select s.s_id, max(s.s_name), max(s.s_birth), max(s.s_sex)
from student s
join score sc on sc.s_id = s.s_id
where s.s_id <> '01' and exists (
select c_id from score where s_id = '01')
group by s.s_id;
select distinct s.s_id, s.s_name, s.s_birth, s.s_sex
from student s
join score sc on sc.s_id = s.s_id
where s.s_id <> '01' and sc.c_id in (
select c_id from score where s_id = '01');
select s.s_id, s.s_name, s.s_birth, s.s_sex
from student s
join (select s_id, concat_ws('-', collect_set(cast(c_id as string))) course_id
from score
where s_id <> '01'
group by s_id) t1 on t1.s_id = s.s_id
join (select s_id, concat_ws('-', collect_set(cast(c_id as string))) course_id
from score
where s_id = '01'
group by s_id) t2 on t2.course_id = t1.course_id;
select s.s_name
from student s
where not exists (
select sc.s_id
from teacher t, course c, score sc
where t.t_id = c.t_id and t.t_name = '张三' and sc.c_id = c.c_id and sc.s_id = s.s_id);
select s.s_id, s.s_name, t.avgScore
from student s
join (select s_id, count(s_id) countNum, avg(s_score) avgScore
from score
where s_score < 60
group by s_id) t on s.s_id = t.s_id;
select t.s_id, t.s_name, t.s_birth, t.s_sex
from (select s.s_id, s.s_name, s.s_birth, s.s_sex, sc.s_score
from student s
join score sc on sc.s_id = s.s_id
where sc.c_id = '01' and sc.s_score < 60
order by sc.s_score desc) t;
select s.s_id, max(s.s_name) s_name,
sum(case sc.c_id when '01' then sc.s_score else 0 end) `语文`,
max(case sc.c_id when '02' then sc.s_score else 0 end) `数学`,
sum(if(sc.c_id='03', sc.s_score, 0)) `英语`,
if(avg(sc.s_score) is null, 0, avg(sc.s_score)) avgScore
from student s
left join score sc on sc.s_id = s.s_id
group by s.s_id
order by avgScore desc;
select sc.c_id, c.c_course, max(sc.s_score) maxScore, min(sc.s_score) minScore, round(avg(sc.s_score), 2) avgScore, round(avg(sc.`及格`), 2) `及格率`, round(avg(sc.`中等`), 2) `中等率`, round(avg(sc.`优良`), 2) `优良率`, round(avg(sc.`优秀`), 2) `优秀率`
from (select s_id, c_id, s_score,
case when s_score >= 60 then 1 else 0 end as `及格`,
case when s_score >= 70 and s_score < 80 then 1 else 0 end as `中等`,
case when s_score >= 80 and s_score < 90 then 1 else 0 end as `优良`,
case when s_score >= 90 then 1 else 0 end as `优秀`
from score) sc
left join course c on c.c_id = sc.c_id
group by sc.c_id, c.c_course;
select s_id, c_id, s_score,
row_number() over(distribute by c_id sort by s_score desc) rowNum
from score;
select s.s_id, s.s_name, sum(s_score) sumScore
from score sc
join student s on s.s_id = sc.s_id
group by s.s_id,s.s_name
order by sumScore desc;
select c.t_id, sc.c_id, round(avg(sc.s_score), 2) avgScore
from score sc
join course c on sc.c_id = c.c_id
group by c.t_id, sc.c_id
order by c.t_id, avgScore desc;
select s.s_id, s.s_name, s.s_birth, s.s_sex, sc.s_score
from (select s_id, c_id, s_score,
row_number() over(distribute by c_id sort by s_score desc) rowNum
from score) sc
left join student s on s.s_id = sc.s_id
where sc.rowNum between 2 and 3;
select sc.c_id, c.c_course,
round(sum(case when sc.s_score >= 85 then 1 else 0 end)/count(1), 2) 85Score,
round(sum(case when sc.s_score >= 70 and sc.s_score < 85 then 1 else 0 end)/count(1), 2) 70Score,
round(sum(case when sc.s_score >= 60 and sc.s_score < 70 then 1 else 0 end)/count(1), 2) 60Score,
round(sum(case when sc.s_score < 60 then 1 else 0 end)/count(1), 2) 0Score,
count(1) as totalStu
from score sc
left join course c on c.c_id = sc.c_id
group by sc.c_id, c.c_course;
select sc.s_id, sc.avgScore, row_number() over(sort by sc.avgScore desc) rowNum
from (select s_id, round(avg(s_score), 2) avgScore
from score
group by s_id) sc;
select sc.s_id, sc.c_id, sc.s_score, sc.rowNum, sc.rankNum, sc.denseRank
from (select s_id, c_id, s_score,
row_number() over(distribute by c_id sort by s_score desc) as rowNum,
rank() over(distribute by c_id sort by s_score desc) as rankNum,
dense_rank() over(distribute by c_id sort by s_score desc) as denseRank
from score) sc
where sc.rowNum < 4;
select c_id, count(1) totalStu
from score
group by c_id;
select s.s_id, s.s_name
from score sc
left join student s on s.s_id = sc.s_id
group by s.s_id, s.s_name
having count(1) = 2;
select s_sex, count(1) totalSex
from student
group by s_sex;
select s_id, s_name, s_birth, s_sex
from student
where s_name like '%风%';
select s_name, s_sex, count(1) totalStu
from student
group by s_name, s_sex
having totalStu > 1;
select s_id, s_name, s_birth, s_sex
from student
where year(s_birth) = '1990';
select sc.c_id, sc.avgScore
from (select c_id, round(avg(s_score), 2) avgScore
from score
group by c_id) sc
order by sc.avgScore desc, sc.c_id asc;
select s.s_id, s.s_name, avg(sc.s_score) avgScore
from student s
left join score sc on sc.s_id = s.s_id
group by s.s_id, s.s_name
having avg(sc.s_score) >= 85;
select s.s_name, sc.s_score
from score sc
left join course c on sc.c_id = c.c_id
left join student s on s.s_id = sc.s_id
where c.c_course = '数学' and sc.s_score < 60;
select s.s_id, s.s_name,
sum(case sc.c_id when '01' then sc.s_score else 0 end) `语文`,
sum(case sc.c_id when '02' then sc.s_score else 0 end) `数学`,
sum(if(sc.c_id='03', sc.s_score, 0)) `英语`
from student s
left join score sc on sc.s_id = s.s_id
group by s.s_id, s.s_name;
select s.s_id, s.s_name, c.c_course, sc.s_score
from student s
left join score sc on sc.s_id = s.s_id
left join course c on sc.c_id = c.c_id
where sc.s_score > 70;
select s.s_id, s.s_name, c.c_course, sc.s_score
from score sc
left join student s on s.s_id = sc.s_id
left join course c on c.c_id = sc.c_id
where sc.s_score < 60;
select s.s_id, s.s_name
from score sc
left join student s on s.s_id = sc.s_id
where sc.c_id = '01' and sc.s_score > 80;
select sc.c_id, c.c_course, count(1) totalStu
from score sc
left join course c on c.c_id = sc.c_id
group by sc.c_id, c.c_course;
select a.s_id, a.s_name, a.s_score
from (select s.s_id, s.s_name, sc.s_score,
dense_rank() over(distribute by sc.c_id sort by sc.s_score desc) denseRank
from score sc
left join student s on s.s_id = sc.s_id
left join course c on c.c_id = sc.c_id
left join teacher t on t.t_id = c.t_id
where t.t_name = '张三') a
where a.denseRank = 1;
select sc1.s_id, sc1.c_id, sc1.s_score
from score sc1
join score sc2 on sc1.c_id != sc2.c_id
where sc1.s_score = sc2.s_score and sc1.s_id = sc2.s_id
group by sc1.s_id, sc1.c_id, sc1.s_score;
select distinct sc1.s_id, sc1.c_id, sc1.s_score
from score sc1
join score sc2 on sc1.c_id != sc2.c_id
where sc1.s_score = sc2.s_score and sc1.s_id = sc2.s_id;
select sc.s_id, sc.c_id, sc.s_score, sc.denseNum
from(select s_id, c_id, s_score, dense_rank() over(distribute by c_id sort by s_score desc) as denseNum
from score) sc
where sc.denseNum < 4;
select sc.c_id, count(sc.c_id) countNum
from score sc
group by sc.c_id
having countNum > 5
order by countNum desc, sc.c_id asc;
select sc.s_id, count(sc.c_id) countNum
from score sc
group by sc.s_id
having countNum >= 2;
select s.s_id, s.s_name
from student s
join course c
left join score sc on sc.c_id = c.c_id and sc.s_id = s.s_id
group by s.s_id, s.s_name
having sum(case when sc.s_score is null then 1 else 0 end) = 0;
select s_birth, (year(current_date()) - year(s_birth) -
(case when month(current_date()) > month(s_birth) then 0 when month(current_date()) = month(s_birth) and day(current_date()) >= day(s_birth) then 0 else 1 end)) age
from student;
select s_id, s_name, s_birth, s_sex
from student
where weekofyear(s_birth) = weekofyear(current_date());
select s_id, s_name, s_birth, s_sex
from student
where weekofyear(s_birth) = weekofyear(current_date()) + 1;
select s_id, s_name, s_birth, s_sex
from student
where month(current_date()) = month(s_birth);
select s_id, s_name, s_birth, s_sex
from student
where month(s_birth) = 12;