hive-sql 最认真的整理一回

数据是网上的 自己找

create table sds(
sid string,
sname string,
sage INT,
ssex string
)
row format delimited fields terminated by ‘,’
stored as textfile

create table cs(
cid string,
cname string,
tid string
)
row format delimited fields terminated by ‘,’
stored as textfile

create table tc(
tid string,
tname string
)
row format delimited fields terminated by ‘,’
stored as textfile;

create table sc(
sid string,
cid string,
score int
)
row format delimited fields terminated by ‘,’
stored as textfile;

sql-1
sid s1 s2
02 70 60
04 50 30

思路:就是同一张表的对同一列数据不同ID所对应的同一列数据的大小可以用join

1,查询“某1”课程比“某2”课程成绩高的所有学生的学号
Total MapReduce CPU Time Spent: 8 seconds 170 msec
select
sid,
s1,
s2
from
(
select
sid,
sum(c1) s1,
sum(c2) s2
from
(
select
sid,
cid,
case when cid=‘01’ then score else 0 end c1,
case when cid=‘02’ then score else 0 end c2
from sc
)ssc
where c1 != 0 or c2 != 0
group by sid
) sss
where s1!= 0 and s2!=0 and s1>s2;

答案二
Total MapReduce CPU Time Spent: 4 seconds 50 msec
select
a.*,
b.score as b_score,
c.score as c_score
from sds a
join sc b on a.sid=b.sid
join sc c on a.sid=c.sid
where b.cid=‘01’ and c.cid=‘02’ and b.score>c.score;

2,查询平均成绩大于60分的同学的学号和平均成绩;
select
sid,
round(avg(score)) avg_score
from sc
group by sid
having avg(score)>60 ;

3,查询所有同学的学号、姓名、选课数、总成绩
select
b.sid,
a.sname,
sum(case when b.cid is not null then 1 else 0 end) total_courses,
sum(b.score) total_score
from sc b left join sds a
on b.sid=a.sid
group by b.sid,a.sname;

4.查询没学过“张三”老师课的同学的学号、姓名
select
c.*
from
sds c
left join
(
select
b.tid,
b.cname,
a.sid
from sc a left join cs b
on a.cid = b.cid
where b.tid in(
select tid from tc where tname=‘张三’
)
group by b.tid,b.cname,a.sid
) d
on c.sid =d.sid
where d.sid is null;

5、查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:(包括有成绩的和无成绩的)
select
a.sid,
a.sname,
round(avg(b.score)) as avg_score
from sds a
inner join sc b on a.sid=b.sid
group by a.sid,a.sname
having avg(score) <60

union all

select
c.sid,
c.sname,
0 avg_score
from sds c
where c.sid not in
(
select
sc.sid
from sc
group by sc.sid
);

6、查询学过“张三”老师课的同学的学号、姓名
select
sds.*
from sds
join sc on sds.sid=sc.sid
join cs on cs.cid=sc.cid
join tc on tc.tid = cs.tid and tname=‘张三’;

7、查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:
select
a.*,
b.cid,
b.score,
c.cid,
c.score
from sds a
join sc b on a.sid=b.sid and b.cid=‘01’
join sc c on a.sid=c.sid and c.cid=‘02’

8、查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息
select
a.*,
tmp.sid,
tmp1.sid
from sds a
join (
select
sid
from sc
where cid=‘01’
) tmp
on a.sid=tmp.sid
left join (
select
sid
from sc
where cid=‘02’
) tmp1
on a.sid=tmp1.sid
where tmp1.sid is null;

9、列转行
select sid,concat_ws(’,’,collect_set(cid)) courses from sc group by sid;
结果:
sid courses
01 01,02,03
02 01,02,03
03 01,02,03
04 01,02,03
05 01,02
06 01,03
07 02,03

10、查询没有学全所有课程的同学的信息:
select
a.*
from sds a
join (
select
sc.sid
from sc
group by sid
having count(sc.cid) < 3
) tmp on a.sid =tmp.sid
union all
select
b.*
from sds b
left join (
select
sid
from sc
group by sid
) tmp1 on b.sid = tmp1.sid
where tmp1.sid is null;

答案二
select *
from sds
join (
select
count(1) num
from cs
) tmp
left join (
select
sid,
count(cid) num
from sc
group by sid
) tmp1 on sds.sid=tmp1.sid and tmp.num=tmp1.num
where tmp1.sid is null;

11 张老师教过的学生的信息分数信息
join的理解
select
*
from sc a
join (
select
b.cid
from cs b
join tc c on b.tid=c.tid and tnam
e=‘张三’
) tmp1 on a.cid=tmp1.cid;

结果是的
a.sid a.cid a.score tmp1.cid
01 02 90 02
02 02 60 02
03 02 80 02
04 02 30 02
05 02 87 02
07 02 89 02

11 查询除了张老师以外其他老师的教的学生分数信息

select
*
from sc a
join (
select
b.cid
from cs b
join tc c on b.tid=c.tid and tnam
e!=‘张三’
) tmp1 on a.cid=tmp1.cid;

结果是

a.sid a.cid a.score tmp1.cid
01 01 80 01
02 01 70 01
03 01 80 01
04 01 50 01
05 01 76 01
06 01 31 01
01 03 99 03
02 03 80 03
03 03 80 03
04 03 20 03
06 03 34 03
07 03 98 03

12 去重的方法
查询除了张老师以外其他老师的教的课程学生信息

方案一
select
distinct sid
from sc a
join (
select
b.cid
from cs b
join tc c on b.tid=c.tid and tnam
e!=‘张三’
) tmp1 on a.cid=tmp1.cid;
方案二
select
a.sid
from sc a
join (
select
b.cid
from cs b
join tc c on b.tid=c.tid and tnam
e!=‘张三’
) tmp1 on a.cid=tmp1.cid
group by sid;
而结果都是,只是性能的问题
sid
01
02
03
04
05
06
07

13 查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:
第一步:
select a.* from sc a where a.score < 60 group by a.sid,a.cid,a.score;
结果是:
04 01 50
04 02 30
04 03 20
06 01 31
06 03 34

第二步:
select a.sid from sc a where a.score < 60 group by a.sid having count(sid)>1;

结果是:
04
06

第三步:
select b.sid,b.sname from sds b join (select a.sid from sc a where a.score < 60 g
roup by a.sid having count(sid)>1) tmp on b.sid=tmp.sid;

结果是:
b.sid b.sname
04 李云
06 吴兰

select b.sid,b.sname,tmp1.avg_score from sds b join (select a.sid from sc a where
a.score < 60 group by a.sid having count(sid)>1) tmp on b.sid=tmp.sid left join (select sid,round(avg(score)) avg_score from sc group by sid) tmp1 on tmp1.sid=b.sid;
结果是:
b.sid b.sname tmp1.avg_score
04 李云 33.0
06 吴兰 33.0

14 检索"01"课程分数小于60,按分数降序排列的学生信息:
select
b.,
tmp.score
from sds b
right join (
select
a.

from sc a
where a.cid=‘01’ and a.score <60
) tmp on b.sid=tmp.sid
order by tmp.score desc;
结果是:
b.sid b.sname b.sage b.ssex tmp.score
04 李云 19900806 男 50
06 吴兰 19920301 女 31

order by 心得:
在写sql的时候 查询的字段必须含有order by 之后的字段,不然不会报错误。
FAILED: SemanticException [Error 10004]: Line 1:119 Invalid table alias or column reference
‘tmp’: (possible column names are: sid, sname, sage, ssex)

15、按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩
select
a.sid,
if(tmp.score is null,0,tmp.score) chinese,
if(tmp1.score is null,0,tmp1.score) math,
if(tmp2.score is null,0,tmp2.score) english,
round(avg(a.score)) avg_score
from sc a
left join (
select
sid,
score
from sc
where cid=‘01’
) tmp
on tmp.sid=a.sid
left join (
select
sid,
score
from sc
where cid=‘02’
) tmp1
on tmp1.sid=a.sid
left join (
select
sid,
score
from sc
where cid=‘03’
) tmp2
on tmp2.sid=a.sid
group by a.sid,tmp.score,tmp1.score,tmp2.score
order by avg_score desc;

结果为:
a.sid chinese math english avg_score
07 0 89 98 94.0
01 80 90 99 90.0
05 76 87 0 82.0
03 80 80 80 80.0
02 70 60 80 70.0
06 31 0 34 33.0
04 50 30 20 33.0

16、查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率:
–及格为>=60,中等为:70-80,优良为:80-90,优秀为:>=90

select
tmp.cid,
b.cname,
tmp.max_score,
tmp.min_score,
tmp.avg_score,
tmp.passRate,
tmp.modeRate,
tmp.goodRate,
tmp.excellentRate
from cs b
join (
select
a.cid,
max(a.score) max_score,
min(a.score) min_score,
round(avg(a.score),2) avg_score,
round(sum(case when a.score>=60 then 1 else 0 end)/count(a.cid),2) passRate,
round(sum(case when a.score>=70 and a.score<80 then 1 else 0 end)/count(a.cid),2) modeRate,
round(sum(case when a.score>=80 and a.score<90 then 1 else 0 end)/count(a.cid),2) goodRate,
round(sum(case when a.score>=90 then 1 else 0 end)/count(a.cid),2) excellentRate
from sc a
group by a.cid) tmp
on b.cid=tmp.cid

结果为:

01 语文 80 31 64.5 0.67 0.33 0.33 0.0
02 数学 90 30 72.67 0.83 0.0 0.5 0.17
03 英语 99 20 68.5 0.67 0.0 0.33 0.33

17、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:

select
b.*
from sds b
join (
select
sid
from sc
where cid in (
select
a.cid
from sc a
where a.sid=‘01’
) and sid!=01
group by sid
) tmp on b.sid =tmp.sid;

答案二
select
sds.sid,
sds.sname,
sds.sage,
sds.ssex
from sds
join (
select
cid
from sc
where sid=‘01’
) tmp
join (
select
sid,
cid
from sc
) tmp1 on sds.sid=tmp1.sid and tmp.cid=tmp1.cid
where sds.sid not in (‘01’)
group by sds.sid,sds.sname,sds.sage,sds.ssex;

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值