hive求出场率和环比

26 篇文章 0 订阅 ¥29.90 ¥99.00

1、有如下数据:(建表语句+sql查询)

id names

1 aa,bb,cc,dd,ee

2 aa,bb,ff,ww,qq

3 aa,cc,rr,yy

4 aa,bb,dd,oo,pp

求英雄的出场排名top3的出场次数及出场率

-- One row per match: `id` is the match id, `names` the heroes that appeared in it.
-- Input lines look like "1<TAB>aa,bb,cc,dd,ee".
create table if not exists t_names (
    id    int,
    names array<string>   -- Hive requires an element type for array columns
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
;

-- Heroes ranked by number of appearances; keeps ranks 1..3 (ties included, since rank() is used).
-- NOTE(review): ccl divides by the total number of picks across all heroes, not by the
-- number of matches -- confirm which definition of "appearance rate" is wanted.
with hero_counts as (
    -- explode the array so that every (match, hero) pair becomes one row
    select
        name,
        count(1) as cc
    from t_names
    lateral view explode(names) tt as name
    group by name
),
ranked as (
    select
        name,
        cc,
        cc / (sum(cc) over ()) as ccl,
        rank() over (order by cc desc) as rk
    from hero_counts
)
select
    name,
    cc,
    ccl,
    rk
from ranked
where rk <= 3
;

2、有如下通话记录:

Zhangsan Wangwu 01:01:01

Zhangsan Zhaoliu 00:11:21

Zhangsan Yuqi 00:19:01

Zhangsan Jingba 00:21:01

Zhangsan Wuxi 01:31:17

Wangwu Zhaoliu 00:51:01

Wangwu Zhaoliu 01:11:19

Wangwu Yuqi 00:00:21

Wangwu Yuqi 00:23:01

Yuqi Zhaoliu 01:18:01

Yuqi Wuxi 00:18:00

Jingba Wangwu 00:01:01

Jingba Wangwu 00:00:06

Jingba Wangwu 00:02:04

Jingba Wangwu 00:02:54

Wangwu Yuqi 01:00:13

Wangwu Yuqi 00:01:01

Wangwu Zhangsan 00:01:01

统计两个人的通话总时长(用户之间互相通话的时长)

-- Call log: caller, callee and call length as an 'HH:mm:ss' string, space-separated.
create table relations (
    fromstr string,
    tostr   string,
    `time`  string   -- back-quoted: TIME may be a reserved word depending on the Hive version
)
row format delimited
fields terminated by ' '
;

-- Total talk time in seconds per unordered pair of users.
-- The inner query normalises each pair so that the lexicographically larger name is
-- always `fromstr`; A->B and B->A calls therefore land in the same group.
select
    fromstr,
    tostr,
    sum(duration) as durations
from (
    select
        case when fromstr >= tostr then fromstr else tostr end as fromstr,
        case when fromstr >= tostr then tostr else fromstr end as tostr,
        -- 'HH:mm:ss' -> seconds; explicit casts instead of implicit string arithmetic
        cast(split(`time`, ':')[0] as int) * 60 * 60
            + cast(split(`time`, ':')[1] as int) * 60
            + cast(split(`time`, ':')[2] as int) as duration
    from relations
) a
group by fromstr, tostr
;

5、有如下销售数据:(建表语句+sql查询)(20分)

店铺 月份 金额

a,01,150

a,01,200

b,01,1000

b,01,800

c,01,250

c,01,220

b,01,6000

a,02,2000

a,02,3000

b,02,1000

b,02,1500

c,02,350

c,02,280

a,03,350

a,03,250

3、编写Hive的HQL语句求出每个店铺的当月销售额和累计到当月的总销售额

-- Sales rows: shop name, month number and amount, comma-separated (e.g. "a,01,150").
create table t_store (
    name   string,
    months int,
    money  int
)
row format delimited fields terminated by ',';

-- Per shop and month: the month's sales (amoney) and the running total up to that month.
-- The output alias `totalmomey` is kept as spelled in the original query.
select
    name,
    months,
    amoney,
    sum(amoney) over (
        partition by name
        order by months asc
        rows between unbounded preceding and current row
    ) as totalmomey
from (
    -- one row per shop and month
    select
        name,
        months,
        sum(money) as amoney
    from t_store
    group by name, months
) a
;

4、[Hive SQL]统计amt连续3个月,环比增长>50%的user

user_id month amt
1,20170101,100
3,20170101,20
4,20170101,30
1,20170102,200
2,20170102,240
3,20170102,30
4,20170102,2
1,20170101,180
2,20170101,250
3,20170101,30
4,20170101,260

-- Users whose monthly amt grew by more than 50% over the previous calendar month
-- for three consecutive months.
with monthly as (
    -- collapse day-level rows (yyyyMMdd) into one total per user and month (yyyyMM)
    select
        user_id,
        substr(cast(month as string), 1, 6) as mon,
        sum(amt) as mon_amt
    from amt
    group by user_id, substr(cast(month as string), 1, 6)
),
indexed as (
    -- year * 12 + month: consecutive calendar months differ by exactly 1
    select
        user_id,
        mon_amt,
        cast(substr(mon, 1, 4) as int) * 12 + cast(substr(mon, 5, 2) as int) as mon_idx
    from monthly
),
with_prev as (
    select
        user_id,
        mon_idx,
        mon_amt,
        lag(mon_amt, 1) over (partition by user_id order by mon_idx) as pre_mon_amt,
        lag(mon_idx, 1) over (partition by user_id order by mon_idx) as pre_mon_idx
    from indexed
),
flagged as (
    -- is_growth = 1 only when the previous row is the previous calendar month
    -- and the increase exceeds 50%; the first month of a user is never flagged
    select
        user_id,
        mon_idx,
        case
            when pre_mon_amt > 0
                 and mon_idx - pre_mon_idx = 1
                 and (mon_amt - pre_mon_amt) / pre_mon_amt > 0.5
            then 1
            else 0
        end as is_growth
    from with_prev
),
streaks as (
    -- three flagged rows in a row imply three consecutive months of growth
    select
        user_id,
        sum(is_growth) over (
            partition by user_id
            order by mon_idx
            rows between 2 preceding and current row
        ) as flag
    from flagged
)
select distinct user_id
from streaks
where flag >= 3;

5、hive经典50题

数据

Student(Sid,Sname,Sage,Ssex)学生表
Sid:学号
Sname:学生姓名
Sage:学生生日(出生日期)
Ssex:学生性别

Course(Cid,Cname,Tid)课程表
Cid:课程编号
Cname:课程名称
Tid:教师编号

SC(Sid,Cid,score)成绩表
Sid:学号
Cid:课程编号
score:成绩

Teacher(Tid,Tname)教师表
Tid:教师编号
Tname:教师名字

01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女

01 语文 02
02 数学 01
03 英语 03

01 张三
02 李四
03 王五

01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98

题目+答案

-- 1. Students whose score in course 01 is higher than their score in course 02.
select
    sc1.sid,
    sc1.score as score1,
    sc2.score as score2
from sc sc1
join sc sc2
    on sc1.sid = sc2.sid
where sc1.cid = 1
  and sc2.cid = 2
  and sc1.score > sc2.score;

-- 2. Student id and average score for students averaging above 60.
select
    sid,
    avg(score) as avgscore
from sc
group by sid
having avg(score) > 60;

-- 3. Every student with id, name, number of courses taken and total score.
-- Left join keeps students without any score row; their total is reported as 0.
select
    stu.sid,
    stu.sname,
    count(sc.cid)              as countcourse,
    coalesce(sum(sc.score), 0) as sumscore
from student stu
left join sc
    on sc.sid = stu.sid
group by
    stu.sid,
    stu.sname;

-- 4. Number of teachers whose name starts with '李'.
select count(*)
from teacher
where tname like '李%';

-- 5. Id and name of students who never took a course taught by teacher '张三'.
-- Anti-join: collect the students who did take such a course, keep everyone else.
select
    stu.sid,
    stu.sname
from student stu
left join (
    select distinct sc.sid
    from sc
    join course cs
        on cs.cid = sc.cid
    join teacher t
        on t.tid = cs.tid
    where t.tname = '张三'
) taught
    on taught.sid = stu.sid
where taught.sid is null;

-- 6. Id and name of students who took every course taught by teacher '张三'.
-- Pair each student with each of the teacher's courses; a student qualifies when
-- none of those pairs lacks a score row.
select
    stu.sid,
    stu.sname
from student stu
cross join (
    -- only the courses taught by the teacher, so other courses cannot count as missing
    select cs.cid
    from course cs
    join teacher t
        on t.tid = cs.tid
    where t.tname = '张三'
) zc
left join sc
    on sc.sid = stu.sid
   and sc.cid = zc.cid
group by
    stu.sid,
    stu.sname
having sum(case when sc.sid is null then 1 else 0 end) = 0;

-- 7. Id and name of students who took both course 01 and course 02.
select
    stu.sid,
    stu.sname
from student stu
join sc sc1
    on sc1.sid = stu.sid
   and sc1.cid = 01
join sc sc2
    on sc2.sid = sc1.sid
   and sc2.cid = 02;

-- 8. Id and name of students whose course-02 score is lower than their course-01 score.
-- As written, students who have a course-01 row but no course-02 row are included too.
select
    stu.sid,
    stu.sname
from student stu
join sc sc1
    on sc1.sid = stu.sid
   and sc1.cid = 01
left join sc sc2
    on sc2.sid = sc1.sid
   and sc2.cid = 02
where sc2.score is null
   or sc1.score > sc2.score;

-- 9. Id and name of students with no score of 60 or above.
-- The left join only matches passing rows, so a student with zero matches qualifies;
-- this also returns students that have no score rows at all.
select
    stu.sid,
    stu.sname
from student stu
left join sc
    on sc.sid = stu.sid
   and sc.score >= 60
group by
    stu.sid,
    stu.sname
having count(sc.sid) = 0;

-- 10. Id and name of students who have not taken every course.
-- Pair each student with each course; any pair without a score row marks a missing course.
select
    stu.sid,
    stu.sname
from student stu
cross join course cs          -- an outer join without ON is not valid; a cross join is intended
left join sc
    on sc.sid = stu.sid
   and sc.cid = cs.cid
group by
    stu.sid,
    stu.sname
having sum(case when sc.cid is null then 1 else 0 end) > 0;

-- 11. Id and name of students sharing at least one course with student 01.
-- Student 01 is part of the result as written.
select distinct
    st.sid,
    st.sname
from student st
join sc sc1
    on sc1.sid = st.sid
join sc sc2
    on sc2.cid = sc1.cid
   and sc2.sid = 1;

-- 12. Id and name of the other students who took at least one of student 01's courses.
select distinct
    st.sid,
    st.sname
from student st
join sc sc1
    on sc1.sid = st.sid
   and sc1.sid != 1
join sc sc2
    on sc2.cid = sc1.cid
   and sc2.sid = 1;

-- 13. Average score over all courses taught by teacher '张三'.
select avg(score) as avgscore
from sc
join course co
    on sc.cid = co.cid
join teacher t
    on t.tid = co.tid
where t.tname = '张三';

-- 14. Id and name of the other students whose set of courses is identical to student 02's.
-- A student matches when their course count equals student 02's course count and all
-- of their courses are among student 02's courses.
-- Assumes sc holds at most one row per (sid, cid) -- TODO confirm.
with target as (
    select cid
    from sc
    where sid = 2
),
compared as (
    select
        sc.sid,
        count(*)                                              as n_courses,
        sum(case when t.cid is not null then 1 else 0 end)    as n_shared
    from sc
    left join target t
        on t.cid = sc.cid
    where sc.sid != 2
    group by sc.sid
)
select
    stu.sid,
    stu.sname
from compared c
join student stu
    on stu.sid = c.sid
cross join (
    select count(*) as n_target
    from target
) tc
where c.n_courses = tc.n_target
  and c.n_shared  = tc.n_target
;
-- 15. All score rows for courses taught by teacher '张三'.
select sc.*
from sc
join course c
    on sc.cid = c.cid
join teacher t
    on c.tid = t.tid
where t.tname = '张三';

-- 16. Course-02 score rows of students who never took course 03.
select sc.*
from sc
left join (
    select *
    from sc
    where sc.cid = '3'
) sc2
    on sc.sid = sc2.sid
where sc2.cid is null   -- no course-03 row for this student
  and sc.cid = 2
;

-- 17. Per student: scores in 语文 / 数学 / 英语, number of courses taken and average score,
-- ordered by average score from high to low.
-- Output columns: student id, yuwen, shuxue, yingyu, course count, average score.
-- A course the student did not take is shown as 0.
select
    sc.sid,
    max(case course.cname when '语文' then sc.score else 0 end) as yuwen,
    max(case course.cname when '数学' then sc.score else 0 end) as shuxue,
    max(case course.cname when '英语' then sc.score else 0 end) as yingyu,
    count(sc.cid)  as kechengshu,
    avg(sc.score)  as pingjunfen
from sc
join course
    on sc.cid = course.cid
group by sc.sid
order by pingjunfen desc;

-- 18. Highest and lowest score per course: course id, max score, min score.
-- A NULL score is treated as 0 when computing the minimum.
select
    cid,
    max(score)              as maxscore,
    min(coalesce(score, 0)) as minscore
from sc
group by cid;
-- 19. Per course: average score and pass rate as a percentage, ordered by average score
-- ascending, then pass rate descending.
-- pass_pct is the numeric rate used for sorting; jigelv is the same value formatted
-- with a '%' suffix (sorting on the formatted string would compare text, not numbers).
select
    cid,
    avgscore,
    pass_pct,
    concat(cast(pass_pct as string), '%') as jigelv
from (
    select
        cid,
        avg(score) as avgscore,
        round(sum(case when score >= 60 then 1 else 0 end) * 100 / count(sid), 2) as pass_pct
    from sc
    group by cid
) per_course
order by
    avgscore asc,
    pass_pct desc;

-- 20. Average score and pass-rate percentage of 语文 (01), 数学 (02) and 英语 (03),
-- pivoted into a single row as 'average:rate%' strings.
select
    -- no ELSE branch: unmatched rows yield NULL, which max() ignores, and every
    -- branch stays a string
    max(case when t1.cid = 1 then concat(cast(t1.avgscore as string), ':', t1.jigelv) end) as yuwen,
    max(case when t1.cid = 2 then concat(cast(t1.avgscore as string), ':', t1.jigelv) end) as shuxue,
    max(case when t1.cid = 3 then concat(cast(t1.avgscore as string), ':', t1.jigelv) end) as yingyu
from (
    select
        sc.cid,
        avg(score) as avgscore,
        concat(cast(sum(case when score >= 60 then 1 else 0 end) * 100 / count(sc.sid) as string), '%') as jigelv
    from sc
    join course cs
        on sc.cid = cs.cid
    where cs.cname in ('语文', '数学', '英语')   -- row filter belongs in WHERE, not HAVING
    group by
        sc.cid,
        cs.cname
) t1;

-- 21. Average score per teacher and course, highest average first.
-- Groups by (teacher id, course id); only the teacher id and the average are returned.
select
    cs.tid,
    avg(score) as avgscore
from sc
join course cs
    on cs.cid = sc.cid
join teacher t
    on cs.tid = t.tid
group by
    cs.tid,
    cs.cid
order by avgscore desc;

-- 22. Score rows ranked 3rd to 6th within each course (rank() by score, highest first).
select a.*
from (
    select
        sc.*,
        rank() over (partition by sc.cid order by sc.score desc) as rk
    from sc
) a
where a.rk >= 3
  and a.rk <= 6;

-- 23. Number of scores per course and score band:
-- course id, course name, band ([100-85], [85-70], [70-60], [小于60]), count.
select
    a.cid,
    a.cname,
    a.px,
    count(a.px)
from (
    -- label each score row with its band; the first matching branch wins
    select
        cs.cid,
        cs.cname,
        case
            when score < 60 then '[小于60]'
            when score < 70 then '[70-60]'
            when score < 85 then '[85-70]'
            else '[100-85]'
        end as px
    from sc
    join course cs
        on sc.cid = cs.cid
) a
group by
    a.cid,
    a.cname,
    a.px
;

-- 24. Average score per student and its rank (rank 1 = highest average).
select
    a.*,
    rank() over (order by a.avgscore desc) as rk
from (
    select
        sc.sid,
        avg(sc.score) as avgscore
    from sc
    group by sc.sid
) a;

-- 25. Top three score rows per course, ignoring ties (row_number gives unique positions).
select
    a.*,
    cs.cname
from (
    select
        sc.*,
        row_number() over (partition by sc.cid order by sc.score desc) as rk
    from sc
) a
join course cs
    on cs.cid = a.cid
where a.rk < 4;

-- 26. Number of students enrolled in each course; courses without enrolments show 0.
select
    cs.cid,
    cs.cname,
    count(sc.sid) as cd
from course cs
left join sc
    on sc.cid = cs.cid
group by
    cs.cid,
    cs.cname;

-- 27. Id and name of students enrolled in exactly one course.
select
    stu.sid,
    stu.sname
from student stu
join sc
    on stu.sid = sc.sid
group by
    stu.sid,
    stu.sname
having count(*) = 1;

-- 28. Number of male and female students.
select
    sum(if(ssex = '男', 1, 0)) as male,
    sum(if(ssex = '女', 1, 0)) as female
from student;

-- 29. Students whose name starts with '张'.
select *
from student
where sname like '张%';

-- 30. Students who share their full name with another student, plus how many carry it.
select named.*
from (
    select
        stu.*,
        count(sid) over (partition by stu.sname) as stucount
    from student stu
) named
where named.stucount > 1;   -- keep only names that occur more than once

-- 31. Students born in 1990.
-- NOTE(review): the original heading said 1981 while the query filters on 1990 (the
-- sample data only has birth years 1989-1992); the heading was aligned with the code.
select stu.*
from student stu
where substring(stu.sage, 1, 4) = '1990';

-- 32. Id, name and average score of students averaging above 80.
select
    stu.sid,
    stu.sname,
    avg(score) as avgscore
from student stu
join sc
    on stu.sid = sc.sid
group by
    stu.sid,
    stu.sname
having avg(score) > 80;

-- 33. Average score per course, ordered by average ascending; ties broken by
-- course id descending.
select
    sc.cid,
    cs.cname,
    avg(score) as avgscore
from sc
join course cs
    on sc.cid = cs.cid
group by
    sc.cid,
    cs.cname
order by
    avgscore asc,
    sc.cid desc;

-- 34. Name and score of students who scored below 60 in the course named '数学'.
select
    sname,
    score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on sc.cid = cs.cid
where cs.cname = '数学'
  and score < 60;

-- 35. Course enrolments: student id, student name and course name per score row.
-- Inner joins only: students without any score row do not appear.
select
    stu.sid,
    stu.sname,
    cs.cname
from sc
join student stu
    on sc.sid = stu.sid
join course cs
    on sc.cid = cs.cid;

-- 36. Student name, course name and score for every score above 70.
select
    sname,
    cname,
    score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on sc.cid = cs.cid
where sc.score > 70;

-- 37. Courses that have at least one failing score (< 60), highest course id first.
select
    sc.cid,
    cname
from sc
join course cs
    on sc.cid = cs.cid
where sc.score < 60
group by
    sc.cid,
    cname
order by sc.cid desc;

-- 38. Id and name of students who scored 80 or above in course 03.
select
    stu.sid,
    stu.sname
from student stu
join sc
    on stu.sid = sc.sid
   and sc.cid = 3
   and score >= 80;

-- 39. Number of distinct students that have at least one score row.
select count(distinct sid)
from sc;

-- 40. Among students taking teacher '张三''s courses: name and score of the top scorer.

-- Variant A: first_value over the full partition, then keep a single row.
select
    first_value(sname) over (
        partition by tname
        order by score desc
        rows between unbounded preceding and unbounded following
    ) as name,
    first_value(score) over (
        partition by tname
        order by score desc
        rows between unbounded preceding and unbounded following
    ) as score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on cs.cid = sc.cid
join teacher t
    on t.tid = cs.tid
where tname = '张三'
limit 1
;

-- Variant B: sort by score and take the first row.
select
    sname,
    score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on cs.cid = sc.cid
join teacher t
    on t.tid = cs.tid
where tname = '张三'
order by score desc
limit 1;

-- 41. Each course with the number of enrolled students.
select
    sc.cid,
    cs.cname,
    count(sc.sid) as cnt
from sc
join course cs
    on cs.cid = sc.cid
group by
    sc.cid,
    cs.cname;

-- 42. Students who obtained the same score in different courses:
-- student id, course id and score.

-- Exploratory listing of all scores, ordered by score.
select
    stu.sname,
    stu.sid,
    sc.cid,
    sc.score
from student stu
join sc
    on sc.sid = stu.sid
join course cs
    on cs.cid = sc.cid
order by sc.score;

-- Answer: a (student, score) pair that occurs in more than one course.
-- The comparison is per student; matching a score across different students
-- would not answer the question.
select
    sc.sid,
    sc.cid,
    sc.score
from sc
join (
    select
        sid,
        score
    from sc
    group by
        sid,
        score
    having count(distinct cid) > 1
) same_score
    on same_score.sid = sc.sid
   and same_score.score = sc.score;

-- 43. The two best scores of each course (rank() keeps ties).
select
    sid,
    cid,
    score
from (
    select
        sc.*,
        rank() over (partition by cid order by score desc) as rk   -- highest score gets rank 1
    from sc
) aa
where aa.rk < 3;

-- 44. Enrolment count per course, only for courses with more than 5 students.
-- Output: course id and count, ordered by count descending, then course id ascending.
select
    cid,
    count(sid) as cntsid
from sc
group by cid
having count(sid) > 5
order by
    cntsid desc,
    cid asc;

-- 45. Ids of students enrolled in at least two courses.
select sid
from sc
group by sid
having count(cid) > 1
;

-- 46. Id and name of the courses taken by every student.

-- Variant A: pair every student with every course; keep the courses for which
-- no pair lacks a score.
select
    cs.cid,
    cs.cname
from student stu
cross join course cs
left join sc
    on stu.sid = sc.sid
   and cs.cid = sc.cid
group by
    cs.cid,
    cs.cname
having sum(case when sc.score is null then 1 else 0 end) = 0;

-- Variant B: count the scored pairs per course and compare with the number of students.
select
    cid,
    cname,
    sum1
from (
    select
        sc.cid,
        cs.cname,
        sum(case when score is null then 0 else 1 end) as sum1
    from student stu
    cross join course cs
    left join sc
        on stu.sid = sc.sid
       and cs.cid = sc.cid
    group by
        sc.cid,
        cs.cname
) aa
cross join (
    select count(*) as c
    from student
) bb
where sum1 = bb.c;

-- 47. Names of students who have not taken any course taught by teacher '张三'.
-- Pair each student with each of the teacher's courses and keep the students that
-- have no score for any of them.
select stu.sname
from student stu
cross join course cs
join teacher t
    on t.tid = cs.tid
left join sc
    on sc.sid = stu.sid
   and sc.cid = cs.cid
where t.tname = '张三'
group by
    stu.sid,       -- group per student so that namesakes are evaluated separately
    stu.sname
having sum(case when sc.score is null then 0 else 1 end) = 0
;

-- 48. Students with two or more failing courses (< 60): id, number of failing
-- courses (cd) and average score over all of the student's courses.
select
    sid,
    sum(case when score < 60 then 1 else 0 end) as cd,
    avg(score)                                   as avgscore
from sc
group by sid
having sum(case when score < 60 then 1 else 0 end) >= 2;

-- 49. Student ids (with score) for course-02 scores below 60, highest score first.
select
    sc.sid,
    score
from sc
where sc.cid = 2
  and sc.score < 60
order by score desc;

-- 50. Student name, course name and score for every score above 70.
select
    sname,
    cname,
    score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on sc.cid = cs.cid
where score > 70;

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

尬聊码农

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值