SQL、Hive场景题及答案
点赞收藏呦!
默认mysql老版本没有支持,在最新的8.0版本中支持, Oracle和Hive中都支持窗口函数,下列题目皆在Hive中实现
第一题:
了解哪些窗口函数,都是什么功能?找一个在某个业务中的应用? 手写窗口函数及功能意义,同时随便写一个带窗口函数的sql,并说明其sql的含义。
窗口函数:通常格式为 可用函数+over()函数
-- 查询所有明细
select * from t_order;
# 查询总量
select count(*) from t_order;
等价于:
select *, count(*) over() from t_order;
2、求出每个栏目的被观看次数及累计观看时长?
数据: video表
uid channel min
1 1 23
2 1 12
3 1 12
4 1 32
5 1 342
6 2 13
7 2 34
8 2 13
9 2 134
create table video(
uid int,
channel string,
min int
)
row format delimited
fields terminated by ' '
;
load data local inpath './data/video' into table video;
答案:
select channel,count(*) count,sum(min) total from video group by channel;
3、编写sql实现
数据:
userid,month,visits
A,2015-01,5
A,2015-01,15
B,2015-01,5
A,2015-01,8
B,2015-01,25
A,2015-01,5
A,2015-02,4
A,2015-02,6
B,2015-02,10
B,2015-02,5
A,2015-03,16
A,2015-03,22
B,2015-03,23
B,2015-03,10
B,2015-03,1
drop table visits;
create table visits(
userid string,
month string,
visits int
)
row format delimited
fields terminated by ','
;
load data local inpath './hivedata/visits' overwrite into table visits;
完成需求:每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数,结果数据格式如下:
+---------+----------+---------+-------------+---------------+--+
| userid | month | visits | max_visits | total_visits |
+---------+----------+---------+-------------+---------------+--+
| A | 2015-01 | 33 | 33 | 33 |
| A | 2015-02 | 10 | 33 | 43 |
| A | 2015-03 | 38 | 38 | 81 |
| B | 2015-01 | 30 | 30 | 30 |
| B | 2015-02 | 15 | 30 | 45 |
| B | 2015-03 | 34 | 34 | 79 |
+---------+----------+---------+-------------+---------------+--+
select userid,month,
visits,
max(visits) over(distribute by userid sort by month) max_visits,
sum(visits) over(distribute by userid sort by month) total_visits
from (
select userid,month,
sum(visits) visits
from visits
group by userid,month
) t;
4、编写连续7天登录的总人数:
数据: t1表
Uid dt login_status(1登录成功,0异常)
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
2 2019-07-19 1
2 2019-07-20 0
2 2019-07-21 1
2 2019-07-22 0
2 2019-07-23 1
2 2019-07-24 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
drop table login;
create table login(
Uid int,
dt string,
login_status int
)
row format delimited
fields terminated by ' '
;
load data local inpath './hivedata/login' into table login;
select count(*)
from
(
select distinct uid
from(
select uid,dt,lag(dt,6) over(partition by uid order by dt) pre_dt,
sum(login_status) over(partition by uid order by dt rows between 6 preceding and current row) total
from login
) t
where date_sub(dt,6)=pre_dt and t.total=7) t1;
5、你知道的排名函数有哪些?说一说它们之间的区别? 文字说明即可
-
row_number从1开始,按照顺序,生成分组内记录的序列,row_number()的值不会存在重复,当排序的值相同时,按照表中记录的顺序进行排列
-
RANK() 生成数据项在分组中的排名,排名相等会在名次中留下空位
-
DENSE_RANK() 生成数据项在分组中的排名,排名相等会在名次中不会留下空位
6、编写sql语句实现每班前三名,分数一样不并列,同时求出前三名按名次排序的一次的分差:
数据: stu表
Stu_no class score
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
drop table stu;
create table stu(
Stu_no int,
class string,
score int
)
row format delimited
fields terminated by '\t'
;
load data local inpath './hivedata/stu' into table stu;
编写sql实现,结果如下:
+--------+---------+--------+-----+----------+--+
| class | stu_no | score | rn | rn_diff |
+--------+---------+--------+-----+----------+--+
| 1901 | 2 | 90 | 1 | 90 |
| 1901 | 1 | 90 | 2 | 0 |
| 1901 | 3 | 83 | 3 | -7 |
| 1902 | 7 | 99 | 1 | 99 |
| 1902 | 9 | 87 | 2 | -12 |
| 1902 | 8 | 67 | 3 | -20 |
+--------+---------+--------+-----+----------+--+
select class,stu_no,score,rn,rn_diff
from(
select class,stu_no,score,
row_number() over(partition by class order by score desc) rn,
score-nvl(lag(score,1) over(partition by class order by score desc),0) rn_diff
from stu
) t
where t.rn<4;
7、对于行列互换,你有哪些解决方式,详细说明每一种方式? 使用语言描述即可
collect_set,collect_list,explode.....
8、编写sql实现行列互换。数据如下:
id sid subject int
1,001,语文,90
2,001,数学,92
3,001,英语,80
4,002,语文,88
5,002,数学,90
6,002,英语,75.5
7,003,语文,70
8,003,数学,85
9,003,英语,90
10,003,政治,82
编写sql实现,得到结果如下:
+---------+--------+--------+--------+--------+-----------+--+
| sid | 语文 | u2.数学 | u2.英语 | u2.政治 | u2.total |
+---------+--------+--------+--------+--------+-----------+--+
| 001 | 90.0 | 92.0 | 80.0 | 0.0 | 262.0 |
| 002 | 88.0 | 90.0 | 75.5 | 0.0 | 253.5 |
| 003 | 70.0 | 85.0 | 90.0 | 82.0 | 327.0 |
| total | 248.0 | 267.0 | 245.5 | 82.0 | 842.5 |
+---------+--------+--------+--------+--------+-----------+--+
drop table score;
create table score(
id int,
sid string,
subject string,
score double
)
row format delimited
fields terminated by ','
;
load data local inpath './hivedata/score' into table score;
select
sid,
sum(if(subject="语文",score,0)) as `语文`,
sum(if(subject="数学",score,0)) as `数学`,
sum(case when subject="英语" then score else 0 end) as `英语`,
sum(case when subject="政治" then score else 0 end) as `政治`,
sum(score) total
from score
group by sid
union
select "total",sum(`语文`),sum(`数学`),sum(`英语`),sum(`政治`),sum(total) from
(
select
sid,
sum(if(subject="语文",score,0)) as `语文`,
sum(if(subject="数学",score,0)) as `数学`,
sum(case when subject="英语" then score else 0 end) as `英语`,
sum(case when subject="政治" then score else 0 end) as `政治`,
sum(score) total
from score
group by sid
)t;
9、编写sql实现如下:
数据: t1表
uid tags
1 1,2,3
2 2,3
3 1,2
编写sql实现如下结果:
uid tag
1 1
1 2
1 3
2 2
2 3
3 1
3 2
create table t1(
uid int,
tags string
)
row format delimited
fields terminated by '\t'
;
load data local inpath './hivedata/t1' into table t1;
select uid,tag
from t1 lateral view explode(split(tags,",")) t as tag;
10、行转列
数据: T2表:
Tags
1,2,3
1,2
2,3
T3表:
id lab
1 A
2 B
3 C
根据T2和T3表的数据,编写sql实现如下结果:
+--------+--------+--+
| tags | labs |
+--------+--------+--+
| 1,2 | A,B |
| 1,2,3 | A,B,C |
| 2,3 | B,C |
+--------+--------+--+
create table t2(
tags string
);
load data local inpath './hivedata/t2' overwrite into table t2;
create table t3(
id int,
lab string
)
row format delimited
fields terminated by ' '
;
load data local inpath './hivedata/t3' overwrite into table t3;
select tags,
concat_ws(",",collect_set(lab)) labs
from
(select tags,lab
from
(select tags,tag
from t2 lateral view explode(split(tags,",")) A as tag) B
join t3 on B.tag = t3.id) C
group by tags;