大厂SQL面试题

本文介绍了如何使用SQL查询大学考试成绩,找出各科排名前10%和30%的学生ID,涉及数据预处理、分组统计、排名函数应用和多表关联。还展示了如何在销售数据和日志表中运用类似方法,如找出特定条件下的顶级销售经理和连续登录用户。
摘要由CSDN通过智能技术生成

1.现在有大学考试成绩如下表A:id,subject,score,找出每一科都是这一科前10%的学生的id
1)建表及准备数据

create table tmp
(
    nam char(10),
    lesson char(20),
    mark int
);
INSERT into tmp VALUES('a1','c1',80),('a2','c2',80),('a3','c1',90),('a4','c1',90),('a5','c2',70),
('a6','c3',70),('a7','c1',60),('a8','c2',60),('a9','c2',80),('a3','c2',90),('a3','c3',80);

查询表结果:
在这里插入图片描述
2)先求每门课排名前10%的学生

//每门课排名前10%的学生
(select nam,lesson,mark from 
(select *,ntile(5) over (partition by lesson order by mark desc) as rn
from tmp) t where rn = 1);

在这里插入图片描述
3)求课程总数

//求一共有多少门课程
select count(distinct lesson) from tmp;
//优化
select count(*) from (select * from tmp group by lesson) as t;

在这里插入图片描述
4)按照nam分组,求count(*)等于全部课程

select nam from (select nam,lesson,mark from 
(select *,ntile(2) over (partition by lesson order by mark desc) as rn
from tmp) t where rn = 1) as temp group by nam
having count(*)=(select count(*) from (select * from tmp group by lesson) as t);

在这里插入图片描述
2.现在有大学考试成绩如下表A:id,subject,score,找出每一科都是这一科前30%的学生的id
使用上一题数据

#先计算每门课前30%的人数各有多少
select lesson,round(count(*)*0.3) as num from tmp group by lesson;#t1 
#或
select lesson,round(count(*)*0.3,0) as num from tmp group by lesson;#t1

在这里插入图片描述

#分别对每门课进行课内排名
select *,row_number() over(partition by lesson order by mark desc) as r from tmp;#t2

在这里插入图片描述

#两个表相关联,并求出每门课前30%的所有学生
select t1.lesson,t1.num,t2.nam,t2.mark,t2.r
from (select lesson,round(count(*)*0.3) as num from tmp group by lesson) t1
join (select *,row_number() over(partition by lesson order by mark desc) as r from tmp) t2
on t1.lesson=t2.lesson
where t2.r<=t1.num;#temp

在这里插入图片描述

#求一共有多少门课程以及优化
select count(distinct lesson) from tmp;
select count(*) from (select * from tmp group by lesson) as t;

在这里插入图片描述

#求出每门课排名都在前30%的学生
select nam from
(select t1.lesson,t1.num,t2.nam,t2.mark,t2.r
from (select lesson,round(count(*)*0.3) as num from tmp group by lesson) t1
join (select *,row_number() over(partition by lesson order by mark desc) as r from tmp) t2
on t1.lesson=t2.lesson
where t2.r<=t1.num) as temp
group by nam having count(*)=(select count(*) from (select * from tmp group by lesson) as t);

在这里插入图片描述
3.用rank求排名的
使用第一题数据:
在这里插入图片描述
1)题目描述:对每门课程进行排名

select *,rank() over(partition by lesson order by mark desc) as r
from tmp;
select *,dense_rank() over(partition by lesson order by mark desc) as r
from tmp;

2)题目描述求每门课程分数最高的学生

select nam,lesson,mark from 
(select *,dense_rank() over(partition by lesson order by mark desc) as r
from tmp) as t
where t.r=1;

3)统计班级课程成绩有三门高于80的学生信息?

#1.求每门课程高于80的所有学生
select * from tmp where mark>=80;#t

#2.按照姓名分组,求每组课程数,输出课程数为3的
select * from (select * from tmp where mark>=80) as t group by nam having count(*)=3;

4.销售表sales_dtl有城市(city),销售经理(cbm),客户(customer),销售金额(sale_amt)三列字段,请用sql取出每个城市中销售金额为本城市前10名的销售经理的信息。
此题类似于求每门课程分数排名前2名的学生的信息。

#每门课程分数排名前2名的学生的信息

 #1.求出每门课程分数排名
 select *,rank() over(partition by lesson order by mark desc) as r
 from tmp;#t
 
 #2.求出前两名信息
 select nam,lesson,mark from (select *,rank() over(partition by lesson order by mark desc) as r
 from tmp) t where t.r<=2;

5.
1)
在这里插入图片描述

create table dws_ae_sir_1d(
	store char(4),
    cate char(20),
    sell_cnt int,
    gmv int
);
insert into dws_ae_sir_1d values('A1','女装',20,300),('A2','女装',10,500),('B1','男装',15,400),
('C1','鞋类',40,850),('A3','女装',30,600),('A4','女装',25,400),('B2','男装',25,600),
('C2','鞋类',50,1000),('B3','男装',20,500),('C3','鞋类',45,900);
select * from dws_ae_sir_1d;


#查询出每个行业销量最高的top2商家,并给出按照销售额从高到低给出综合排序(需展示出序号)
#1.按照每个行业内部销售额排序
select *,rank() over(partition by cate order by sell_cnt desc) as r from dws_ae_sir_1d;#t

#2.求出top2
select * from 
(select *,rank() over(partition by cate order by sell_cnt desc) as r from dws_ae_sir_1d) t
where t.r<=2;

6.如何将表dept的dname字段的值转化成一个字符串,以逗号分隔(concat_ws()和collect_set()函数的使用)
列转行相关函数:CONCAT(),CONCAT_WS(),COLLECT_SET()

#COLLECT_SET(col):函数只接受基本数据类型,主要作用是将某字段的值进行去重汇总,产生array类型字段。与COLLECT_LIST()区别:COLLECT_LIST()不会去重。
select collect_set(dname) from dept;
#结果:[ACCOUNTING,RESEARCH,SALES,OPERATIONS]

#CONCAT_WS():特殊形式的CONCAT(),第一个参数是剩余参数间的分隔符。
CONCAT_WS(',',collect_set(dname)) from dept;
#结果:ACCOUNTING,RESEARCH,SALES,OPERATIONS

7.如何将表table3的中生成的那个字符串变成原先table1的形式(UDTF中的explode()和lateral view()函数的使用)
1)单个lateral view题目:
在这里插入图片描述

#行转列:explode()、lateral view()
select A,B,C from table_1 LATERAL VIEW explode(B) table1 as B;

2)多个lateral view题目:
在这里插入图片描述
在这里插入图片描述

SELECT myCol1, myCol2 FROM baseTable
LATERAL VIEW explode(col1) myTable1 AS myCol1
LATERAL VIEW explode(col2) myTable2 AS myCol2;

3)复杂方式lateral view()题目:
tb_split表数据如下:
在这里插入图片描述

select datenu,des,type from tb_split
lateral view explode(split(des,"//|")) tb1 as des
lateral view explode(split(type,"//|")) tb2 as type

8.表table1(usr_id int,usr_dt string),其中id字段数值相同,找出表中连续活跃的天数/连续发文天数/连续登陆天数,如果不连续,则当前行连续活跃字段设为1,返回查询结果table2(窗口函数)

#数据准备
create table tmp_continous(
	user_id char(4),
	login_date date
);

INSERT into tmp_continous values('123', '2018-08-02'),('123','2018-08-03'),
('123','2018-08-04'),('456','2018-11-02'),('456','2018-12-09'),('789','2018-01-01'),
('789','2018-04-23'),('789','2018-09-10'),('789','2018-09-11'),('789','2018-09-12');

在这里插入图片描述

# 对user_id分组排序
select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous;#t1

在这里插入图片描述

# 若连续登陆,则login_date减去no后应该是同一天
# select *,adddate(login_date,-no) as groupInfo from t1;
select *,adddate(login_date,-no) as groupInfo from 
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1;#t2

在这里插入图片描述

# 按照groupInfo分组
# select *,count(*) as c from t2 group by groupInfo;

select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from 
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2 
group by groupInfo;#t3

在这里插入图片描述

# 按照user_id分组,求每个分组最大值即最大连续登陆天数
# select *,max(c) from t3 group by user_id;
select *,max(c) from 
(select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from 
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2 
group by groupInfo) t3 
group by user_id;

在这里插入图片描述
9.SQL题 给一个简单日志表,包含userid ,activateddate,每个用户每天最多只有一条数据,输出连续3天登陆快手的userid
前面的步骤与上一题相同,倒数第二步改为:

select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from 
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2 
group by groupInfo having c=3;#t3

在这里插入图片描述
10.第一天登录的,第二天也登陆的user_id
上一题小小改动

select *,count(*) as c from (select *,adddate(login_date,-no) as groupInfo from 
(select *,row_number() over(partition by user_id order by login_date) as no from tmp_continous) as t1) t2 
group by groupInfo having c>=2;#t3

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值