MySQL大厂面试题

代码进阶中

已于 2024-04-01 17:59:28 修改

阅读量335

点赞数 9

文章标签： mysql 面试 etl 数据库开发

于 2024-03-17 18:57:44 首次发布

本文链接：https://blog.csdn.net/qq_41456129/article/details/136787328

版权

1.SQL执行顺序

SQL语句的执行顺序是: from (去加载table1 和 table2这2个表 ) -> join -> on -> where -> group by->select 后面的聚合函数count,sum -> having -> distinct -> order by -> limit

1.1 访问量统计

1.2 电商场景TopK统计

1.3 订单量统计

1.4 大数据拍讯统计

1.5 活跃用户统计

1.6 电商购买金额统计实战

1.7 教育领域SQL实战

1.8 服务器日志统计

1.9 充值日志SQL实战

1.10 电商分组TopK统计

2.行转列

行转列的常规做法是：group by +sum(if()) 或者 count(if())

2.1 华泰证劵1

已知

year	month	amount
1991	1	1.1
1991	2	1.2
1991	3	1.3
1991	4	1.4
1992	1	2.1
1992	2	2.2
1992	3	2.3
1992	4	2.4

查成这样一个结果

year	m1	m2	m3	m4
1991	1.1	1.2	1.3	1.4
1992	2.1	2.2	2.3	2.4

-- 创建表格
create table table2(year int,month int ,amount double) ;
insert into  test_sql.table2 values
           (1991,1,1.1),
           (1991,2,1.2),
           (1991,3,1.3),
           (1991,4,1.4),
           (1992,1,2.1),
           (1992,2,2.2),
           (1992,3,2.3),
           (1992,4,2.4);
select * from table2;

-- 方法一
select  year,
        sum(case when month=1 then amount else 0 end) as m1,
        sum(case when month=2 then amount else 0 end) as m2,
        sum(case when month=2 then amount else 0 end) as m3,
        sum(case when month=4 then amount else 0 end) as m4
from table2
group by year order by year
-- 方法二select  year,
        sum(if(month=1,amount,0)) as m1,
        sum(if(month=2,amount,0)) as m2,
        sum(if(month=3,amount,0))as m3,
        sum(if(month=4,amount,0)) as m4
from table2
group by year order by year

2.2 华泰证券2

create table student(sid int, sname varchar(10), gender char(1), class_id int);
insert into  student
values (1, '张三', '女', 1),
       (2, '李四', '女', 1),
       (3, '王五', '男', 2);

select * from student;

create table  course (cid int, cname varchar(10), teacher_id int);
insert  into course
values (1, '生物', 1),
       (2, '体育', 1),
       (3, '物理', 2);
select * from course;

create table score (sid int, student_id int, course_id int, number int);
insert   score
values (1, 1, 1, 58),
       (4, 1, 2, 50),
       (2, 1, 2, 68),
       (3, 2, 2, 89);
select * from score;

# 查询课程编号“2”的成绩比课程编号“1”低的所有同学的学号、姓名。


-- 方法一
select ac.sid,ac.sname from
student as ac
join     (
select student_id,
       sum(if(course_id=2,number,0)) as pe, 
       sum(if(course_id=1,number,0)) as bio 
from score
group by student_id
)as aa
 on ac.sid=aa.student_id
where aa.pe<bio

-- 方法二
select ac.sid,ac.sname from
student as ac
join     (
select student_id,
       sum(case when  course_id=2 then number else 0 end) as pe, 
       sum(case when  course_id=1 then number else 0 end) as bio 
from score
group by student_id
)as aa
 on ac.sid=aa.student_id
where aa.pe<bio

-- 方法三
with temp as (
select    *from (
select student_id,
       sum(case when  course_id=2 then number else 0 end) as pe, 
       sum(case when  course_id=1 then number else 0 end) as bio 
from score
group by student_id
)as aa
where aa.pe<bio)
select ac.sid,ac.sname
from  student as ac
join temp as ad
on ac.sid =ad.student_id

2.3 腾讯游戏

表table如下：

DDate	shengfu
2015-05-09	胜
2015-05-09	胜
2015-05-09	负
2015-05-09	负
2015-05-10	胜
2015-05-10	负
2015-05-10	负

如果要生成下列结果, 该如何写sql语句?

DDate	胜	负
2015-05-09	2	2
2015-05-10	1	2

create table table1(DDate char(10), shengfu char(1)) ;
insert  into table1  values ('2015-05-09', "胜"),
       ('2015-05-09', "胜"),
       ('2015-05-09', "负"),
       ('2015-05-09', "负"),
       ('2015-05-10', "胜"),
       ('2015-05-10', "负"),
       ('2015-05-10', "负");

select  * from table1

select  DDate,
        sum(case when shengfu="胜" then 1 else 0 end) as m1,
        sum(case when shengfu="负" then 1 else 0 end) as m2

from test_sql.table1
group by DDate order by DDate

select  DDate,
        sum(if(shengfu="胜",1,0)) as m1,
        sum(if(shengfu="负",1,0)) as m2

from test_sql.table1
group by DDate order by DDate

2.4.腾讯QQ

表5

qq号（字段名：qq）	游戏（字段名：game）
10000	a
10000	b
10000	c
20000	c
20000	d

表6

qq号（字段名：qq）	游戏（字段名：game）
10000	a_b_c
20000	c_d

create table tableA(qq char(5), game char(5))
insert into  tableA values
       (10000, 'a'),
       (10000, 'b'),
       (10000, 'c'),
       (20000, 'c'),
       (20000, 'd');

create table tableB(qq char(5), game char(5)) ;
insert into  tableB values
(10000, 'a_b_c'),
(20000, 'c_d');

# a,	将tableA输出为tableB的格式； 【行转列】
select  * from tableA

select  qq,replace(GROUP_CONCAT(game),',','_')game
from tableA
group by  qq
order by  qq

# --将tableB输出为tableA的格式;
select  *,SUBSTRING_INDEX(game, '_') from tableB


SELECT SUBSTRING_INDEX(SUBSTRING_INDEX(game, '_', n.digit), '_', -1) AS game_split
FROM tableB
JOIN (
    SELECT 0 AS digit UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3

) n ON CHAR_LENGTH(game) - CHAR_LENGTH(REPLACE(game, '_', '')) >= n.digit

3.连续N天登录

3.1 OPPO

以下为用户登陆游戏的日期，用一条sQL语句查询出连续三天登录的人员姓名

name	date
张三	2021-01-01
张三	2021-01-02
张三	2021-01-03
张三	2021-01-02
李四	2021-01-01
李四	2021-01-02
王五	2021-01-03
王五	2021-01-02
王五	2021-01-02

create table game(name char(2),  `date` char(10));
insert into  game values
('张三','2021-01-01'),
('张三','2021-01-02'),
('张三','2021-01-03'),
('张三','2021-01-02'),
('张三','2021-01-07'),
('张三','2021-01-08'),
('张三','2021-01-09'),
('李四','2021-01-01'),
('李四','2021-01-02'),
('王五','2021-01-03'),
('王五','2021-01-02'),
('王五','2021-01-02');
('胡超','2021-01-01');
('胡超','2021-01-03');


#方法一
SELECT distinct name
FROM  (
select  c.name,c.date,LEAD(c.date,2) over (PARTITION BY c.name ORDER BY c.date) AS RN
from (select distinct * from game) as c
)AS A
where  datediff(rn,date)=2

# 方法二，和一是一个逻辑思想
with temp  as (select distinct name,date from game),
     temp1 as (select *,date_Add(date,interval 2 day) as dt,lead(date,2) over (partition by name order by date) rn from temp)
select name from temp1 where dt =rn

3.2 脉脉

在过去一个月内,曾连续两天活跃的用户

create table dau(d char(10), uid int, module varchar(10), active_duration int);
insert into dau
values ('2020-01-01', 1, 'jobs', 324),
       ('2020-01-01', 2, 'feeds', 445),
       ('2020-01-01', 3, 'im', 345),
       ('2020-01-02', 2, 'network', 765),
       ('2020-01-02', 3, 'jobs', 342);
select *from dau;


# 方法一
with
    temp as (select distinct uid,d from dau ),
    temm as (select  * ,lead(d,1) over (partition by uid order by d) rn from temp)
    select distinct uid
        from temm where datediff(rn,d)=1

# 方法二
with
    temp as (select distinct uid,d from dau ),
    temm as (select  * ,date_sub(d,interval 1 day) from temp)
    select distinct uid
        from temm group by  uid having count(*)>=2


# 方法三
with t1 as ( select distinct  uid,d from dau),
     t2 as ( select *,row_number() over (partition by uid order by d) rn from t1),
     t3 as (select *,date_sub(d,interval rn day)ss from t2)
select distinct uid
from t3 group by uid,ss having count(*)>=2

3.3 广州银行

有一张表C_T（列举了部分数据）表示持卡人消费记录，表结构如下：

CARD NER	VARCHAR2	卡号，
C_MONTH	NUMBER	消费月份，
C_DATE	DATE	消费日期，
C_TYPEVAR	CHAR2	消费类型
C_ATM	NUMBER	消费金额

每个月每张卡连续消费的最大天数（如卡在当月只有一次消费则为1）。

连续消费天数：指一楼时间内连续每天都有消费，同一天有多笔消费算一天消费，不能跨月份统计。

create table c_t
(
    card_nbr char(1),
    c_month  char(7),
    c_date   char(10),
    c_type   char(2),
    c_atm    decimal
);
insert into  c_t values
                               (1,'2022-01','2022-01-01','网购',100),
                               (1,'2022-01','2022-01-02','网购',200),
                               (1,'2022-01','2022-01-03','网购',300),
                               (1,'2022-01','2022-01-15','网购',100),
                               (1,'2022-01','2022-01-16','网购',200),
                               (2,'2022-01','2022-01-06','网购',500),
                               (2,'2022-01','2022-01-07','网购',800),
                               (1,'2022-02','2022-02-01','网购',100),
                               (1,'2022-02','2022-02-02','网购',200),
                               (1,'2022-02','2022-02-03','网购',300),
                               (2,'2022-02','2022-02-06','网购',500),
                               (2,'2022-02','2022-02-07','网购',800);

select  * from c_t

with t1 as (select distinct card_nbr,c_month,c_date from c_t),
     t2 as (select *,row_number() over (partition by card_nbr,c_month order by c_date) rn from t1  ),
     t3 as (select *,date_sub(c_date, interval rn day) dt2 from t2  ),
     t4 as (select card_nbr,c_month ,dt2 ,count(*) cnt from t3 group by  card_nbr,c_month ,dt2),
     t5 as ( select *,row_number() over (partition by card_nbr,c_month order by cnt desc) as rn from t4)
     select card_nbr,c_month,cnt from t5 where rn=1

4.N日留存率

4.1 腾讯视频号游戏直播

表：tableA

ds(日期)	device	user_id	is_active
2020-03-01	ios	0001	0
2020-03-01	ios	0002	1
2020-03-01	android	0003	1
2020-03-02	ios	0001	0
2020-03-02	ios	0002	0
2020-03-02	android	0003	1

20200301的ios设备用户活跃的次日留存率是多少？

create table happy
(ds char(10) comment '(日期)'  ,device char(10),user_id char(10),is_active int) ;
insert into   happy values
('2020-03-01','ios','0001',0),
('2020-03-01','ios','0002',1),
('2020-03-01','ios','0004',1),
('2020-03-01','android','0003',1),
('2020-03-02','ios','0001',0),
('2020-03-02','ios','0002',0),
('2020-03-02','android','0003',1),
('2020-03-02','ios','0005',1) ,
('2020-03-02','ios','0004',1) ;

select * from happy

# 方法一
    with temp as(
select user_id,
       count(if(ds = '2020-03-01',1,null))cnt1,
       count(if(ds = '2020-03-02' and is_active =1 ,1,null)) cnt2
from happy
where device = 'IOS' AND ((ds='2020-03-01' and is_active=1)or ds = '2020-03-02')
group by  user_id
having cnt1>0)
select count(cnt1)                               sum1,
       count(if(cnt2 > 0, user_id, null))        sum2,
       count(if(cnt2 > 0, user_id, null)) / count(cnt1) rate
from temp;

# 方法二
    with temp as(
select user_id,
       count(if(ds = '2020-03-01',1,null))cnt1,
       count(if(ds = '2020-03-02' and is_active =1 ,1,null)) cnt2
from happy
where device = 'IOS' AND ((ds='2020-03-01' and is_active=1)or ds = '2020-03-02')
group by  user_id
having cnt1>0)
select sum(cnt1)        sum1,
       sum(cnt2)        sum2,
       sum(cnt2) / sum(cnt1) rate
from temp;

4.2 百度

写出用户表 tb_cuid_1d的 20200401 的次日、次7日留存的具体SQL ：
一条sql统计出以下指标 （4.1号uv，4.1号在4.2号的留存uv，4.1号在4.8号的留存uv）;

create table  tb_cuid_1d
(
    cuid         char(1) comment '用户的唯一标识',
    os           char(10) comment '平台',
    soft_version char(1) comment '版本',
    event_day    char(10) comment '日期',
    visit_time    int comment '用户访问时间戳',
    duration     decimal comment '用户访问时长'
);
insert into  tb_cuid_1d values
 (1,'android',1,'2020-04-01',1234567,100),
 (1,'android',1,'2020-04-02',1234567,100),
 (1,'android',1,'2020-04-08',1234567,100),
 (2,'android',1,'2020-04-01',1234567,100),
 (3,'android',1,'2020-04-02',1234567,100);

select * from tb_cuid_1d

# 方法一
select count(t1.cuid) as '4.1号uv',
       count(t2.cuid) as '4.1号在4.2号的留存uv',
       count(t3.cuid) as '4.1号在4.8号的留存uv'
from
          (select distinct cuid,event_day from tb_cuid_1d where event_day = '2020-04-01' )as t1
left join (select distinct cuid,event_day from tb_cuid_1d where event_day = '2020-04-02' )as t2
    on t1.cuid=t2.cuid
left join (select distinct cuid,event_day from tb_cuid_1d where event_day = '2020-04-08' )as t3
    on t1.cuid=t3.cuid

# 方法二
with temp as(
select cuid,
       count(if(event_day = '2020-04-01',1,null)) as uv1,
       count(if(event_day = '2020-04-02',1,null)) as uv2,
       count(if(event_day = '2020-04-08',1,null)) as uv8
from tb_cuid_1d
where event_day in ('2020-04-01','2020-04-02','2020-04-08')
group by cuid
having uv1>0
)
select sum(uv1)as u1,
       sum(uv2)as u2,
       sum(uv2)/sum(uv1) as 'u2/u1',
       sum(uv8)as u8,
       sum(uv8)/sum(uv1) as 'u8/u1'

from temp

5.分组内top前几

5.1 跨越物流

员工表结构

员工表数据

题目描述

求出每个部门工资最高的前三名员工，并计算这些员工的工资占所属部门总工资的百分比。

结果

create table emp(empno char(5) ,ename char(6),hiredate char(10),sal int ,deptno char(2));
insert into  emp values
('7521', 'WARD', '1981-2-22', 1250, 30),
('7566', 'JONES', '1981-4-2', 2975, 20),
('7876', 'ADAMS', '1987-7-13', 1100, 20),
('7369', 'SMITH', '1980-12-17', 800, 20),
('7934', 'MILLER', '1982-1-23', 1300, 10),
('7844', 'TURNER', '1981-9-8', 1500, 30),
('7782', 'CLARK', '1981-6-9', 2450, 10),
('7839', 'KING', '1981-11-17', 5000, 10),
('7902', 'FORD', '1981-12-3', 3000, 20),
('7499', 'ALLEN', '1981-2-20', 1600, 30),
('7654', 'MARTIN', '1981-9-28', 1250, 30),
('7900', 'JAMES', '1981-12-3', 950, 30),
('7788', 'SCOTT', '1987-7-13', 3000, 20),
('7698', 'BLAKE', '1981-5-1', 2850, 30);
select * from emp;


# --求出每个部门工资最高的前三名员工，并计算这些员工的工资占所属部门总工资的百分比。

select empno,ename,hiredate,sal,round(sal/total,2) as rate
from (
select *,row_number() over (partition by deptno order by sal desc) as rn,sum(sal) over(partition by deptno) as total
from emp
) as temp
where temp.rn<4

# 5.2小米电商
create table empp(name char(2) , month char(2), amt int);
insert into  empp values ('张三', '01', 100),
       ('李四', '02', 120),
       ('王五', '03', 150),
       ('赵六', '04', 500),
       ('张三', '05', 400),
       ('李四', '06', 350),
       ('王五', '07', 180),
       ('赵六', '08', 400);

select * from empp


select * ,row_number() over (order by total desc )as rn ,concat(round(totaL/(sum(total) over()) *100,2),'%') as rate
        from (
select name,sum(amt) total from empp group by  name)as aa

5.2 小米电商

订单表，torder. 字段，user_id, order_id, ctime(10位时间戳)，city id，sale_num，sku_id(商品)

问题：20201201至今每日订单量top10的城市及其订单量(订单量对order id去重)(在线写)

create table ordertable (user_id char(3),
                      order_id char(3),
                      ctime char(10),
                      city_id char(5),
                      sale_num int ,
                      sku_id varchar(100)) ;
insert into  ordertable values
('zs','001','2020-12-01','杭州',2,'鞋子'),
('ls','002','2020-12-01','杭州',1,'衣服'),
('ww','003','2020-12-01','杭州',1,'小米12'),
('zl','004','2020-12-01','杭州',1,'小米11'),
('zs','005','2020-12-01','上海',2,'鞋子'),
('zs','005','2020-12-01','上海',2,'裤子'),
('ls','006','2020-12-01','上海',1,'衣服'),
('ww','007','2020-12-01','上海',1,'小米12'),
('zs','008','2020-12-01','武汉',2,'鞋子'),
('ls','009','2020-12-01','武汉',1,'衣服'),
('zs','010','2020-12-01','长沙',2,'鞋子'),
('zs','011','2020-12-02','上海',2,'鞋子'),
('ls','012','2020-12-02','上海',1,'衣服'),
('ww','013','2020-12-02','上海',1,'小米12'),
('zl','014','2020-12-02','上海',1,'小米11'),
('zs','015','2020-12-02','广州',2,'鞋子'),
('ls','016','2020-12-02','广州',1,'衣服'),
('ww','017','2020-12-02','广州',1,'小米12'),
('zs','018','2020-12-02','武汉',2,'鞋子'),
('ls','019','2020-12-02','武汉',1,'衣服');


select * from ordertable

# 订单表，torder.  字段，user_id, order_id, ctime(10位时间戳)，city id，sale_num，sku_id(商品)
# 问题：20201201至今每日订单量top10的城市及其订单量(订单量对order id去重)(在线写)

select order_time,cnt,cnt
from (
select *,row_number() over (partition by order_time,city_id order by cnt desc) as rn
from(
select date(ctime) order_time,city_id,count(distinct order_id) cnt
from ordertable
where date(ctime) between '2020-12-01' and current_date
group by  date(ctime),city_id) as aa ) as bb
where bb.rn<=10