SQL 做题笔记

MONA ODYSSEY

已于 2023-04-03 21:25:29 修改

阅读量126

点赞数 1

分类专栏：算法做题笔记文章标签： mysql

于 2023-03-21 19:52:49 首次发布

本文链接：https://blog.csdn.net/weixin_43785688/article/details/129696031

版权

算法做题笔记专栏收录该内容

13 篇文章 0 订阅

订阅专栏

笔记

基本语法

判断 null 只能用 is null/ is not null
如果有 group by 子句，那么 select 的聚合函数在分组操作之后执行，并作用于每一组
子句执行顺序：where - 非聚合函数 - group by - 聚合函数 - having

select name, sum(amount) balance 
from Users a join Transactions b using (account) 
group by name, account
having balance > 10000;

常用函数

# 删除语句
delete from <表名> where <条件>;
# 更新语句
update <表名> set <属性=...> where <条件>;
# 插入语句
insert into <表名> <字段> values <插入值>;

# 条件判断
if(<条件>, <条件为真取值>, <条件为假取值>)
case when <条件1> then <值1> ... else <默认值> end

# 字符串处理
left(<字符串>, <截取位数>)
right(<字符串>, <截取位数>)
upper(<字符串>)
lower(<字符串>)
length(<字符串>)
concat(<拼接字符串1>, <拼接字符串2>)
group_concat(<字段名> order by <顺序> separator <分隔符>)  # 连接同组的字符串

# 日期处理
datediff(<日期1>, <日期2>)  # 返回日期1-日期2的天差值
year(<日期>), month(<日期>), date(<日期>)  # 获取年/月/日
<日期1> >= <日期2>  # 日期可以直接比较大小

查询执行顺序

1. from  # 首先进入from字句
2. on  # 根据条件选择需要连接的行
3. join  # 执行连接
4. where  # 对连接结果过滤
5. group by  # 分组（**在分组之前执行了 select 后面的 if、substring_index... 等非聚合函数操作，可以用这些字段做分组**）
6. avg(), sum(), count()...  # **聚合**函数
7. having  # 对各个分组分别进行过滤
8. select  # 选择结果
9. distinct  # 结果去重
10. union  # 将当前结果并上其它结果
11. order by [asc, desc] # 排序
12. limit  # 选择指定行作为结果

问题1 中间表命名

MySQL 需要给作为表的子查询的中间表起名
子查询结果是单列时无需起名，用 =（单值） 或者 in（多值） 判断即可
子查询结果只要是多列，肯定在 FROM 后面作为表，作为表需要取别名，否则这张表没有名称则无法访问表中的字段
只要 FROM 子句使用子查询，就要给中间表命名

# 结果是多列的，或作为FROM子句，子查询需要起名（订单最多的客户）
select t.customer_number from (
    # 内层循环统计订单数并排序
    select customer_number 
    from Orders 
    group by customer_number 
    order by count(*) desc
    limit 0, 1
) t;

# 结果是单列的子查询无需起名(找出未和RED公司交易的销售员)
select name 
from SalesPerson 
where sales_id not in (
    # 找出和RED公司销售过的人
    select p.sales_id 
    from SalesPerson p left join Orders o on (p.sales_id = o.sales_id) 
    where o.com_id = (
        # 找出RED公司的id
        select com_id 
        from Company 
        where name = "RED"
    )
);

问题2 聚合函数的使用位置

绝不能在 where 子句中使用，但是可以在 select、order by 和 having 子句中使用

# 在order by中使用（统计订单数最多的客户）
select customer_number 
from Orders 
group by customer_number 
order by count(*) desc
limit 1;

# 在having中使用（找出重复email）
select email Email 
from Person
group by email 
having count(*) > 1;

问题3 非聚合函数作为 GROUP BY 字段

非聚合函数指的是每行映射一个值，而非聚合函数【多行映射一个值】
SELECT 后跟字段如果使用了非聚合函数，可以作为 GROUP BY 的字段

# 例题：计算25岁以上和以下的用户综述
select if(age >= 25, '25岁及以上', '25岁以下') age_cut, count(*) number
from user_profile
group by age_cut;

问题4 关联子查询和非关联子查询

子查询又称内部查询，分为关联子查询和非关联子查询
关联子查询【例题12】
- 执行依赖于外部查询的数据
- 外部查询每访问一行，子查询就执行一次
- 先执行外部查询，再执行子查询

关联子查询的执行过程

从外层查询中取出一个元组（即一行），将元组相关列的值传给内层查询
执行内层查询，得到子查询操作的值
外查询根据子查询返回的结果或结果集得到满足条件的行
然后外层查询取出下一个元组重复上述步骤，直到外层的元组全部处理完毕

非关联子查询（嵌套子查询）
- 独立于外部查询的子查询，不使用外部查询的数据
- 子查询总共执行一次，执行完毕后将值传递给外部查询
- 先执行子查询，再执行外部查询

题目

1 首次登陆平台的日期

题目
在这里插入图片描述

select player_id, min(event_date) first_login 
from Activity 
group by player_id 
order by player_id;

2 删除重复的电子邮箱

题目
在这里插入图片描述

delete from Person 
where id not in (
    select t.id from (
        select min(id) id  # 1.内层查询选出每个电子邮箱的最小值
        from Person 
        group by email
    ) t  # 2.MySQL不支持同一个表同时进行查找和删除，嵌套一层查询
);

3 订单最多的客户

题目
在这里插入图片描述

# 直接使用聚合函数字段进行排序，最简洁
select customer_number 
from Orders 
group by customer_number 
order by count(*) desc
limit 1;

# 以下实现比较麻烦
select customer_number 
from (
    # 内层查询统计id和订单数
    select customer_number, count(*) num 
    from Orders 
    group by customer_number 
    order by num desc
    limit 1
) t;  # 子查询作为FROM子句，需要命名

4 查找仅在2019春季售出的商品

题目
在这里插入图片描述

select distinct p.product_id, p.product_name 
from Product p join Sales s on (p.product_id = s.product_id) 
where s.sale_date >= '2019-01-01' and s.sale_date <= '2019-03-31' 
and p.product_id not in (
    # 子查询寻找在日期之外售出的商品
    select distinct product_id 
    from Sales 
    where sale_date < '2019-01-01' or sale_date > '2019-03-31'
);

5 重新格式化部门表

题目
在这里插入图片描述

select id, 
sum(if(month = "Jan", revenue, null)) Jan_Revenue,
sum(if(month = "Feb", revenue, null)) Feb_Revenue,
sum(if(month = "Mar", revenue, null)) Mar_Revenue,
sum(if(month = "Apr", revenue, null)) Apr_Revenue,
sum(if(month = "May", revenue, null)) May_Revenue,
sum(if(month = "Jun", revenue, null)) Jun_Revenue,
sum(if(month = "Jul", revenue, null)) Jul_Revenue,
sum(if(month = "Aug", revenue, null)) Aug_Revenue,
sum(if(month = "Sep", revenue, null)) Sep_Revenue,
sum(if(month = "Oct", revenue, null)) Oct_Revenue,
sum(if(month = "Nov", revenue, null)) Nov_Revenue,
sum(if(month = "Dec", revenue, null)) Dec_Revenue
from Department 
group by id  # 不能 group by id, month 因为这样结果会出现重复id
order by id;

6 按日期分组统计销售情况

题目
在这里插入图片描述

select sell_date, count(distinct product) num_sold, group_concat(distinct product order by product asc separator ",") products 
from Activities 
group by sell_date 
order by sell_date;

7 进店却未进行过交易的顾客

题目
在这里插入图片描述

# 外层查询，去除结果的0
select customer_id, count_no_trans 
from(
    # 内层查询，找到每个人不交易的次数，包括0次
    select customer_id, sum(if(b.amount is null, 1, 0)) count_no_trans 
    from Visits a left join Transactions b using (visit_id) 
    group by customer_id
) as t
where count_no_trans > 0;

8 每个产品在不同商店的价格

题目
在这里插入图片描述

select product_id, "store1" store, store1 price 
from Products
where store1 is not null
union
select product_id, "store2" store, store2 price 
from Products
where store2 is not null
union
select product_id, "store3" store, store3 price 
from Products
where store3 is not null;

9 丢失信息的雇员

题目
在这里插入图片描述

# 解法1
select employee_id 
from (
    # 查找所有编号
    select employee_id 
    from Employees
    union 
    select employee_id 
    from Salaries
) t1  # 作为from子句的子查询，需要命名
where employee_id not in (
    # 查找不缺信息的人
    select employee_id 
    from Employees join Salaries using (employee_id) 
)
order by employee_id;

# 解法2，手动实现全外连接
select employee_id 
from Employees left join Salaries using (employee_id)
where salary is null
union
select employee_id 
from Employees right join Salaries using (employee_id) 
where name is null
order by employee_id;

10 树节点

题目
在这里插入图片描述

select id, (case
    when p_id is null then 'Root' 
    when id in (
        # 子查询寻找作为父节点的id
        select distinct p_id 
        from tree
    ) then 'Inner'
    else 'Leaf'
    end
) type 
from tree
order by id;

11 第二高的薪水

题目

# 如果没有符合题意的行，则返回0行
select distinct salary SecondHighestSalary
from Employee  
order by salary desc
limit 1, 1;

# 如果没有符合题意的行，则返回null
select max(salary) SecondHighestSalary
from Employee  
where salary < (
    # 内层查询最高工资
    select salary 
    from Employee 
    order by salary desc
    limit 1
);

12 分数排名【关联子查询】

题目
在这里插入图片描述

select s.score, (
    # 内层查询查找比当前值大的个数
	# **关联子查询，需要访问到外层的值，先执行外层查询再执行内层查询**
    select count(distinct score)
    from Scores 
    where score >= s.score
) 'rank'
from Scores s
order by s.score desc;

13 连续出现的数字

题目
在这里插入图片描述

# 使用from多表查询
# from多表+where过滤=逻辑上的join
select distinct l1.num ConsecutiveNums 
from Logs l1, Logs l2, Logs l3 
where l1.num = l2.num and l1.id = l2.id + 1 and l1.num = l3.num and l1.id = l3.id + 2;

14 每个部门工资最高的员工

题目
在这里插入图片描述

明确步骤：先找每个部门最高的工资，然后对应到个人信息

select b.name Department, a.name Employee, a.salary Salary 
from (
    # 子查询寻找每个部门的最大工资
    select departmentId, max(salary) max_salary
    from Employee 
    group by departmentId
) t, Employee a join Department b on (a.departmentId = b.id)
where t.departmentId = a.departmentId and t.max_salary = a.salary;

15 每个部门工资前三高的员工

题目

明确步骤：找出每个部门前三高的工资，然后对应到个人信息
使用关联子查询，类似根据分数排名的题目【12题】

select d.name Department, e.name Employee, e.salary Salary 
from Department d join Employee e on (d.id = e.departmentId) 
where (
    # 关联子查询，寻找当前部门更高的工资，类似rank排名
    select count(distinct salary) 
    from Employee 
    where departmentId = e.departmentId and e.salary < salary
) <= 2;

16 股票利润

题目
在这里插入图片描述

select stock_name, sum(if(operation = 'Buy', -price, price)) capital_gain_loss 
from Stocks 
group by stock_name;

17 年度买家订单数统计

题目
在这里插入图片描述

select a.user_id buyer_id, a.join_date, sum(if(b.buyer_id is null, 0, 1)) orders_in_2019 
from Users a left join Orders b on (a.user_id = b.buyer_id and year(b.order_date) = 2019)  # 只能在on中过滤，在where中过滤的话不能统计2019无订单的人
group by a.user_id, a.join_date;

18 换座位

题目
在这里插入图片描述

select s1.id, if(s2.student is null, s1.student, s2.student) student
from Seat s1 left join Seat s2 on (if(s1.id % 2 = 1, s1.id = s2.id - 1, s1.id = s2.id + 1)) 
order by s1.id;

19 体育馆的人流量

题目
在这里插入图片描述

select s1.id, s1.visit_date, s1.people 
from Stadium s1, Stadium s2, Stadium s3 
where s1.id + 1 = s2.id and s1.id + 2 = s3.id and s1.people >= 100 and s2.people >= 100 and s3.people >= 100 
union 
select s2.id, s2.visit_date, s2.people 
from Stadium s1, Stadium s2, Stadium s3 
where s1.id + 1 = s2.id and s1.id + 2 = s3.id and s1.people >= 100 and s2.people >= 100 and s3.people >= 100 
union 
select s3.id, s3.visit_date, s3.people 
from Stadium s1, Stadium s2, Stadium s3 
where s1.id + 1 = s2.id and s1.id + 2 = s3.id and s1.people >= 100 and s2.people >= 100 and s3.people >= 100
order by id;

20 找出每个学校 GPA 最低的同学

添加链接描述
在这里插入图片描述

select u.device_id, u.university, u.gpa 
from user_profile u
where (u.university, u.gpa) in (
    # 子查询找出每个学校的最低分数
    select university, min(gpa) min_gpa 
    from user_profile 
    group by university
)
order by u.university;

21 计算用户平均次日留存率

添加链接描述
在这里插入图片描述

计算公式 第一天来且第二天来的不同ID/总ID数
q1.device_id = q2.device_id and datediff(q2.date, q1.date) = 1 放在 on 而非 where 中过滤，否则会统计不到一些用户

select count(distinct q2.device_id, q2.date) / count(distinct q1.device_id, q1.date) avg_ret
from question_practice_detail q1 left join question_practice_detail q2 
on (q1.device_id = q2.device_id and datediff(q2.date, q1.date) = 1);