笔记
基本语法
- 判断
null
只能用is null/ is not null
- 如果有
group by
子句,那么select
的聚合函数在分组操作之后执行,并作用于每一组 - 子句执行顺序:
where
-非聚合函数
-group by
-聚合函数
-having
select name, sum(amount) balance
from Users a join Transactions b using (account)
group by name, account
having balance > 10000;
- 常用函数
# 删除语句
delete from <表名> where <条件>;
# 更新语句
update <表名> set <属性=...> where <条件>;
# 插入语句
insert into <表名> <字段> values <插入值>;
# 条件判断
if(<条件>, <条件为真取值>, <条件为假取值>)
case when <条件1> then <值1> ... else <默认值> end
# 字符串处理
left(<字符串>, <截取位数>)
right(<字符串>, <截取位数>)
upper(<字符串>)
lower(<字符串>)
length(<字符串>)
concat(<拼接字符串1>, <拼接字符串2>)
group_concat(<字段名> order by <顺序> separator <分隔符>) # 连接同组的字符串
# 日期处理
datediff(<日期1>, <日期2>) # 返回日期1-日期2的天差值
year(<日期>), month(<日期>), date(<日期>) # 获取年/月/日
<日期1> >= <日期2> # 日期可以直接比较大小
查询执行顺序
1. from # 首先进入from字句
2. on # 根据条件选择需要连接的行
3. join # 执行连接
4. where # 对连接结果过滤
5. group by # 分组(**在分组之前执行了 select 后面的 if、substring_index... 等非聚合函数操作,可以用这些字段做分组**)
6. avg(), sum(), count()... # **聚合**函数
7. having # 对各个分组分别进行过滤
8. select # 选择结果
9. distinct # 结果去重
10. union # 将当前结果并上其它结果
11. order by [asc, desc] # 排序
12. limit # 选择指定行作为结果
问题1 中间表命名
- MySQL 需要给作为表的子查询的中间表起名
- 子查询结果是单列时无需起名,用
=(单值)
或者in(多值)
判断即可 - 子查询结果只要是多列,肯定在
FROM
后面作为表,作为表需要取别名,否则这张表没有名称则无法访问表中的字段 - 只要
FROM
子句使用子查询,就要给中间表命名
# 结果是多列的,或作为FROM子句,子查询需要起名(订单最多的客户)
select t.customer_number from (
# 内层循环统计订单数并排序
select customer_number
from Orders
group by customer_number
order by count(*) desc
limit 0, 1
) t;
# 结果是单列的子查询无需起名(找出未和RED公司交易的销售员)
select name
from SalesPerson
where sales_id not in (
# 找出和RED公司销售过的人
select p.sales_id
from SalesPerson p left join Orders o on (p.sales_id = o.sales_id)
where o.com_id = (
# 找出RED公司的id
select com_id
from Company
where name = "RED"
)
);
问题2 聚合函数的使用位置
- 绝不能在
where
子句中使用,但是可以在select
、order by
和having
子句中使用
# 在order by中使用(统计订单数最多的客户)
select customer_number
from Orders
group by customer_number
order by count(*) desc
limit 1;
# 在having中使用(找出重复email)
select email Email
from Person
group by email
having count(*) > 1;
问题3 非聚合函数作为 GROUP BY 字段
- 非聚合函数指的是每行映射一个值,而非聚合函数【多行映射一个值】
- SELECT 后跟字段如果使用了非聚合函数,可以作为 GROUP BY 的字段
# 例题:计算25岁以上和以下的用户综述
select if(age >= 25, '25岁及以上', '25岁以下') age_cut, count(*) number
from user_profile
group by age_cut;
问题4 关联子查询和非关联子查询
- 子查询又称内部查询,分为关联子查询和非关联子查询
- 关联子查询【例题12】
- 执行依赖于外部查询的数据
- 外部查询每访问一行,子查询就执行一次
- 先执行外部查询,再执行子查询
关联子查询的执行过程
- 从外层查询中取出一个元组(即一行),将元组相关列的值传给内层查询
- 执行内层查询,得到子查询操作的值
- 外查询根据子查询返回的结果或结果集得到满足条件的行
- 然后外层查询取出下一个元组重复上述步骤,直到外层的元组全部处理完毕
- 非关联子查询(嵌套子查询)
- 独立于外部查询的子查询,不使用外部查询的数据
- 子查询总共执行一次,执行完毕后将值传递给外部查询
- 先执行子查询,再执行外部查询
题目
1 首次登陆平台的日期
select player_id, min(event_date) first_login
from Activity
group by player_id
order by player_id;
2 删除重复的电子邮箱
delete from Person
where id not in (
select t.id from (
select min(id) id # 1.内层查询选出每个电子邮箱的最小值
from Person
group by email
) t # 2.MySQL不支持同一个表同时进行查找和删除,嵌套一层查询
);
3 订单最多的客户
# 直接使用聚合函数字段进行排序,最简洁
select customer_number
from Orders
group by customer_number
order by count(*) desc
limit 1;
# 以下实现比较麻烦
select customer_number
from (
# 内层查询统计id和订单数
select customer_number, count(*) num
from Orders
group by customer_number
order by num desc
limit 1
) t; # 子查询作为FROM子句,需要命名
4 查找仅在2019春季售出的商品
select distinct p.product_id, p.product_name
from Product p join Sales s on (p.product_id = s.product_id)
where s.sale_date >= '2019-01-01' and s.sale_date <= '2019-03-31'
and p.product_id not in (
# 子查询寻找在日期之外售出的商品
select distinct product_id
from Sales
where sale_date < '2019-01-01' or sale_date > '2019-03-31'
);
5 重新格式化部门表
select id,
sum(if(month = "Jan", revenue, null)) Jan_Revenue,
sum(if(month = "Feb", revenue, null)) Feb_Revenue,
sum(if(month = "Mar", revenue, null)) Mar_Revenue,
sum(if(month = "Apr", revenue, null)) Apr_Revenue,
sum(if(month = "May", revenue, null)) May_Revenue,
sum(if(month = "Jun", revenue, null)) Jun_Revenue,
sum(if(month = "Jul", revenue, null)) Jul_Revenue,
sum(if(month = "Aug", revenue, null)) Aug_Revenue,
sum(if(month = "Sep", revenue, null)) Sep_Revenue,
sum(if(month = "Oct", revenue, null)) Oct_Revenue,
sum(if(month = "Nov", revenue, null)) Nov_Revenue,
sum(if(month = "Dec", revenue, null)) Dec_Revenue
from Department
group by id # 不能 group by id, month 因为这样结果会出现重复id
order by id;
6 按日期分组统计销售情况
select sell_date, count(distinct product) num_sold, group_concat(distinct product order by product asc separator ",") products
from Activities
group by sell_date
order by sell_date;
7 进店却未进行过交易的顾客
# 外层查询,去除结果的0
select customer_id, count_no_trans
from(
# 内层查询,找到每个人不交易的次数,包括0次
select customer_id, sum(if(b.amount is null, 1, 0)) count_no_trans
from Visits a left join Transactions b using (visit_id)
group by customer_id
) as t
where count_no_trans > 0;
8 每个产品在不同商店的价格
select product_id, "store1" store, store1 price
from Products
where store1 is not null
union
select product_id, "store2" store, store2 price
from Products
where store2 is not null
union
select product_id, "store3" store, store3 price
from Products
where store3 is not null;
9 丢失信息的雇员
# 解法1
select employee_id
from (
# 查找所有编号
select employee_id
from Employees
union
select employee_id
from Salaries
) t1 # 作为from子句的子查询,需要命名
where employee_id not in (
# 查找不缺信息的人
select employee_id
from Employees join Salaries using (employee_id)
)
order by employee_id;
# 解法2,手动实现全外连接
select employee_id
from Employees left join Salaries using (employee_id)
where salary is null
union
select employee_id
from Employees right join Salaries using (employee_id)
where name is null
order by employee_id;
10 树节点
select id, (case
when p_id is null then 'Root'
when id in (
# 子查询寻找作为父节点的id
select distinct p_id
from tree
) then 'Inner'
else 'Leaf'
end
) type
from tree
order by id;
11 第二高的薪水
# 如果没有符合题意的行,则返回0行
select distinct salary SecondHighestSalary
from Employee
order by salary desc
limit 1, 1;
# 如果没有符合题意的行,则返回null
select max(salary) SecondHighestSalary
from Employee
where salary < (
# 内层查询最高工资
select salary
from Employee
order by salary desc
limit 1
);
12 分数排名【关联子查询】
select s.score, (
# 内层查询查找比当前值大的个数
# **关联子查询,需要访问到外层的值,先执行外层查询再执行内层查询**
select count(distinct score)
from Scores
where score >= s.score
) 'rank'
from Scores s
order by s.score desc;
13 连续出现的数字
# 使用from多表查询
# from多表+where过滤=逻辑上的join
select distinct l1.num ConsecutiveNums
from Logs l1, Logs l2, Logs l3
where l1.num = l2.num and l1.id = l2.id + 1 and l1.num = l3.num and l1.id = l3.id + 2;
14 每个部门工资最高的员工
- 明确步骤:先找每个部门最高的工资,然后对应到个人信息
select b.name Department, a.name Employee, a.salary Salary
from (
# 子查询寻找每个部门的最大工资
select departmentId, max(salary) max_salary
from Employee
group by departmentId
) t, Employee a join Department b on (a.departmentId = b.id)
where t.departmentId = a.departmentId and t.max_salary = a.salary;
15 每个部门工资前三高的员工
- 明确步骤:找出每个部门前三高的工资,然后对应到个人信息
- 使用关联子查询,类似根据分数排名的题目【12题】
select d.name Department, e.name Employee, e.salary Salary
from Department d join Employee e on (d.id = e.departmentId)
where (
# 关联子查询,寻找当前部门更高的工资,类似rank排名
select count(distinct salary)
from Employee
where departmentId = e.departmentId and e.salary < salary
) <= 2;
16 股票利润
select stock_name, sum(if(operation = 'Buy', -price, price)) capital_gain_loss
from Stocks
group by stock_name;
17 年度买家订单数统计
select a.user_id buyer_id, a.join_date, sum(if(b.buyer_id is null, 0, 1)) orders_in_2019
from Users a left join Orders b on (a.user_id = b.buyer_id and year(b.order_date) = 2019) # 只能在on中过滤,在where中过滤的话不能统计2019无订单的人
group by a.user_id, a.join_date;
18 换座位
select s1.id, if(s2.student is null, s1.student, s2.student) student
from Seat s1 left join Seat s2 on (if(s1.id % 2 = 1, s1.id = s2.id - 1, s1.id = s2.id + 1))
order by s1.id;
19 体育馆的人流量
select s1.id, s1.visit_date, s1.people
from Stadium s1, Stadium s2, Stadium s3
where s1.id + 1 = s2.id and s1.id + 2 = s3.id and s1.people >= 100 and s2.people >= 100 and s3.people >= 100
union
select s2.id, s2.visit_date, s2.people
from Stadium s1, Stadium s2, Stadium s3
where s1.id + 1 = s2.id and s1.id + 2 = s3.id and s1.people >= 100 and s2.people >= 100 and s3.people >= 100
union
select s3.id, s3.visit_date, s3.people
from Stadium s1, Stadium s2, Stadium s3
where s1.id + 1 = s2.id and s1.id + 2 = s3.id and s1.people >= 100 and s2.people >= 100 and s3.people >= 100
order by id;
20 找出每个学校 GPA 最低的同学
select u.device_id, u.university, u.gpa
from user_profile u
where (u.university, u.gpa) in (
# 子查询找出每个学校的最低分数
select university, min(gpa) min_gpa
from user_profile
group by university
)
order by u.university;
21 计算用户平均次日留存率
- 计算公式
第一天来且第二天来的不同ID/总ID数
q1.device_id = q2.device_id and datediff(q2.date, q1.date) = 1
放在on
而非where
中过滤,否则会统计不到一些用户
select count(distinct q2.device_id, q2.date) / count(distinct q1.device_id, q1.date) avg_ret
from question_practice_detail q1 left join question_practice_detail q2
on (q1.device_id = q2.device_id and datediff(q2.date, q1.date) = 1);