HAVING子句进阶
越前须知(雾)
- 本系列参考《SQL进阶教程》1,DBMS选用MySQL。
- 本系列不涉及数据库安装与基础语句,对初学者存在一定门槛;基础知识建议阅读《SQL必知必会(第四版)》与《SQL基础教程(第二版)》。
- 本篇主题为HAVING子句进阶用法,基础用法见同系列前文。
具体用法
查询集合
简单条件
Q:查询可以出勤的队伍,即对内所有成员都处于“待命”状态
A:NOT EXISTS + 否定 / HAVING + CASE
- 创表
CREATE TABLE Teams
(member CHAR(12) NOT NULL PRIMARY KEY,
team_id INTEGER NOT NULL,
status CHAR(8) NOT NULL);
INSERT INTO Teams VALUES('乔', 1, '待命');
INSERT INTO Teams VALUES('肯', 1, '出勤中');
INSERT INTO Teams VALUES('米克', 1, '待命');
INSERT INTO Teams VALUES('卡伦', 2, '出勤中');
INSERT INTO Teams VALUES('凯斯', 2, '休息');
INSERT INTO Teams VALUES('简', 3, '待命');
INSERT INTO Teams VALUES('哈特', 3, '待命');
INSERT INTO Teams VALUES('迪克', 3, '待命');
INSERT INTO Teams VALUES('贝斯', 4, '待命');
INSERT INTO Teams VALUES('阿伦', 5, '出勤中');
INSERT INTO Teams VALUES('罗伯特', 5, '休息');
INSERT INTO Teams VALUES('卡根', 5, '待命');
- NOT EXISTS + 否定:没有一个成员不是“待命”状态
select team_id, member
from Teams T1
where not exists (select * from Teams T2
where T1.team_id = T2.team_id
and T2.status <> '待命');
- HAVING:缺点是无法展示每组成员信息
select team_id
from Teams
group by team_id
having count(*) = sum(case when status = '待命'
then 1
else 0 end);
-- having另一种写法,
-- 优点:极值函数还能使用参数段的索引,性能更好;缺点:但如果列表中有null则不一定能筛除;
select team_id
from Teams
group by team_id
having max(status) = '待命'
and min(status) = '待命';
-- 把having内容移到select
select team_id,
case when max(status) = '待命' and min(status) = '待命'
then '全都在待命'
else '队长!人手不够!' end as 'status'
from Teams
group by team_id -- 用了聚合函数max()、min() 必须用group by
order by team_id;
Q:查找进货重复的地区和产品名称,即查找存在重复值的集合,如下图
A:EXISTS / COUNT(A) <> COUNT(Distinct A)
- 创表
CREATE TABLE Materials
(center CHAR(12) NOT NULL,
receive_date DATE NOT NULL,
material CHAR(12) NOT NULL,
PRIMARY KEY(center, receive_date));
INSERT INTO Materials VALUES('东京' ,'2007-4-01', '锡');
INSERT INTO Materials VALUES('东京' ,'2007-4-12', '锌');
INSERT INTO Materials VALUES('东京' ,'2007-5-17', '铝');
INSERT INTO Materials VALUES('东京' ,'2007-5-20', '锌');
INSERT INTO Materials VALUES('大阪' ,'2007-4-20', '铜');
INSERT INTO Materials VALUES('大阪' ,'2007-4-22', '镍');
INSERT INTO Materials VALUES('大阪' ,'2007-4-29', '铅');
INSERT INTO Materials VALUES('名古屋', '2007-3-15', '钛');
INSERT INTO Materials VALUES('名古屋', '2007-4-01', '钢');
INSERT INTO Materials VALUES('名古屋', '2007-4-24', '钢');
INSERT INTO Materials VALUES('名古屋', '2007-5-02', '镁');
INSERT INTO Materials VALUES('名古屋', '2007-5-10', '钛');
INSERT INTO Materials VALUES('福冈' ,'2007-5-10', '锌');
INSERT INTO Materials VALUES('福冈' ,'2007-5-28', '锡');
- EXISTS
-- 查询重复的地区及物料
-- 不是全称量化,无法用not exists 和双重否定
select distinct center, material
from Materials M1
where exists (select * from Materials M2
where M1.center = M2.center
and M1.receive_date <> M2.receive_date
and M1.material = M2.material);
-- 全称量化可用于查询“没重复的地区及物料”
select center, material
from Materials M1
where not exists (select *
from Materials M2
where M1.center = M2.center
and M1.receive_date <> M2.receive_date
and M1.material = M2.material);
- HAVING
-- 查询有重复物料的地区
select center
from Materials
group by center
having count(material) <> count(distinct material);
-- 移到select
select center,
case when count(material) <> count(distinct material)
then '存在重复'
else '不存在重复' end
from Materials
group by center; -- 使用了聚合函数
Q:(升级)在前一个问题基础上,加入“原产国”字段,查询材料和原产国两个字段都重复的地区和产品信息。
A:巧用CONCAT()
- 创表
CREATE TABLE Materials2
(center VARCHAR(32) NOT NULL,
receive_date DATE NOT NULL,
material VARCHAR(32) NOT NULL,
orgland VARCHAR(32) NOT NULL,
PRIMARY KEY(center, receive_date, material));
INSERT INTO Materials2 VALUES('东京', '2007-04-01', '锡', '智利');
INSERT INTO Materials2 VALUES('东京', '2007-04-12', '锌', '泰国');
INSERT INTO Materials2 VALUES('东京', '2007-05-17', '铝', '巴西');
INSERT INTO Materials2 VALUES('东京', '2007-05-20', '锌', '泰国');
INSERT INTO Materials2 VALUES('大阪', '2007-04-20', '铜', '澳大利亚');
INSERT INTO Materials2 VALUES('大阪', '2007-04-22', '镍', '南非');
INSERT INTO Materials2 VALUES('大阪', '2007-04-29', '铅', '印度');
INSERT INTO Materials2 VALUES('名古屋', '2007-03-15', '钛', '玻利维亚');
INSERT INTO Materials2 VALUES('名古屋', '2007-04-01', '钢', '智利');
INSERT INTO Materials2 VALUES('名古屋', '2007-04-24', '钢', '阿根廷');
INSERT INTO Materials2 VALUES('名古屋', '2007-05-02', '镁', '智利');
INSERT INTO Materials2 VALUES('名古屋', '2007-05-10', '钛', '泰国');
INSERT INTO Materials2 VALUES('福冈', '2007-05-10', '锌', '美国');
INSERT INTO Materials2 VALUES('福冈', '2007-05-28', '锡', '俄罗斯');
- 巧用CONCAT()
select center
from Materials2
group by center
having count(concat(material, orgland))
<>
count(distinct concat(material, orgland));
复杂条件
- 创表
CREATE TABLE TestResults
(student CHAR(12) NOT NULL PRIMARY KEY,
class CHAR(1) NOT NULL,
sex CHAR(1) NOT NULL,
score INTEGER NOT NULL);
INSERT INTO TestResults VALUES('001', 'A', '男', 100);
INSERT INTO TestResults VALUES('002', 'A', '女', 100);
INSERT INTO TestResults VALUES('003', 'A', '女', 49);
INSERT INTO TestResults VALUES('004', 'A', '男', 30);
INSERT INTO TestResults VALUES('005', 'B', '女', 100);
INSERT INTO TestResults VALUES('006', 'B', '男', 92);
INSERT INTO TestResults VALUES('007', 'B', '男', 80);
INSERT INTO TestResults VALUES('008', 'B', '男', 80);
INSERT INTO TestResults VALUES('009', 'B', '女', 10);
INSERT INTO TestResults VALUES('010', 'C', '男', 92);
INSERT INTO TestResults VALUES('011', 'C', '男', 80);
INSERT INTO TestResults VALUES('012', 'C', '女', 21);
INSERT INTO TestResults VALUES('013', 'D', '女', 100);
INSERT INTO TestResults VALUES('014', 'D', '女', 0);
INSERT INTO TestResults VALUES('015', 'D', '女', 0);
Q:查询75%以上学生的分数都在80分以上的班级
A:COUNT(*) * 0.75 <= SUM(CASE)
select class from TestResults
group by class
having count(*) * 0.75
<=
sum(case when score >= 80 then 1 else 0 end);
Q:查询分数在50以上的男生人数比分数在50分以上的女生人数多的班级
A:两个 SUM(CASE) 对比
select class from TestResults
group by class
having sum(case when score >= 50 and sex = '男' then 1 else 0 end)
>
sum(case when score >= 50 and sex = '女' then 1 else 0 end);
Q:查询女生平均分比男生平均分高的班级
A:两个 AVG(CASE) 对比
select class from TestResults
group by class
having avg(case when sex = '女' then score else null end)
> -- 因为有些班级只有一种性别,需要用 else null 而不是 else 0
-- 若统一用 else 0,查询结果包括 D 班,不准确
avg(case when sex = '男' then score else null end);
MICK[日] 《SQL进阶教程》 ↩︎