SQL进阶之HAVING子句进阶

HAVING子句进阶

越前须知(雾)

  • 本系列参考《SQL进阶教程》1,DBMS选用MySQL。
  • 本系列不涉及数据库安装与基础语句,对初学者存在一定门槛;基础知识建议阅读《SQL必知必会(第四版)》与《SQL基础教程(第二版)》。
  • 本篇主题为HAVING子句进阶用法,基础用法见同系列前文。

具体用法

查询集合

简单条件

Q:查询可以出勤的队伍,即对内所有成员都处于“待命”状态
A:NOT EXISTS + 否定 / HAVING + CASE

  • 创表
CREATE TABLE Teams
(member  CHAR(12) NOT NULL PRIMARY KEY,
 team_id INTEGER  NOT NULL,
 status  CHAR(8)  NOT NULL);INSERT INTO Teams VALUES('乔',     1, '待命');
INSERT INTO Teams VALUES('肯',     1, '出勤中');
INSERT INTO Teams VALUES('米克',   1, '待命');
INSERT INTO Teams VALUES('卡伦',   2, '出勤中');
INSERT INTO Teams VALUES('凯斯',   2, '休息');
INSERT INTO Teams VALUES('简',     3, '待命');
INSERT INTO Teams VALUES('哈特',   3, '待命');
INSERT INTO Teams VALUES('迪克',   3, '待命');
INSERT INTO Teams VALUES('贝斯',   4, '待命');
INSERT INTO Teams VALUES('阿伦',   5, '出勤中');
INSERT INTO Teams VALUES('罗伯特', 5, '休息');
INSERT INTO Teams VALUES('卡根',   5, '待命');
  • NOT EXISTS + 否定:没有一个成员不是“待命”状态
select team_id, member 
    from Teams T1 
    where not exists (select * from Teams T2 
                          where T1.team_id = T2.team_id 
                          and T2.status <> '待命');
  • HAVING:缺点是无法展示每组成员信息
select team_id 
    from Teams 
    group by team_id 
    having count(*) = sum(case when status = '待命' 
                               then 1 
                               else 0 end);

-- having另一种写法,
-- 优点:极值函数还能使用参数段的索引,性能更好;缺点:但如果列表中有null则不一定能筛除;
select team_id 
    from Teams 
    group by team_id 
    having max(status) = '待命' 
    and min(status) = '待命';

-- 把having内容移到select 
select team_id, 
       case when max(status) = '待命' and min(status) = '待命' 
            then '全都在待命' 
            else '队长!人手不够!' end as 'status'
    from Teams
    group by team_id  -- 用了聚合函数max()、min() 必须用group by
    order by team_id;

Q:查找进货重复的地区和产品名称,即查找存在重复值的集合,如下图
A:EXISTS / COUNT(A) <> COUNT(Distinct A)
集合重复值示例1

  • 创表
CREATE TABLE Materials
(center         CHAR(12) NOT NULL,
 receive_date   DATE     NOT NULL,
 material       CHAR(12) NOT NULL,
 PRIMARY KEY(center, receive_date));INSERT INTO Materials VALUES('东京' ,'2007-4-01', '锡');
INSERT INTO Materials VALUES('东京' ,'2007-4-12', '锌');
INSERT INTO Materials VALUES('东京' ,'2007-5-17', '铝');
INSERT INTO Materials VALUES('东京' ,'2007-5-20', '锌');
INSERT INTO Materials VALUES('大阪' ,'2007-4-20', '铜');
INSERT INTO Materials VALUES('大阪' ,'2007-4-22', '镍');
INSERT INTO Materials VALUES('大阪' ,'2007-4-29', '铅');
INSERT INTO Materials VALUES('名古屋', '2007-3-15',  '钛');
INSERT INTO Materials VALUES('名古屋', '2007-4-01',  '钢');
INSERT INTO Materials VALUES('名古屋', '2007-4-24',  '钢');
INSERT INTO Materials VALUES('名古屋', '2007-5-02',  '镁');
INSERT INTO Materials VALUES('名古屋', '2007-5-10',  '钛');
INSERT INTO Materials VALUES('福冈' ,'2007-5-10', '锌');
INSERT INTO Materials VALUES('福冈' ,'2007-5-28', '锡');
  • EXISTS
-- 查询重复的地区及物料
-- 不是全称量化,无法用not exists 和双重否定
select distinct center, material 
    from Materials M1 
    where exists (select * from Materials M2
                      where M1.center = M2.center 
                        and M1.receive_date <> M2.receive_date 
                        and M1.material = M2.material);

-- 全称量化可用于查询“没重复的地区及物料”
select center, material 
	from Materials M1 
	where not exists (select * 
						  from Materials M2
                          where M1.center = M2.center 
                          and M1.receive_date <> M2.receive_date 
                          and M1.material = M2.material);
  • HAVING
-- 查询有重复物料的地区
select center 
    from Materials 
    group by center 
    having count(material) <> count(distinct material);-- 移到select 
select center, 
       case when count(material) <> count(distinct material) 
            then '存在重复' 
            else '不存在重复' end
    from Materials
    group by center;  -- 使用了聚合函数

Q:(升级)在前一个问题基础上,加入“原产国”字段,查询材料和原产国两个字段都重复的地区和产品信息。
A:巧用CONCAT()

  • 创表
CREATE TABLE Materials2
(center VARCHAR(32) NOT NULL,
 receive_date DATE  NOT NULL,
 material VARCHAR(32) NOT NULL,
 orgland  VARCHAR(32) NOT NULL,
 PRIMARY KEY(center, receive_date, material));INSERT INTO Materials2 VALUES('东京',   '2007-04-01', '锡',  '智利');
INSERT INTO Materials2 VALUES('东京',   '2007-04-12', '锌',  '泰国');
INSERT INTO Materials2 VALUES('东京',   '2007-05-17', '铝',  '巴西');
INSERT INTO Materials2 VALUES('东京',   '2007-05-20', '锌',  '泰国');
INSERT INTO Materials2 VALUES('大阪',   '2007-04-20', '铜',  '澳大利亚');
INSERT INTO Materials2 VALUES('大阪',   '2007-04-22', '镍',  '南非');
INSERT INTO Materials2 VALUES('大阪',   '2007-04-29', '铅',  '印度');
INSERT INTO Materials2 VALUES('名古屋', '2007-03-15', '钛',   '玻利维亚');
INSERT INTO Materials2 VALUES('名古屋', '2007-04-01', '钢',   '智利');
INSERT INTO Materials2 VALUES('名古屋', '2007-04-24', '钢',   '阿根廷');
INSERT INTO Materials2 VALUES('名古屋', '2007-05-02', '镁',   '智利');
INSERT INTO Materials2 VALUES('名古屋', '2007-05-10', '钛',   '泰国');
INSERT INTO Materials2 VALUES('福冈',   '2007-05-10', '锌',  '美国');
INSERT INTO Materials2 VALUES('福冈',   '2007-05-28', '锡',  '俄罗斯');
  • 巧用CONCAT()
select center 
    from Materials2 
    group by center 
    having count(concat(material, orgland)) 
            <> 
           count(distinct concat(material, orgland));
复杂条件
  • 创表
CREATE TABLE TestResults
(student CHAR(12) NOT NULL PRIMARY KEY,
 class   CHAR(1)  NOT NULL,
 sex     CHAR(1)  NOT NULL,
 score   INTEGER  NOT NULL);INSERT INTO TestResults VALUES('001', 'A', '男', 100);
INSERT INTO TestResults VALUES('002', 'A', '女', 100);
INSERT INTO TestResults VALUES('003', 'A', '女',  49);
INSERT INTO TestResults VALUES('004', 'A', '男',  30);
INSERT INTO TestResults VALUES('005', 'B', '女', 100);
INSERT INTO TestResults VALUES('006', 'B', '男',  92);
INSERT INTO TestResults VALUES('007', 'B', '男',  80);
INSERT INTO TestResults VALUES('008', 'B', '男',  80);
INSERT INTO TestResults VALUES('009', 'B', '女',  10);
INSERT INTO TestResults VALUES('010', 'C', '男',  92);
INSERT INTO TestResults VALUES('011', 'C', '男',  80);
INSERT INTO TestResults VALUES('012', 'C', '女',  21);
INSERT INTO TestResults VALUES('013', 'D', '女', 100);
INSERT INTO TestResults VALUES('014', 'D', '女',   0);
INSERT INTO TestResults VALUES('015', 'D', '女',   0);

Q:查询75%以上学生的分数都在80分以上的班级
A:COUNT(*) * 0.75 <= SUM(CASE)

select class from TestResults
    group by class
    having count(*) * 0.75 
            <= 
           sum(case when score >= 80 then 1 else 0 end);

Q:查询分数在50以上的男生人数比分数在50分以上的女生人数多的班级
A:两个 SUM(CASE) 对比

select class from TestResults
    group by class
    having sum(case when score >= 50 and sex = '男' then 1 else 0 end)
            >
           sum(case when score >= 50 and sex = '女' then 1 else 0 end);

Q:查询女生平均分比男生平均分高的班级
A:两个 AVG(CASE) 对比

select class from TestResults
    group by class
    having avg(case when sex = '女' then score else null end)
            >   -- 因为有些班级只有一种性别,需要用 else null 而不是 else 0
            -- 若统一用 else 0,查询结果包括 D 班,不准确
           avg(case when sex = '男' then score else null end);

  1. MICK[日] 《SQL进阶教程》 ↩︎

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值