SQL进阶之EXISTS谓词的用法
EXISTS谓词的用法
越前须知(雾)
- 本系列参考《SQL进阶教程》1,DBMS选用MySQL。
- 本系列不涉及数据库安装与基础语句,对初学者存在一定门槛;基础知识建议阅读《SQL必知必会(第四版)》与《SQL基础教程(第二版)》。
概述
- 支撑SQL和关系数据库的两个基础理论:一是数学领域的集合论;二是现代逻辑学标准体系的谓词逻辑‘
- 谓词是一种特殊的函数,返回值是真值,表可以被认为是真命题的集合
- EXISTS的输入值是行数据的集合,即“二阶谓词”:
1⃣️ “一阶谓词”:如 =、BETWEEN,输入值为一行;
2⃣️ “二阶谓词”:如EXISTS、HAVING,输入值为行数据的集合;
3⃣️ ”三阶谓词“:输入值为”集合的集合“,应用如查询包含某个A集合的集合
4⃣️ “四阶谓词”:输入值为“集合的集合的集合”
具体用法
查询表中“不存在的数据”
- 创表
CREATE TABLE Meetings
(meeting CHAR(32) NOT NULL,
person CHAR(32) NOT NULL,
PRIMARY KEY (meeting, person));
INSERT INTO Meetings VALUES('第1次', '伊藤');
INSERT INTO Meetings VALUES('第1次', '水岛');
INSERT INTO Meetings VALUES('第1次', '坂东');
INSERT INTO Meetings VALUES('第2次', '伊藤');
INSERT INTO Meetings VALUES('第2次', '宫田');
INSERT INTO Meetings VALUES('第3次', '坂东');
INSERT INTO Meetings VALUES('第3次', '水岛');
INSERT INTO Meetings VALUES('第3次', '宫田');
Q:表中为参加了某次会议的人,需要查询“没有出席某次会议的人”
A:先假设所有人都参加了全部会议,以此生成一个大集合,再从中减去实际参加会议的人
- CROSS JOIN
select distinct M1.meeting, M2.person
-- 必须是M2.person,如果是M1.person,与实际参加一样
from Meetings M1 cross join Meetings M2
where not exists (select *
from Meetings M3
where M3.meeting = M1.meeting
and M3.person = M2.person)
order by M1.meeting;
- EXCEPT(MySQL尚未支持)
select M1.meeting, M2.person from Meetings M1 cross join Meetings M2
except
select meeting, person from Meetings;
全称量化:肯定 <=> 双重否定
- 创表
-- 成绩示例1
CREATE TABLE TestScores
(student_id INTEGER,
subject VARCHAR(32) ,
score INTEGER,
PRIMARY KEY(student_id, subject));
INSERT INTO TestScores VALUES(100, '数学',100);
INSERT INTO TestScores VALUES(100, '语文',80);
INSERT INTO TestScores VALUES(100, '理化',80);
INSERT INTO TestScores VALUES(200, '数学',80);
INSERT INTO TestScores VALUES(200, '语文',95);
INSERT INTO TestScores VALUES(300, '数学',40);
INSERT INTO TestScores VALUES(300, '语文',90);
INSERT INTO TestScores VALUES(300, '社会',55);
INSERT INTO TestScores VALUES(400, '数学',80);
-- 工程队示例2
CREATE TABLE Projects
(project_id VARCHAR(32),
step_nbr INTEGER ,
status VARCHAR(32),
PRIMARY KEY(project_id, step_nbr));
INSERT INTO Projects VALUES('AA100', 0, '完成');
INSERT INTO Projects VALUES('AA100', 1, '等待');
INSERT INTO Projects VALUES('AA100', 2, '等待');
INSERT INTO Projects VALUES('B200', 0, '等待');
INSERT INTO Projects VALUES('B200', 1, '等待');
INSERT INTO Projects VALUES('CS300', 0, '完成');
INSERT INTO Projects VALUES('CS300', 1, '完成');
INSERT INTO Projects VALUES('CS300', 2, '等待');
INSERT INTO Projects VALUES('CS300', 3, '等待');
INSERT INTO Projects VALUES('DY400', 0, '完成');
INSERT INTO Projects VALUES('DY400', 1, '完成');
INSERT INTO Projects VALUES('DY400', 2, '完成');
Q:查询所有科目都在50分以上的学生
A:“所有科目都在50分以上” <=> “没有一科低于50分”
- EXISTS
select student_id, score -- 逐行对比,若遇到有科目低于50分则排除
from TestScores t1
where not exists (select * from TestScores t2
where t1.student_id = t2.student_id
and t2.score < 50);
- HAVING
select student_id
from TestScores t1
group by student_id
having min(score) >= 50;
Q:查询“1.数学成绩在80分以上;2.语文成绩在50分以上“的学生
A:在所有语文、数学成绩记录中,某个学生数学成绩不低于80,语文成绩不低于50
- EXISTS
select student_id
from TestScores t1
where t1.subject in ('数学','语文') -- 只考虑数学和语文学科,不考虑其他
and not exists (select * from TestScores t2
-- 子查询为cross join列表,not exists即筛选掉所有交叉列表
where t2.student_id = t1.student_id
and 1 = case when t2.subject = '数学'
and t2.score < 80 then 1 -- 特征函数,满足条件返回1
when t2.subject = '语文'
and t2.score < 50 then 1
else 0 end);
-- 优化:400同学没有语文成绩
select student_id
from TestScores t1
where t1.subject in ('数学','语文')
and not exists (select * from TestScores t2
where t1.student_id = t2.student_id
and 1 = case when t2.subject = '数学'
and t2.score < 80 then 1
when t2.subject = '语文'
and t2.score < 50 then 1
else 0 end)
group by student_id
having count(*) = 2; -- 显著学科数量
Q:查询完成到“工程1”状态的项目
A:恰好完成到“工程1”,则若step_nbr <= 1状态为“完成”,若step_nbr > 1状态为“等待”
- HAVING
select project_id
from Projects
group by project_id -- group 之后只能显示项目序号,无法显示每一步骤对应状态
having count(*) = sum(case when step_nbr <= 1 and status = '完成' then 1
when step_nbr > 1 and status = '等待' then 1
else 0 end);
- EXISTS
select * from Projects p1
where not exists (select * from Projects p2
where p1.project_id = p2.project_id
and status <> case when step_nbr <= 1 -- 双重否定
then '完成'
else '等待' end);
EXISTS的优点:
- 性能好,只要有一行符合status的条件,查询就会终止,不一定需要查询所有行数据,且能通过连接条件使用project_id的索引,查询速度更快;
- 包含更多信息量,HAVING因为被聚合,只能显示项目名称,无法显示每步骤状态
对行、列进行量化
- 创表
-- 列量化示例1
CREATE TABLE ArrayTbl
(keycol CHAR(1) PRIMARY KEY,
col1 INTEGER,
col2 INTEGER,
col3 INTEGER,
col4 INTEGER,
col5 INTEGER,
col6 INTEGER,
col7 INTEGER,
col8 INTEGER,
col9 INTEGER,
col10 INTEGER);
-- 全是1的行
INSERT INTO ArrayTbl VALUES('C', 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-- 全是NULL的行
INSERT INTO ArrayTbl VALUES('A', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO ArrayTbl VALUES('B', 3, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-- 存在9的行
INSERT INTO ArrayTbl VALUES('D', NULL, NULL, 9, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO ArrayTbl VALUES('E', NULL, 3, NULL, 1, 9, NULL, NULL, 9, NULL, NULL);
-- 行量化示例2
CREATE TABLE ArrayTbl2
(key1 CHAR(1) NOT NULL,
i INTEGER NOT NULL,
val INTEGER,
PRIMARY KEY (key1, i));
-- A全为NULL、B仅有一个为非NULL、C全为非NULL
INSERT INTO ArrayTbl2 VALUES('A', 1, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 2, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 3, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 4, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 5, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 6, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 7, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 8, NULL);
INSERT INTO ArrayTbl2 VALUES('A', 9, NULL);
INSERT INTO ArrayTbl2 VALUES('A',10, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 1, 3);
INSERT INTO ArrayTbl2 VALUES('B', 2, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 3, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 4, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 5, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 6, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 7, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 8, NULL);
INSERT INTO ArrayTbl2 VALUES('B', 9, NULL);
INSERT INTO ArrayTbl2 VALUES('B',10, NULL);
INSERT INTO ArrayTbl2 VALUES('C', 1, 1);
INSERT INTO ArrayTbl2 VALUES('C', 2, 1);
INSERT INTO ArrayTbl2 VALUES('C', 3, 1);
INSERT INTO ArrayTbl2 VALUES('C', 4, 1);
INSERT INTO ArrayTbl2 VALUES('C', 5, 1);
INSERT INTO ArrayTbl2 VALUES('C', 6, 1);
INSERT INTO ArrayTbl2 VALUES('C', 7, 1);
INSERT INTO ArrayTbl2 VALUES('C', 8, 1);
INSERT INTO ArrayTbl2 VALUES('C', 9, 1);
INSERT INTO ArrayTbl2 VALUES('C',10, 1);
delete from ArrayTbl2 where key1 = 'B' and val = 3;
INSERT INTO ArrayTbl2 VALUES('B', 1, 1);
Q:行量化:查询全是1的行、全是NULL的行及存在9的行
A:简单粗暴列出所有列名
-- 全为1的行,MySQL不支持
select * from ArrayTbl
where 1 = all (col1, col2, col3, col4, col5, col6, col7, col8, col9, col10);
-- 全为NULL的行
select * from ArrayTbl
where coalesce(col1, col2, col3, col4, col5, col6, col7, col8, col9, col10) is null;
-- 存在9的行
select * from ArrayTbl
where 9 in (col1, col2, col3, col4, col5, col6, col7, col8, col9, col10);
Q:行量化:查询val全为1的key,即C
A:EXISTS 双重否定 / ALL限定谓语 / HAVING子句
- EXISTS双重否定
select key1, i from ArrayTbl2 a1
where not exists (select * from ArrayTbl2 a2
where a1.key1 = a2.key1
and (a2.val is null or a2.val <> 1));
-- 子查询匹配条件加入a2.i = a1.i,存在缺陷:
-- 侥幸躲过(‘B',1,3),如果是(’B‘,1,1)则无法正确筛选
select key1, i from ArrayTbl2 a1
where not exists (select * from ArrayTbl2 a2
where a1.key1 = a2.key1
and a2.i = a1.i
and (a2.val is null or a2.val <> 1));
-- 两个子查询区别在于a1.i = a2.i,若加上这个子句,子查询结果不包含(B,1,1),not exists之后包含(B,1,1)
- ALL限定谓词
select distinct key1
from ArrayTbl2 a1
where 1 = all (select val from ArrayTbl2 a2
where a1.key1 = a2.key1);
- HAVING子句
select key1 from ArrayTbl2 -- 可以正确筛选(’B‘,1,1)
group by key1
having sum(case when val = 1
then 1
else null end) = 10;
select key1 from ArrayTbl2
-- 侥幸躲过(‘B',1,3),如果是(’B‘,1,1)则无法正确筛选
group by key1
having max(val) = 1 and min(val) = 1; -- NULL无法比较大小
MICK[日] 《SQL进阶教程》 ↩︎