用SQL进行集合运算
越前须知(雾)
- 本系列参考《SQL进阶教程》1,DBMS选用MySQL。
- 本系列不涉及数据库安装与基础语句,对初学者存在一定门槛;基础知识建议阅读《SQL必知必会(第四版)》与《SQL基础教程(第二版)》。
概述
SQL能操作具有重复行的集合:
- UNION会排除重复行,默认进行排序,性能较差;
- UNION ALL不排除重复行,不排序,性能较好
具体用法
比较两表是否相等
CREATE TABLE Tbl_A
(keycol CHAR(1) PRIMARY KEY,
col_1 INTEGER,
col_2 INTEGER,
col_3 INTEGER);
CREATE TABLE Tbl_B
(keycol CHAR(1) PRIMARY KEY,
col_1 INTEGER,
col_2 INTEGER,
col_3 INTEGER);
-- 两表相等
INSERT INTO Tbl_A VALUES('A', 2, 3, 4);
INSERT INTO Tbl_A VALUES('B', 0, 7, 9);
INSERT INTO Tbl_A VALUES('C', 5, 1, 6);
INSERT INTO Tbl_B VALUES('A', 2, 3, 4);
INSERT INTO Tbl_B VALUES('B', 0, 7, 9);
INSERT INTO Tbl_B VALUES('C', 5, 1, 6);
-- 两表不相等
set sql_safe_updates = 0;
TRUNCATE Tbl_A;
INSERT INTO Tbl_A VALUES('A', 2, 3, 4);
INSERT INTO Tbl_A VALUES('B', 0, 7, 9);
INSERT INTO Tbl_A VALUES('C', 5, 1, 6);
TRUNCATE Tbl_B;
INSERT INTO Tbl_B VALUES('A', 2, 3, 4);
INSERT INTO Tbl_B VALUES('B', 0, 7, 8);
INSERT INTO Tbl_B VALUES('C', 5, 1, 6);
Q:如何判断两表相等 / 两表不相等?
A:UNION
- UNION的幂等性:S UNION S = S
select case when (select count(*)
from (select * from Tbl_A union select * from Tbl_B) TMP1)
= (select count(*) from Tbl_A)
and (select count(*)
from (select * from Tbl_A union select * from Tbl_B) TMP2)
= (select count(*) from Tbl_B)
then '两表一致' else '两表不一致' end as result;
-- 更准确的写法
select case when count(*) = (select count(*) from Tbl_A)
and count(*) = (select count(*) from Tbl_B)
then '两表相等' else '两表不相等' end as result
from (select * from Tbl_A union select * from Tbl_B) TMP;
- (A UNION B) EXCEPT (A INTERSECT B) = NULL,则A = B
-- MySQL不支持 intersect 和 except
select case when count(*) = 0 then '两表相等'
else '两表不相等' end as result
from ((select * from Tbl_A union select * from Tbl_B)
except
(select * from Tbl_A intersect select * from Tbl_B)) TMP;
用差集实现关系除法运算
- 创表
CREATE TABLE Skills
(skill VARCHAR(32),
PRIMARY KEY(skill));
CREATE TABLE EmpSkills
(emp VARCHAR(32),
skill VARCHAR(32),
PRIMARY KEY(emp, skill));
INSERT INTO Skills VALUES('Oracle');
INSERT INTO Skills VALUES('UNIX');
INSERT INTO Skills VALUES('Java');
INSERT INTO EmpSkills VALUES('相田', 'Oracle');
INSERT INTO EmpSkills VALUES('相田', 'UNIX');
INSERT INTO EmpSkills VALUES('相田', 'Java');
INSERT INTO EmpSkills VALUES('相田', 'C#');
INSERT INTO EmpSkills VALUES('神崎', 'Oracle');
INSERT INTO EmpSkills VALUES('神崎', 'UNIX');
INSERT INTO EmpSkills VALUES('神崎', 'Java');
INSERT INTO EmpSkills VALUES('平井', 'UNIX');
INSERT INTO EmpSkills VALUES('平井', 'Oracle');
INSERT INTO EmpSkills VALUES('平井', 'PHP');
INSERT INTO EmpSkills VALUES('平井', 'Perl');
INSERT INTO EmpSkills VALUES('平井', 'C++');
INSERT INTO EmpSkills VALUES('若田部', 'Perl');
INSERT INTO EmpSkills VALUES('渡来', 'Oracle');
Q:查询掌握Skills表中全部技能的员工,即相田和神崎
A:Skills - 每个员工skill = NULL,则员工掌握所有需要技能
select distinct emp
from EmpSkills ES1
where not exists (select skill
from Skills
intersect
select skill
from EmpSkills ES2
where ES1.emp = ES2.emp
在同一张表中寻找相同子集
- 创表
CREATE TABLE SupParts
(sup CHAR(32) NOT NULL,
part CHAR(32) NOT NULL,
PRIMARY KEY(sup, part));
INSERT INTO SupParts VALUES('A', '螺丝');
INSERT INTO SupParts VALUES('A', '螺母');
INSERT INTO SupParts VALUES('A', '管子');
INSERT INTO SupParts VALUES('B', '螺丝');
INSERT INTO SupParts VALUES('B', '管子');
INSERT INTO SupParts VALUES('C', '螺丝');
INSERT INTO SupParts VALUES('C', '螺母');
INSERT INTO SupParts VALUES('C', '管子');
INSERT INTO SupParts VALUES('D', '螺丝');
INSERT INTO SupParts VALUES('D', '管子');
INSERT INTO SupParts VALUES('E', '保险丝');
INSERT INTO SupParts VALUES('E', '螺母');
INSERT INTO SupParts VALUES('E', '管子');
INSERT INTO SupParts VALUES('F', '保险丝');
Q:查询零件种类和零件数量均相同的供应商
A:自连接 + 两个子查询保证“连接后数量不变”
select sup1.sup, sup2.sup -- 不能用distinct,需要给后面count(*)留空间
from SupParts sup1, SupParts sup2
where sup1.sup < sup2.sup and sup1.part = sup2.part -- 零件种类相同
group by sup1.sup, sup2.sup
having count(*) = (select count(*) from SupParts sup3 -- 零件数量相同
where sup3.sup = sup1.sup) -- 连接后sup1出现次数与原来一致
and count(*) = (select count(*) from SupParts sup4
where sup4.sup = sup2.sup); -- 连接后sup2出现次数与原来一致
MICK[日] 《SQL进阶教程》 ↩︎