外连接
越前须知(雾)
- 本系列参考《SQL进阶教程》1,DBMS选用MySQL。
- 本系列不涉及数据库安装与基础语句,对初学者存在一定门槛;基础知识建议阅读《SQL必知必会(第四版)》与《SQL基础教程(第二版)》。
具体用法
集合运算
乘法运算
- 创表
创表
CREATE TABLE Items
(item_no INTEGER PRIMARY KEY,
item VARCHAR(32) NOT NULL);
INSERT INTO Items VALUES(10, 'FD');
INSERT INTO Items VALUES(20, 'CD-R');
INSERT INTO Items VALUES(30, 'MO');
INSERT INTO Items VALUES(40, 'DVD');
CREATE TABLE SalesHistory
(sale_date DATE NOT NULL,
item_no INTEGER NOT NULL,
quantity INTEGER NOT NULL,
PRIMARY KEY(sale_date, item_no));
INSERT INTO SalesHistory VALUES('2007-10-01', 10, 4);
INSERT INTO SalesHistory VALUES('2007-10-01', 20, 10);
INSERT INTO SalesHistory VALUES('2007-10-01', 30, 3);
INSERT INTO SalesHistory VALUES('2007-10-03', 10, 32);
INSERT INTO SalesHistory VALUES('2007-10-03', 30, 12);
INSERT INTO SalesHistory VALUES('2007-10-04', 20, 22);
INSERT INTO SalesHistory VALUES('2007-10-04', 30, 7);
Q:以商品为单位汇总各自销量
A:两种方法:先聚合后连接 / 先连接后聚合
- 先聚合后连接,性能较差
select I.item_no, SH.quantity
from Items I left outer join
(select item_no, sum(quantity) as quantity
from SalesHistory
group by item_no) SH
on I.item_no = SH.item_no;
- 先连接后聚合
一对一或一对多关系的两个集合,连接后行数不会增加
select I.item_no, sum(SH.quantity)
from Items I left outer join SalesHistory SH
on I.item_no = SH.item_no
group by I.item_no;
全外连接
- 创表
CREATE TABLE Class_A
(id char(1),
name varchar(30),
PRIMARY KEY(id));
CREATE TABLE Class_B
(id char(1),
name varchar(30),
PRIMARY KEY(id));
INSERT INTO Class_A (id, name) VALUES('1', '田中');
INSERT INTO Class_A (id, name) VALUES('2', '铃木');
INSERT INTO Class_A (id, name) VALUES('3', '伊集院');
INSERT INTO Class_B (id, name) VALUES('1', '田中');
INSERT INTO Class_B (id, name) VALUES('2', '铃木');
INSERT INTO Class_B (id, name) VALUES('4', '西园寺');
Q:生成如下表格
A:MySQL 不支持 full outer join,用 left outer join + right outer join
-- MySQL不支持
select coalesce(a.id, b.id) as id,
a.name, b.name
from Class_A a full outer join Class_B b
on a.id = b.id;
-- 行记录累加:left outer join + right outer join
select a.id as id, a.name, b.name
from Class_A a left outer join Class_B b on a.id = b.id
union
select b.id as id, a.name, b.name
from Class_A a right outer join Class_B b on a.id = b.id;
Q:查询不跟 B 班重复的 A 班学生
A:Class_A - Class_B
-- 全外连接
select a.id as id, a.name as name
from Class_A a left outer join Class_B b
on a.id = b.id
where b.name is null;
-- not exists
select a.id as id, a.name as name
from Class_A a
where not exists (select *
from Class_B b
where a.id = b.id);
格式转换
行→列:制作交叉表
- 创表
CREATE TABLE Courses
(name VARCHAR(32),
course VARCHAR(32),
PRIMARY KEY(name, course));
INSERT INTO Courses VALUES('赤井', 'SQL入门');
INSERT INTO Courses VALUES('赤井', 'UNIX基础');
INSERT INTO Courses VALUES('铃木', 'SQL入门');
INSERT INTO Courses VALUES('工藤', 'SQL入门');
INSERT INTO Courses VALUES('工藤', 'Java中级');
INSERT INTO Courses VALUES('吉田', 'UNIX基础');
INSERT INTO Courses VALUES('渡边', 'SQL入门');
Q:生成以下交叉表
A:CASE(三种方法) / 标量子查询
- CASE + 外连接
select C0.name, -- 先连接再筛选字段
case when C1.name is not null then '〇' else null end as 'SQL入门',
case when C2.name is not null then '〇' else null end as 'UNIX基础',
case when C3.name is not null then '〇' else null end as 'Java中级'
from (select distinct name from Courses) C0
left outer join -- A left outer join B on XXX
(select name from Courses where course = 'SQL入门') C1 on C1.name = C0.name
left outer join
(select name from Courses where course = 'UNIX基础') C2 on C2.name = C0.name
left outer join
(select name from Courses where course = 'Java中级') C3 on C3.name = C0.name;
- CASE + EXISTS
select distinct name, -- 姓名不重复
case when exists (select * from Courses C1 where C1.name = C0.name and C1.course = 'SQL入门') then '〇'
else null end as 'SQL入门',
case when exists (select * from Courses C2 where C2.name = C0.name and C2.course = 'UNIX基础') then '〇'
else null end as 'UNIX基础',
case when exists (select * from Courses C3 where C3.name = C0.name and C3.course = 'Java中级') then '〇'
else null end as 'Java中级'
from Courses C0;
- CASE + SUM()
select name,
case when sum(case when course = 'SQL入门' then 1 else null end) = 1 then '〇'
else null end as 'SQL入门',
case when sum(case when course = 'UNIX基础' then 1 else null end) = 1 then '〇'
else null end as 'UNIX基础',
case when sum(case when course = 'Java中级' then 1 else null end) = 1 then '〇'
else null end as 'Java中级'
from Courses
group by name;
- 标量子查询
select C0.name, -- 标量: select '〇'
(select '〇' from Courses C1
where C0.name = C1.name and C1.course = 'SQL入门') as 'SQL入门',
(select '〇' from Courses C2
where C0.name = C2.name and C2.course = 'UNIX基础') as 'UNIX基础',
(select '〇' from Courses C3
where C0.name = C3.name and C3.course = 'Java中级') as 'Java中级'
from (select distinct name from Courses) C0;
列→行:汇总重复项于一列
- 创表
CREATE TABLE Personnel
(employee varchar(32),
child_1 varchar(32),
child_2 varchar(32),
child_3 varchar(32),
PRIMARY KEY(employee));
INSERT INTO Personnel VALUES('赤井', '一郎', '二郎', '三郎');
INSERT INTO Personnel VALUES('工藤', '春子', '夏子', NULL);
INSERT INTO Personnel VALUES('铃木', '夏子', NULL, NULL);
INSERT INTO Personnel VALUES('吉田', NULL, NULL, NULL);
CREATE TABLE TblSex
(sex_cd char(1),
sex varchar(5),
PRIMARY KEY(sex_cd));
Q:将横向表格转化为纵向,即只有员工及其孩子两列
A:两种方法:UNION ALL / 创建视图
- UNION ALL
select employee, child_1 as child from Personnel
union all
select employee, child_2 as child from Personnel
union all
select employee, child_3 as child from Personnel;
-- 吉田没有孩子,会显示三行null
2. 创建视图
create view Children(child)
as select child_1 from Personnel
union
select child_2 from Personnel
union
select child_3 from Personnel;
select emp.employee, ch.child
from Personnel emp left outer join Children ch
on ch.child in (emp.child_1, emp.child_2, emp.child_3)
order by emp.employee;
嵌套式表侧栏
- 创表
CREATE TABLE TblSex
(sex_cd char(1),
sex varchar(5),
PRIMARY KEY(sex_cd));
CREATE TABLE TblAge
(age_class char(1),
age_range varchar(30),
PRIMARY KEY(age_class));
CREATE TABLE TblPop
(pref_name varchar(30),
age_class char(1),
sex_cd char(1),
population integer,
PRIMARY KEY(pref_name, age_class,sex_cd));
INSERT INTO TblSex (sex_cd, sex ) VALUES('m', '男');
INSERT INTO TblSex (sex_cd, sex ) VALUES('f', '女');
INSERT INTO TblAge (age_class, age_range ) VALUES('1', '21岁〜30岁');
INSERT INTO TblAge (age_class, age_range ) VALUES('2', '31岁〜40岁');
INSERT INTO TblAge (age_class, age_range ) VALUES('3', '41岁〜50岁');
INSERT INTO TblPop VALUES('秋田', '1', 'm', 400 );
INSERT INTO TblPop VALUES('秋田', '3', 'm', 1000 );
INSERT INTO TblPop VALUES('秋田', '1', 'f', 800 );
INSERT INTO TblPop VALUES('秋田', '3', 'f', 1000 );
INSERT INTO TblPop VALUES('青森', '1', 'm', 700 );
INSERT INTO TblPop VALUES('青森', '1', 'f', 500 );
INSERT INTO TblPop VALUES('青森', '3', 'f', 800 );
INSERT INTO TblPop VALUES('东京', '1', 'm', 900 );
INSERT INTO TblPop VALUES('东京', '1', 'f', 1500 );
INSERT INTO TblPop VALUES('东京', '3', 'f', 1200 );
INSERT INTO TblPop VALUES('千叶', '1', 'm', 900 );
INSERT INTO TblPop VALUES('千叶', '1', 'f', 1000 );
INSERT INTO TblPop VALUES('千叶', '3', 'f', 900 );
Q:计算每个员工子女数量
A:Count(ch.child)
select emp.employee, count(ch.child) as child_cnt
from Personnel emp left outer join Children ch
on ch.child in (emp.child_1, emp.child_2, emp.child_2)
group by emp.employee;
Q:转化下图表格
A:需要保证侧边栏完整,而TblPop表中无年龄层 2 的记录
- 错误示例
-- TblPop中没有年龄层2的记录,如果仅用外连接,年龄层2缺失
select master1.age_range, master2.sex, data.pop_tohoku, data.pop_kanto
from (select age_class, sex_cd,
sum(case when pref_name in ('秋田','青森') then population
else null end) as pop_tohoku,
sum(case when pref_name in ('东京','千叶') then population
else null end) as pop_kanto
from TblPop
group by age_class, sex_cd) data
right outer join TblAge master1
on data.age_class = master1.age_class
right outer join TblSex master2
on data.sex_cd = master2.sex_cd;
- 正确方法:笛卡尔积
select master.age_class, master.sex_cd,
data.pop_tohoku, data.pop_kanto
from (select age_class, sex_cd from TblAge cross join TblSex) master
left outer join
(select age_class, sex_cd,
sum(case when pref_name in ('秋田','青森') then population
else null end) as pop_tohoku,
sum(case when pref_name in ('东京','千叶') then population
else null end) as pop_kanto
from TblPop
group by age_class, sex_cd) data
on master.age_class = data.age_class and master.sex_cd = data.sex_cd
order by age_class, sex_cd;
- 简洁版本:去掉一个视图
select master.age_class, master.sex_cd,
sum(case when pref_name in ('秋田','青森') then population
else null end) as pop_tohoku,
sum(case when pref_name in ('东京','千叶') then population
else null end) as pop_kanto
from (select age_class, sex_cd from TblAge cross join TblSex) master
left outer join TblPop data -- TblPop本身就是data表
on master.age_class = data.age_class and master.sex_cd = data.sex_cd
group by master.age_class, master.sex_cd
order by master.age_class, master.sex_cd;
MICK[日] 《SQL进阶教程》 ↩︎