外连接的用法 -- 《SQL进阶教程》 jupyter note

import pandas as pd
import sqlite3
conn = sqlite3.connect('1-5.db')

用外连接进行行列转换1(行 -> 列): 制作交叉表

怎么使用outer join,将row转换成column

下面的方式一,使用的是外连接的方法。但是效果却是最差的。

# create Course Table and insert data
conn.execute("""
    CREATE TABLE IF NOT EXISTS Courses (
        name VARCHAR(10) NOT NULL, 
        course VARCHAR(10),
        PRIMARY KEY(name, course)
    ) 
""")
conn.execute("""DELETE FROM Courses;""")
conn.execute("""
    INSERT INTO Courses(name, course) 
    VALUES 
        ('赤井', 'SQL入门'),
        ('赤井', 'UNIX基础'),
        ('铃木', 'SQL入门'),
        ('工藤', 'SQL入门'),
        ('工藤', 'Java中级'),
        ('吉田', 'UNIX基础'),
        ('渡边', 'SQL入门')
""")
pd.read_sql_query('SELECT * FROM Courses', conn)
 namecourse
0赤井SQL入门
1赤井UNIX基础
2铃木SQL入门
3工藤SQL入门
4工藤Java中级
5吉田UNIX基础
6渡边SQL入门
# 方式一: 每个要转换的列都使用一
pd.read_sql_query("""
    SELECT c0.name,
        CASE WHEN c1.name IS NOT NULL THEN 'O' ELSE 'X' END AS 'SQL入门',
        CASE WHEN c2.name IS NOT NULL THEN 'O' ELSE 'X' END AS 'UNIX基础',
        CASE WHEN c3.name is NOT NULL THEN 'O' ELSE 'X' END AS 'Java中级'
    FROM (SELECT DISTINCT name FROM Courses) C0
    LEFT OUTER JOIN
        (SELECT name FROM Courses WHERE course='SQL入门') C1
        ON c0.name = c1.name
    LEFT OUTER JOIN
        (SELECT name FROM Courses WHERE course='UNIX基础') C2
        ON c0.name = c2.name
    LEFT OUTER JOIN
        (SELECT name FROM Courses WHERE course='Java中级') c3
        ON c0.name = c3.name;
""", conn)
 nameSQL入门UNIX基础Java中级
0吉田XOX
1工藤OXO
2渡边OXX
3赤井OOX
4铃木OXX
# 方式二: 使用标量子查询代替外连接
# 优点是,加入新的列,只需要修改SELECT下的子查询。而方式一需要修改SELECT和FROM两处.
# 缺点是,在SELECT中使用标量子查询,开销比较大。因为需要对每一行都进行一次或多次子查询.
pd.read_sql_query("""
    SELECT c0.name,
        (SELECT 'O'
         FROM Courses AS c1
         WHERE course='SQL入门' AND c0.name = c1.name) AS 'SQL入门',
        (SELECT 'O'
         FROM Courses AS c2
         WHERE course='UNIX基础' AND c0.name = c2.name) AS 'UNIX基础',
        (SELECT 'O'
         FROM Courses AS c3
         WHERE course='Java中级' AND c0.name = c3.name) AS 'Java中级'
    FROM (SELECT DISTINCT name FROM Courses) AS c0;
""", conn).replace(to_replace=[None], value='X')
 nameSQL入门UNIX基础Java中级
0吉田XOX
1工藤OXO
2渡边OXX
3赤井OOX
4铃木OXX
# 方式三: 嵌套使用CASE表单是
pd.read_sql_query("""
    SELECT name,
      CASE WHEN SUM(CASE WHEN course='SQL入门' THEN 1 ELSE 0 END) = 1 THEN 'O' ELSE 'X' END AS 'SQL入门',
      CASE WHEN SUM(CASE WHEN course='UNIX基础' THEN 1 ELSE 0 END) = 1 THEN 'O' ELSE 'X' END AS 'UNIX基础',
      CASE WHEN SUM(CASE WHEN course='Java中级' THEN 1 ELSE 0 END) = 1 THEN 'O' ELSE 'X' END AS 'Java中级'
    FROM Courses
    GROUP BY name
""", conn)
 nameSQL入门UNIX基础Java中级
0吉田XOX
1工藤OXO
2渡边OXX
3赤井OOX
4铃木OXX

用外连接进行行列转换2(列 -> 行): 汇总重复项于一列

# create Personnel Table and insert data
conn.execute("""
    CREATE TABLE IF NOT EXISTS Personnel (
        employee VARCHAR(10) NOT NULL PRIMARY KEY,
        child_1 VARCHAR(10),
        child_2 VARCHAR(10),
        child_3 VARCHAR(10)
    ) 
""")
conn.execute("""DELETE FROM Personnel;""")
conn.execute("""
    INSERT INTO Personnel(employee, child_1, child_2, child_3) 
    VALUES 
        ('赤井', '一郎', '二郎', '三郎'),
        ('工藤', '春子', '夏子', null),
        ('铃木', '夏子', null, null),
        ('吉田', null, null, null)
""")
pd.read_sql_query('SELECT * FROM Personnel', conn)
 employeechild_1child_2child_3
0赤井一郎二郎三郎
1工藤春子夏子None
2铃木夏子NoneNone
3吉田NoneNoneNone
# 这个方法,会把child为null的结果也返回。
# 但是如果排除掉child为null的结果,那么吉田这个employee因为没有孩子,就不会出现在结果表里面
pd.read_sql_query("""
    SELECT employee, child_1 AS child FROM Personnel
    UNION ALL
    SELECT employee, child_2 AS child FROM Personnel
    UNION ALL
    SELECT employee, child_3 AS child FROM Personnel
""", conn)
 employeechild
0赤井一郎
1工藤春子
2铃木夏子
3吉田None
4赤井二郎
5工藤夏子
6铃木None
7吉田None
8赤井三郎
9工藤None
10铃木None
11吉田None
conn.execute("""
    CREATE VIEW IF NOT EXISTS Children(child) AS
        SELECT child_1 FROM Personnel
        UNION
        SELECT child_2 FROM Personnel
        UNION
        SELECT child_3 FROM Personnel
""")

pd.read_sql_query("""
    SELECT EMP.employee, CHILDREN.child
    FROM Personnel EMP
        LEFT OUTER JOIN Children
            ON Children.child IN (EMP.child_1, EMP.child_2, EMP.child_3)
""", conn)
 employeechild
0赤井一郎
1赤井三郎
2赤井二郎
3工藤夏子
4工藤春子
5铃木夏子
6吉田None
# 习题1-5-2: 求每个雇员的孩子数量
pd.read_sql_query("""
    SELECT EMP.employee, COUNT(CHILDREN.child) AS child_cnt
    FROM Personnel EMP
        LEFT OUTER JOIN Children
            ON Children.child IN (EMP.child_1, EMP.child_2, EMP.child_3)
    GROUP BY EMP.employee
""", conn)
 employeechild_cnt
0吉田0
1工藤2
2赤井3
3铃木1

在交叉表里制作嵌套式表侧栏

# 创建相关的表
conn.execute("""
    CREATE TABLE IF NOT EXISTS TblAge (
        age_class integer PRIMARY KEY,
        age_range varchar(20)
    );
""")

conn.execute("""
    CREATE TABLE IF NOT EXISTS TblSex (
        sex_cd char(1) PRIMARY KEY,
        sex char(1)
    );
""")

conn.execute("""
    CREATE TABLE IF NOT EXISTS TblPop (
        pref_name VARCHAR(10),
        age_class integer,
        sex_cd char(1),
        population integer,
        
        PRIMARY KEY (pref_name, age_class, sex_cd),
        FOREIGN KEY (age_class) REFERENCES TblAge(age_class),
        FOREIGN KEY (sex_cd) REFERENCES TblSex(sex_cd)
    );
""")

# 确保没有数据
conn.execute("""DELETE FROM TblPop""")
conn.execute("""DELETE FROM TblSex""")
conn.execute("""DELETE FROM TblAge""")

# 插入数据
conn.execute("""
    INSERT INTO TblAge(age_class, age_range) VALUES
        (1, '21岁 ~ 30岁'),
        (2, '31岁 ~ 40岁'),
        (3, '41岁 ~ 50岁')
""")

conn.execute("""
    INSERT INTO TblSex(sex_cd, sex) VALUES
        ('m', '男'),
        ('f', '女')
""")

conn.execute("""
    INSERT INTO TblPop(pref_name, age_class, sex_cd, population) VALUES
        ('秋田', 1, 'm', 400),
        ('秋田', 3, 'm', 1000),
        ('秋田', 1, 'f', 800),
        ('秋田', 3, 'f', 1000),
        ('青森', 1, 'm', 700),
        ('青森', 1, 'f', 500),
        ('青森', 3, 'f', 800),
        ('东京', 1, 'm', 1500),
        ('东京', 1, 'f', 1200),
        ('千叶', 1, 'm', 900),
        ('千叶', 1, 'f', 1000),
        ('千叶', 3, 'f', 900)
""")
<sqlite3.Cursor at 0x113c47180>
pd.read_sql("""
    SELECT * FROM TblAge
""", conn)
 age_classage_range
0121岁 ~ 30岁
1231岁 ~ 40岁
2341岁 ~ 50岁
pd.read_sql("""
    SELECT * FROM TblSex
""", conn)
 sex_cdsex
0m
1f
pd.read_sql("""
    SELECT * FROM TblPop
""", conn)
 pref_nameage_classsex_cdpopulation
0秋田1m400
1秋田3m1000
2秋田1f800
3秋田3f1000
4青森1m700
5青森1f500
6青森3f800
7东京1m1500
8东京1f1200
9千叶1m900
10千叶1f1000
11千叶3f900
# 下面这种,以两张表作为外连接操作的方式。
# 有时候并不能满足需求,比如不会显示age_class=2的情况,因为没有对应的人口数据
pd.read_sql("""
    SELECT MASTER1.age_class AS age_class,
           MASTER2.sex_cd AS sex_cd,
           DATA.pop_tohoku AS pop_tohoku,
           DATA.pop_kanto AS pop_kanto
    FROM (SELECT age_class, sex_cd,
                 SUM(CASE WHEN pref_name IN ('青森', '秋田') THEN population ELSE NULL END) AS pop_tohoku,
                 SUM(CASE WHEN pref_name IN ('东京', '千叶') THEN population ELSE NULL END) AS pop_kanto
          FROM TblPop
          GROUP BY age_class, sex_cd) AS DATA
      LEFT OUTER JOIN TblAge MASTER1
          ON MASTER1.age_class = DATA.age_class
      LEFT OUTER JOIN TblSex MASTER2
          ON MASTER2.sex_cd = DATA.sex_cd
""", conn)
 age_classsex_cdpop_tohokupop_kanto
01f13002200.0
11m11002400.0
23f1800900.0
33m1000NaN
# 解决方式是:调整为一次外连接
# 外连接的表是 TblAge和TblSex 的笛卡尔积,也就是把它们用交叉连接(CROSS JOIN)组合起来,
# 对于不支持CROSS JOIN的库,分别把两个表用FROM引入,也是一样的.
# 同时,请注意下面把临时表放在了 LEFT OUTER JOIN 的左边,充分发挥OUTER JOIN的作用.
pd.read_sql("""
    SELECT MASTER.age_class AS age_class,
           MASTER.sex_cd    AS sex_cd,
           DATA.pop_tohoku  AS pop_tohoku,
           DATA.pop_kanto   AS pop_kanto
    FROM (SELECT age_class, sex_cd
          FROM TblAge CROSS JOIN TblSex) AS MASTER
      LEFT OUTER JOIN
              (SELECT age_class, sex_cd,
                         SUM(CASE WHEN pref_name IN ('青森', '秋田') THEN population ELSE NULL END) AS pop_tohoku,
                         SUM(CASE WHEN pref_name IN ('东京', '千叶') THEN population ELSE NULL END) AS pop_kanto
              FROM TblPop
              GROUP BY age_class, sex_cd) AS DATA
          ON MASTER.age_class = DATA.age_class
          AND MASTER.sex_cd = DATA.sex_cd
      
""", conn)
 age_classsex_cdpop_tohokupop_kanto
01f1300.02200.0
11m1100.02400.0
22fNaNNaN
32mNaNNaN
43f1800.0900.0
53m1000.0NaN
# 更优的解决方法是:把JOIN看作乘法运算
# 关键在于,TblPop可以看作就是DATA。然后 MASTER -> DATA 的关系,就是一对多关系了
pd.read_sql("""
    SELECT MASTER.age_class AS age_class,
           MASTER.sex_cd    AS sex_cd,
           SUM(CASE WHEN pref_name IN ('青森', '秋田') THEN population ELSE NULL END) AS pop_tohoku,
           SUM(CASE WHEN pref_name IN ('东京', '千叶') THEN population ELSE NULL END) AS pop_kanto
    FROM (SELECT age_class, sex_cd
          FROM TblAge CROSS JOIN TblSex) AS MASTER
      LEFT OUTER JOIN TblPop AS DATA
      ON MASTER.age_class = DATA.age_class
      AND MASTER.sex_cd = DATA.sex_cd
    GROUP BY MASTER.age_class, MASTER.sex_cd
""", conn)
 age_classsex_cdpop_tohokupop_kanto
01f1300.02200.0
11m1100.02400.0
22fNaNNaN
32mNaNNaN
43f1800.0900.0
53m1000.0NaN

用外连接做集合运算

各个数据库的集合运算实现都不尽相同,参差不齐。

集合运算符会尽心排序,所以可能带来性能上的问题。

因此,了解一下集合运算符的替代方案还是有意义的。

# 创建数据库
conn.execute("""
    CREATE TABLE IF NOT EXISTS Class_A (
        id integer PRIMARY KEY AUTOINCREMENT,
        name varchar(10)
    );
""")
conn.execute("""   
    CREATE TABLE IF NOT EXISTS Class_B (
        id integer PRIMARY KEY AUTOINCREMENT,
        name varchar(10)
    )
""")

# 插入数据
conn.execute("""DELETE FROM Class_A;""")
conn.execute("""DELETE FROM Class_B;""")
conn.execute("""
    INSERT INTO Class_A(id, name) VALUES 
        (1, '田中'),
        (2, '铃木'),
        (3, '伊集院')
""")
conn.execute("""
    INSERT INTO Class_B(id, name) VALUES
        (1, '田中'),
        (2, '铃木'),
        (4, '西院寺')
""")
<sqlite3.Cursor at 0x113f3c5e0>
pd.read_sql("""
    SELECT * FROM Class_A
""", conn)
 idname
01田中
12铃木
23伊集院
pd.read_sql("""
    SELECT * FROM Class_B
""", conn)
 idname
01田中
12铃木
24西院寺

用外连接求差集: A - B

pd.read_sql("""
    SELECT A.id id, A.name AS A_name
    FROM Class_A AS A LEFT OUTER JOIN Class_B AS B
    ON A.id = B.id
    WHERE B.name IS NULL
""", conn)
 idA_name
03伊集院

用外连接求差集: B - A

pd.read_sql("""
    SELECT B.id id, B.name AS B_name
    FROM Class_B AS B LEFT OUTER JOIN Class_A AS A
    ON A.id = B.id
    WHERE A.name IS NULL
""", conn)
 idB_name
04西院寺

用外连接求异或集

pd.read_sql("""
    SELECT A.id id, A.name AS name
    FROM Class_A AS A LEFT OUTER JOIN Class_B AS B
    ON A.id = B.id
    WHERE B.name IS NULL
    
    UNION
    
    SELECT B.id id, B.name AS name
    FROM Class_B AS B LEFT OUTER JOIN Class_A AS A
    ON A.id = B.id
    WHERE A.name IS NULL
""", conn)
 idname
03伊集院
14西院寺

转载于:https://www.cnblogs.com/thomaszdxsn/p/15-wai-lian-jie-de-yong-fa--sql-jin-jie-jiao-cheng.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值