关联子查询行间比较
越前须知(雾)
- 本系列参考《SQL进阶教程》1,DBMS选用MySQL。
- 本系列不涉及数据库安装与基础语句,对初学者存在一定门槛;基础知识建议阅读《SQL必知必会(第四版)》与《SQL基础教程(第二版)》。
具体用法
与最近一年比较营收
- 创表
-- 时间轴连续
CREATE TABLE Sales
(year INTEGER NOT NULL ,
sale INTEGER NOT NULL ,
PRIMARY KEY (year));
INSERT INTO Sales VALUES (1990, 50);
INSERT INTO Sales VALUES (1991, 51);
INSERT INTO Sales VALUES (1992, 52);
INSERT INTO Sales VALUES (1993, 52);
INSERT INTO Sales VALUES (1994, 50);
INSERT INTO Sales VALUES (1995, 50);
INSERT INTO Sales VALUES (1996, 49);
INSERT INTO Sales VALUES (1997, 55);
-- 时间轴断裂
CREATE TABLE Sales2
(year INTEGER NOT NULL ,
sale INTEGER NOT NULL ,
PRIMARY KEY (year));
INSERT INTO Sales2 VALUES (1990, 50);
INSERT INTO Sales2 VALUES (1992, 50);
INSERT INTO Sales2 VALUES (1993, 52);
INSERT INTO Sales2 VALUES (1994, 55);
INSERT INTO Sales2 VALUES (1997, 55);
- 时间轴连续:与上一年营收数据比较
Q:寻找与上年持平年份&营收
A:子查询 / 自连接 / EXISTS
-- 子查询
select year, sale
from Sales S1 -- 用S2.year = S1.year - 1 偏移一行关联
where sale = (select S2.sale
from Sales S2
where S2.year = S1.year - 1)
order by year;
-- 自连接
select S1.year, S1.sale
from Sales S1, Sales S2
where S1.sale = S2.sale
and S1.year = S2.year + 1
order by year;
-- EXISTS
select year, sale
from Sales S1
where exists (select *
from Sales S2
where S1.year = S2.year + 1
and S1.sale = S2.sale);
Q:与上年比较“增长、持平或下降”
A:CASE + 子查询 / 自连接
- 子查询
select S1.year, S1.sale,
case when S1.sale = (select S2.sale
from Sales S2
where S1.year = S2.year+1) then '👉'
when S1.sale > (select S2.sale
from Sales S2
where S1.year = S2.year+1) then '👆'
when S1.sale < (select S2.sale
from Sales S2
where S1.year = S2.year+1) then '👇'
else '--' end as 'trend'
from Sales S1
order by year;
-- 简洁版本:SIGN()判断正负号
select S1.year, S1.sale,
case sign(sale - (select sale from Sales S2 where S1.year = S2.year + 1))
when 0 then '👉'
when 1 then '👆'
when -1 then '👇'
else '--' end as 'trend'
from Sales S1
order by year;
- 自连接
select S1.year, S1.sale,
case when S1.sale = S2.sale then '👉'
when S1.sale > S2.sale then '👆'
when S1.sale < S2.sale then '👇'
else '--' end as 'trend'
from Sales S1, Sales S2
where S1.year = S2.year + 1
order by S1.year;
- 时间轴断裂:与过去最近时间营收数据比较
Q:与最近时间营收数据持平的年份&营收
A:子查询 / 自连接 / EXISTS
-- 子查询
select year, sale
from Sales2 S1
where sale = (select sale
from Sales2 S2
where S2.year = (select max(S3.year)
from Sales2 S3 where S1.year > S3.year))
order by year;
-- 自连接,减少一个子查询
select S1.year as year, S1.sale as sale
from Sales2 S1, Sales2 S2
where S1.sale = S2.sale
and S2.year = (select max(year)
from Sales2 S3
where S1.year > S3.year)
order by S1.year;
-- EXISTS
select S1.year, S1.sale
from Sales2 S1
where exists (select *
from Sales2 S2
where S1.sale = S2.sale
and S2.year = (select max(year)
from Sales2 S3
where S1.year > S3.year));
Q:与上年比较增长幅度
A:简单的自连接存在缺陷:筛除最早年份1990年
- 自连接:缺少1990年
select S2.year as pre_year, S1.year as now_year,
S2.sale as pre_sale, S1.sale as now_sale,
S1.sale - S2.sale as diff
from Sales2 S1, Sales2 S2
where S2.year = (select max(year)
from Sales2 S3
where S1.year > S3.year)
order by now_year;
-- now_year排除了1990,因为没有比1990早的数据
- 自外连接:出现全部年份
select S2.year as pre_year, S1.year as now_year,
S2.sale as pre_sale, S1.sale as now_sale,
S1.sale - S2.sale as diff
from Sales2 S1 left outer join Sales2 S2 -- 涵盖1990
on S2.year = (select max(year)
from Sales2 S3
where S1.year > S3.year)
order by now_year;
移动平均值和移动累计值
- 创表
CREATE TABLE Accounts
(prc_date DATE NOT NULL ,
prc_amt INTEGER NOT NULL ,
PRIMARY KEY (prc_date)) ;
INSERT INTO Accounts VALUES ('2006-10-26', 12000 );
INSERT INTO Accounts VALUES ('2006-10-28', 2500 );
INSERT INTO Accounts VALUES ('2006-10-31', -15000 );
INSERT INTO Accounts VALUES ('2006-11-03', 34000 );
INSERT INTO Accounts VALUES ('2006-11-04', -5000 );
INSERT INTO Accounts VALUES ('2006-11-06', 7200 );
INSERT INTO Accounts VALUES ('2006-11-11', 11000 );
Q:累计值
A:窗口函数 / 子查询
- 窗口函数
-- 窗口函数
select prc_date, prc_amt,
sum(prc_amt) over (order by prc_date) as onhand_amt
from Accounts;
- 子查询
select prc_date, prc_amt,
(select sum(prc_amt) from Accounts A2
where A1.prc_date >= A2.prc_date) as onhand_smt
from Accounts A1
order by prc_date;
Q:移动平均值,且只计算往前数2行(加本行3行)的平均值
A:窗口函数 / 子查询
- 窗口函数
-- 滑动前两行平均:rows 2 preceding
select prc_date, prc_amt,
sum(prc_amt) over (order by prc_date rows 2 preceding) as onhand_amt
from Accounts;
-- 滑动前一行与后一行平均:rows between 1 preceding and 1 following
select prc_date, prc_amt,
sum(prc_amt) over (order by prc_date
rows between 1 preceding and 1 following) as avg_amt
from Accounts;
- 子查询
select prc_date, A1.prc_amt,
(select sum(prc_amt) from Accounts A2
where A1.prc_date >= A2.prc_date
and (select count(*) from Accounts A3
where A3.prc_date between A2.prc_date and A1.prc_date) <= 3) as avg_sum
from Accounts A1
order by prc_date;
-- 子查询:不满3行数据不显示
select prc_date, A1.prc_amt,
(select avg(A2.prc_amt) from Accounts A2
where A1.prc_date >= A2.prc_date
and (select count(*)
from Accounts A3
where A3.prc_date between A2.prc_date and A1.prc_date) <=3
having count(*) = 3) as avg_amt -- prc_amt必须为三行
from Accounts A1
order by A1.prc_date;
查询重叠的时间区间
- 创表
CREATE TABLE Reservations
(reserver VARCHAR(30) PRIMARY KEY,
start_date DATE NOT NULL,
end_date DATE NOT NULL);
INSERT INTO Reservations VALUES('木村', '2006-10-26', '2006-10-27');
INSERT INTO Reservations VALUES('荒木', '2006-10-28', '2006-10-31');
INSERT INTO Reservations VALUES('堀', '2006-10-31', '2006-11-01');
INSERT INTO Reservations VALUES('山本', '2006-11-03', '2006-11-04');
INSERT INTO Reservations VALUES('内田', '2006-11-03', '2006-11-05');
INSERT INTO Reservations VALUES('水谷', '2006-11-06', '2006-11-06');
Q:查询日期与别人重叠的预定
A:见下图:别人的入住/离店日期在自己的预定中
select reserver, start_date, end_date
from Reservations R1
where exists (select * from Reservations R2
where R1.reserver <> R2.reserver
and ( R1.start_date between R2.start_date and R2.end_date
or R1.end_date between R2.start_date and R2.end_date));
Q:若山本入住时间为4日,上述重叠时间的查询无法查询到内田
A:加入限制:自己的整段入住时间在别的预定中
DELETE FROM Reservations WHERE reserver = '山本';
INSERT INTO Reservations VALUES('山本', '2006-11-04', '2006-11-04');
select reserver, start_Date, end_date from Reservations R1
where exists (select *
from Reservations R2
where R1.reserver <> R2.reserver
and ((R1.start_date between R2.start_date and R2.start_date
or R1.end_date between R2.start_date and R2.end_date)
or (R2.start_date between R1.start_date and R1.end_date
or R2.end_date between R1.start_date and R1.end_date)));
MICK[日] 《SQL进阶教程》 ↩︎