20190522更新新的子查询问题
20190422更新 7,8题着重看一下
下面着重讲一下 相关子查询
应用1:
为什么说相关呢,因为两个表是有连接关系在的,比如
这里注意对于O1的每一个orderid,我在O2里面保证首先orderid一样,然后寻找满足条件后的MAX (orderid)
这里确定了custid之间是对应的
所以这个查询是:返回每个客户最大订单ID的订单状况
延伸:
返回每个订单当前订单值占该客户(体现:o1.custid=o2.custid)所有订单值的占百分比
应用2:上一个(返回小于当前值的最大值)和下一个(大于当前值的最小值)
返回小于当前值的最大值
大于当前值的最小值
应用3:逐行累加:聚合
###################################################课后习题
#1. 返回orders表中可以查到的活动最后一天所下的所有订单
SELECT so.orderid,so.orderdate,so.custid,so.empid
FROM Sales.Orders AS so
WHERE so.orderdate = (
SELECT MAX(orderdate)
FROM Sales.Orders);
#2. 返回订单数量最多的客户的所有订单。注意,在一个以上的客户可能具有相同数量的订单
SELECT so.custid,so.orderid,so.orderdate,so.empid
FROM Sales.Orders AS so
WHERE so.custid IN (
SELECT tempt1.custid
FROM(
SELECT custid,COUNT(orderid) AS number
FROM Sales.Orders
GROUP BY custid) AS tempt1
WHERE tempt1.number =(
SELECT MAX(tempt.number)
FROM(
SELECT custid,COUNT(orderid) AS number
FROM Sales.Orders
GROUP BY custid) AS tempt
)
);
#3.返回2008年5月1日或之后没有下订单的雇员
SELECT he.empid,he.Firstname,he.lastname
FROM HR.Employees AS he
WHERE he.empid NOT IN (
SELECT s.empid
FROM Sales.Orders
WHERE s.orderdate >= '20180501');
#4.返回有客户但是没有雇员的国家alter
SELECT DISTINCT sc.country
FROM Sales.Customers AS sc
WHERE sc.country NOT IN (
SELECT he.country
FROM HR.Employees);
#5.返回每个客户活动最后一天下的所有订单
SELECT so.custid,so.orderid,so.orderdate,so.empid
FROM Sales.Orders AS so
WHERE so.orderdate = (SELECT MAX(so2.orderdate)
FROM Sales.Orders AS so2
WHERE so2.custid = so.custid);
#6.返回2007年下订单但是2008年没下订单的客户
SELECT sc.custid,sc.companyname
FROM Sales.Customers AS sc
WHERE sc.orderid IN (SELECT orderid
FROM Sales.Orders
WHERE orderdate between ('20070101','20071230')
AND orderdate NOT IN ('20080101','20081230')
);
# 答案做法
#7.返回订购了产品12的客户
SELECT sc.custid,sc.companyname
FROM Sales.Customers AS sc
JOIN Sales.Orders AS so
ON sc.custid = so.custid
JOIN Sales.OrderDetails AS sod
ON so.orderid = sod.orderid
WHERE sod.productid = 12;
# 答案做法:使用exist子查询
SELECT sc.custid,sc.companyname
FROM Sales.Customers AS sc
WHERE EXISTS(SELECT * FROM Sales.Orders so
WHERE sc.custid = so.custid
AND EXISTS(
SELECT * FROM Sales.OrderDetails AS sod
WHERE so.orderid = sod.orderid
AND sod.productid = 12));
#8.查询每个客户及其月度的采购总量
# 答案做法
SELECT O1.custid,O1.ordermonth,O1.qty,(
SELECT SUM(O2.qty)
FROM Sales.CustOrders AS O2
WHERE O2.custid = O1.custid
AND O2.ordermonth <= O1.ordermonth) AS runqty
FROM Sales.Customers AS O1
ORDER BY custid,ordermonth;
#################自己反正做错啦
SELECT tempt.custid,tempt.ordermonth,tempt.qty,
(SELECT SUM(tempt2.qty) FROM Sales.Customers AS O2
WHERE tempt2.ordermonth <= (
SELECT MIN(tempt2.ordermonth) FROM tempt2
WHERE tempt2.ordermonth > tempt.ordermonth)
FROM
FROM Sales.Customers AS O1
重点:7,8
相关子查询后续
DROP TABLE user;
CREATE TABLE user(
id INT NOT NULL,
name VARCHAR(50) NOT NULL,
create_time DATE NOT NULL
);
INSERT INTO user VALUES(1,'A','2019-05-13');
INSERT INTO user VALUES(2,'B','2019-05-12');
INSERT INTO user VALUES(3,'C','2019-05-14');
INSERT INTO user VALUES(4,'D','2019-05-15');
INSERT INTO user VALUES(5,'E','2019-05-17');
INSERT INTO user VALUES(6,'F','2019-05-17');
INSERT INTO user VALUES(7,'G','2019-05-17');
INSERT INTO user VALUES(8,'H','2019-05-20');
INSERT INTO user VALUES(9,'I','2019-05-23');
DROP TABLE oorder;
CREATE TABLE oorder(
id INT NOT NULL,
u_id INT NOT NULL,
create_time DATE NOT NULL
);
INSERT INTO oorder VALUES(1,1,'2019-05-13');
INSERT INTO oorder VALUES(2,1,'2019-05-13');
INSERT INTO oorder VALUES(3,1,'2019-05-13');
INSERT INTO oorder VALUES(4,3,'2019-05-13');
INSERT INTO oorder VALUES(5,1,'2019-05-14');
INSERT INTO oorder VALUES(6,4,'2019-05-14');
INSERT INTO oorder VALUES(7,4,'2019-05-14');
INSERT INTO oorder VALUES(8,6,'2019-05-15');
INSERT INTO oorder VALUES(9,1,'2019-05-17');
INSERT INTO oorder VALUES(10,2,'2019-05-17');
INSERT INTO oorder VALUES(11,7,'2019-05-20');
INSERT INTO oorder VALUES(12,7,'2019-05-20');
INSERT INTO oorder VALUES(13,8,'2019-05-20');
INSERT INTO oorder VALUES(14,8,'2019-05-20');
INSERT INTO oorder VALUES(15,8,'2019-05-21');
INSERT INTO oorder VALUES(16,7,'2019-05-21');
一开始我是这样做的
SELECT DISTINCT o.create_time,
(SELECT COUNT(distinct u2.id) FROM user u2 WHERE u2.create_time = o.create_time) AS '新增用户数',
(SELECT COUNT(o2.id) FROM oorder o2 WHERE o2.create_time = o.create_time) AS '当日订单数',
(SELECT COUNT(distinct o3.u_id) FROM oorder o3 WHERE o3.create_time = o.create_time) AS '当日下单用户数'
FROM oorder AS o;
注意如果没有distinct o.create_time会有很多条相同的记录
这样做也对
SELECT o.create_time,
(SELECT COUNT(DISTINCT u.id ) FROM user u WHERE u.create_time=o.create_time),
COUNT(o.id),
COUNT(DISTINCT o.u_id)
FROM oorder o
GROUP BY o.create_time;
但是其实没有考虑到全部的日期
修改之后
SELECT DISTINCT o.create_time,
(SELECT COUNT(distinct u2.id) FROM user u2 WHERE u2.create_time = o.create_time) AS '新增用户数',
(SELECT COUNT(o2.id) FROM oorder o2 WHERE o2.create_time = o.create_time) AS '当日订单数',
(SELECT COUNT(distinct o3.u_id) FROM oorder o3 WHERE o3.create_time = o.create_time) AS '当日下单用户数'
FROM (SELECT DISTINCT create_time FROM oorder
UNION
SELECT DISTINCT create_time FROM user) AS o;
DROP TABLE oorder;
CREATE TABLE oorder(
id INT NOT NULL,
u_id INT NOT NULL,
city_id VARCHAR(15) NOT NULL,
create_time DATE NOT NULL
);
INSERT INTO oorder VALUES(1,1,'a','2019-05-13');
INSERT INTO oorder VALUES(2,1,'a','2019-05-13');
INSERT INTO oorder VALUES(3,1,'a','2019-05-13');
INSERT INTO oorder VALUES(4,3,'a','2019-05-13');
INSERT INTO oorder VALUES(5,1,'b','2019-05-14');
INSERT INTO oorder VALUES(6,4,'b','2019-05-14');
INSERT INTO oorder VALUES(7,4,'b','2019-05-14');
INSERT INTO oorder VALUES(8,6,'c','2019-05-15');
INSERT INTO oorder VALUES(9,1,'a','2019-05-17');
INSERT INTO oorder VALUES(10,2,'c','2019-05-17');
INSERT INTO oorder VALUES(11,7,'a','2019-05-20');
INSERT INTO oorder VALUES(12,7,'a','2019-05-20');
INSERT INTO oorder VALUES(13,8,'c','2019-05-20');
INSERT INTO oorder VALUES(14,8,'c','2019-05-20');
INSERT INTO oorder VALUES(15,8,'c','2019-05-21');
INSERT INTO oorder VALUES(16,7,'a','2019-05-21');
SELECT DISTINCT city_id,
(SELECT COUNT(DISTINCT o2.u_id)
FROM oorder o2
WHERE o2.city_id = o.city_id
AND o2.create_time >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)) AS '最近7日下过单用户数',
(SELECT COUNT(DISTINCT t1.u_id)
FROM (SELECT u_id,MIN(create_time) AS mtime
FROM oorder GROUP BY u_id ) AS t1,
oorder o3
WHERE o3.u_id = t1.u_id
AND o3.city_id = o.city_id
AND t1.mtime >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)) AS '首次下单在近7日下过单'
FROM oorder o;
SELECT u_id,MIN(create_time) AS mtime FROM oorder GROUP BY u_id;
SELECT * FROM
(SELECT u_id,MIN(create_time) AS mtime FROM oorder GROUP BY u_id ) AS t1,oorder o3
WHERE o3.u_id = t1.u_id ;
之前做麻烦了 ,下面这样做很清晰,但是第一题还没想到好办法
select
t.city_id
,count(distinct case when t.create_time >= date_sub('2019-05-20',interval 6 day) then t.u_id else null end)
,count(distinct case when t.first_time >= date_sub('2019-05-20',interval 6 day) then t.u_id else null end)
from
(
select
*
,(select min(create_time) from oorder o1 where o1.u_id = o2.u_id) as first_time
from
oorder o2
) as t
group by t.city_id
注意:第一题这样写是有错误的:特别注意三表join的时候,前两个表join之后已经成为一个整体了,第一个表和第二个表不再是灵活的了,这个时候我连接第三个表,写了两个on条件,一个和第一个表相关,一个和第二个表相关,可想而知,最后肯定会少很多数据,所以三表连接的时候注意只能使用两个表之间的关联关系,不然很容易出错
select
t.create_time
,count(distinct case when u.create_time = t.create_time then u.id else null end)
,count(distinct case when o.create_time = t.create_time then o.id else null end)
,count(distinct case when o.create_time = t.create_time then o.u_id else null end)
from
(
SELECT DISTINCT create_time FROM oorder
UNION
SELECT DISTINCT create_time FROM user
) AS t
left join
oorder as o
on o.create_time = t.create_time
left join
user as u
on u.create_time = t.create_time
and u.id = o.u_id
group by t.create_time;
三表连接之后:
很明显出错了,因为第三个表连接的时候,前面t1和t2的组合已经固定了
我如果第三那个表只连接create_time是这样,因此在这个基础上再取看id,自然少了很多数据
就是:我前两个表固定了订单数据,第三个表注册数据不仅要和订单数据的时间一致,而且uid也要一致,但是用户注册时间和下单时间不一样啊!!!!
自己真的想叉了:用户首先要注册才能下单
所以注册表的id一定是全的,
select
t.all_time
,count(distinct case when t.all_time = u.create_time then u.id else null end)
,count(distinct case when t.all_time = o.create_time then o.id else null end)
,count(distinct case when t.all_time = o.create_time then o.u_id else null end)
from
user as u
left join
oorder as o
on u.id = o.u_id
,
(SELECT DISTINCT create_time as all_time FROM oorder UNION
SELECT DISTINCT create_time as all_time FROM user
) AS t
group by t.all_time
上面这种方法是正确做法:首先连接2个表确保用户id一致,之后相当于全连接了一个时间表【注意不要再加on条件了,不然会出错】
哎,思考的还是不是很好,因为有些sql不支持全连接,倒来倒去还是自己原始做的比较实用