SQL查询统计问题

--今天在公司遇到一个比较棘手的SQL查询问题。
--寻思了一晚上如果查询,最终总算了有了结果。
--题目:有表1 客户信息表,字段:CustomerID,自增长,CustomerName 主键,
--有表2 邮件发送记录记录表,字段 CustomerID 客户ID,EmailID 邮件ID,SendDate 发送时间。要求:查找当天的前X天内发送邮件的记录小于等于Y条的客户,显示客户的信息为 客户ID,客户姓名,邮件总发送数量,最近一封邮件发送时间。
--注意:客户表日增长量为1W条,发送记录为每一个客户每一年邮件发送数量不少于10条。
--今天晚上寻思到两个结果,要证明两个结果的性能好坏。为了节约时间,我们可以先把数据库建立起来。
USE master
SET NOCOUNT ON 
GO
----创建客户表
IF EXISTS(SELECT * FROM  SYS.TABLES WHERE [NAME] ='Customer')
DROP TABLE  Customer
CREATE TABLE Customer
(
CustomerID INT IDENTITY(1,1) NOT NULL,
CustomerName VARCHAR(20) PRIMARY KEY NOT NULL 
)
GO
---创建发送记录表
IF EXISTS(SELECT * FROM SYS.TABLES WHERE NAME='SendInfo' )
DROP TABLE SendInfo
CREATE TABLE SendInfo
(
CustomerID INT ,
SendDate DATETIME ,
EmailID INT
)
GO
------循环插入数据
------模拟插入100W客户,400W条数据,最后再插入1W个新客户
------模拟最近4天,每天都为客户发送了一条邮件
DECLARE @num INT,
@index INT ,
@j INT,
@id INT
SET @index =1 ;
SET @num =1000000;
SET @j =1;
WHILE(@index <=@num)
BEGIN
INSERT INTO Customer (CustomerName) VALUES ('Cus'+Convert(varchar(20),@index) );
SELECT  @id =@@IDENTITY;
WHILE(@j<=4)
BEGIN
INSERT INTO SendInfo(CustomerID, SendDate, EmailID) VALUES
( @id, Dateadd(d,-@j,getdate()),@j);
SET @j =@j +1;
END
SET @J =1 ;
SET @index =@index +1;
END
SET @index = 1;
WHILE (@index<= 10000)
BEGIN
INSERT INTO Customer (CustomerName) VALUES ('Cus'+Convert(varchar(20),@index)+'New' );
SELECT  @id =@@IDENTITY;
SET @index = @index+1
END
GO




--------写出查询语句
--当个来看,要把客户的最后一次发送时间和发送总次数查询出来不简单,
--问题是还要将发送频率的条件进行过滤,并且是小于等于,
--那么这个时间要求的日期内,没有发送记录的客户,也就属于了合理的条件范围内。
--我们来进行分解。


--1、主干是客户
--组建A表,查询客户ID和最后一个发送时间,总发送次数
select c.CustomerID ,c.CustomerName,isnull(sumCount,0) sumCount,lastDate from 
Customer c left join (
select CustomerId,max(Senddate)lastDate ,count(EmailID)sumCount from sendInfo group by CustomerId 
) b on (c.CustomerID = b.CustomerId) 


GO
--2、条件是关键
--根据条件组装一个B表,查找出符合要求的客户ID,这段时间内没有发送记录的客户数量是Y
select c.CustomerID,count(s.CustomerId) num 
from Customer c
left join sendInfo s on (s.CustomerId = c.CustomerID and datediff(d,SendDate,getdate()) <= X(天))
group by c.CustomerID
having count(s.CustomerId) <= Y(条) 


GO
---3、接下来便是组装这个条件和查询结果
----第一种情况,使用连接,讲A,B表连接,通过on进行最后的次数过滤
select getdate();--查询开始时间


select A1.*,B1.Num from
(
select c.CustomerID ,c.CustomerName,isnull(sumCount,0) sumCount,lastSendDate from 
Customer c left join (
select CustomerID,max(SendDate)lastSendDate ,count(EmailID)sumCount from sendInfo group by CustomerID 
) b on (c.CustomerID = b.CustomerID) 
)  A1 
inner join 
(
select c.CustomerID,count(s.CustomerID) num from Customer c
left join sendInfo s on (s.CustomerID = c.CustomerID and Datediff(d,SendDate,getdate()) <= 2)
group by c.CustomerID
) B1  on (A1.CustomerID= B1.CustomerID and B1.num <=2 );


select getdate();---查询结束时间
GO


--第二种,先对B表进行次数过滤,然后将过滤结果的CustomerID再对A表进行in查询
select getdate();


select A1.* from
(
select c.CustomerID ,c.CustomerName,isnull(sumCount,0) sumCount,lastDate from 
Customer c left join (
select CustomerId,max(Senddate)lastDate ,count(EmailID)sumCount from sendInfo group by CustomerId 
) b on (c.CustomerID = b.CustomerId) 
)  A1 
where A1.CustomerID in 
(
select CustomerID from (
select c.CustomerID,count(s.CustomerId) num from Customer c
left join sendInfo s on (s.CustomerId = c.CustomerID and datediff(d,SendDate,getdate()) <= 2)
group by c.CustomerID
having count(s.CustomerId) <= 2 
) B1
);


select getdate();
-----------------------
----查询结束




--现在数据插入还没有完成。虽然我具体不知道各种查询计划和执行计划对于SQL的比较有什么用,
--但是我知道,一个TSQL,他会先编译SQL语句,转换成为他的逻辑执行计划,然后具体到磁盘中执行数据读取,
--这一个称之为物理执行计划。逻辑执行计划有一定的顺序。具体顺序如下:
--(小弟虽有一本MSSQL2008技术内幕T-SQL查询,但是一直没有时间和精力去读懂他和运用他。
--本身SQL的基础不强,这本书学习起来相当吃力。)
--(8)  SELECT (9) DISTINCT (11) <TOP_specification> <select_list>
--(1)  FROM <left_table>
--(3)    <join_type> JOIN <right_table>
--(2)      ON <join_condition>
--(4)  WHERE <where_condition>
--(5)  GROUP BY <group_by_list>
--(6)  WITH {CUBE | ROLLUP}
--(7)  HAVING <having_condition>
--(10) ORDER BY <order_by_list>
--1.  FROM: A Cartesian product (cross join) is performed between the first two tables in the FROM clause, and as a result, virtual table VT1 is generated.
--2.  ON: The ON filter is applied to VT1. Only rows for which the <join_condition> is TRUE are inserted to VT2.
--3.  OUTER (join): If an OUTER JOIN is specified (as opposed to a CROSS JOIN or an INNER JOIN), rows from the preserved table or tables for which a match was not found are added to the rows from VT2 as outer rows, generating VT3. If more than two tables appear in the FROM clause, steps 1 through 3 are applied repeatedly between the result of the last join and the next table in the FROM clause until all tables are processed.
--4.  WHERE: The WHERE filter is applied to VT3. Only rows for which the <where_condition> is TRUE are inserted to VT4.
--5.  GROUP BY: The rows from VT4 are arranged in groups based on the column list specified in the GROUP BY clause. VT5 is generated.
--6.  CUBE | ROLLUP: Supergroups (groups of groups) are added to the rows from VT5, generating VT6.
--7.  HAVING: The HAVING filter is applied to VT6. Only groups for which the <having_condition> is TRUE are inserted to VT7.
--8.  SELECT: The SELECT list is processed, generating VT8.
--9.  DISTINCT: Duplicate rows are removed from VT8. VT9 is generated.
--10.  ORDER BY: The rows from VT9 are sorted according to the column list specified in the ORDER BY clause. A cursor is generated (VC10).
--11.  TOP: The specified number or percentage of rows is selected from the beginning of VC10. Table VT11 is generated and returned to the caller.


--了解这些执行计划的顺序,有利于写出一个好的TSQL,减少笛卡尔乘积
-- 由于现在已经晚上11点了,要休息了。数据库建立已经完成,另外关于物理执行计划这些详细的内容我无法提供,所有大家可以自己去
--查看红色钩钩(执行)按钮旁边的(显示估计的执行计划)按钮,查看每一种查询方案的具体执行计划,去了解每一个过程的资源损耗,
--去了解他的解析过程。这很有帮助


---最终结果是:
--方案二:第一次执行用去了:21秒。开始时间 2011-10-13 23:17:46.097 结束时间 2011-10-13 23:18:07.190
-- (1010000 行受影响)
-- 第二次执行,用时 21秒。
--方案一:第一次执行 22秒
-- 开始时间:2011-10-13 23:19:48.760 结束时间 2011-10-13 23:22:41.343 
-- (1010000 行受影响)
-- 第二次执行,用去22秒




------将次数改成为0的时候,瞬间CPU占用率100%,
--方案一:执行了5分30秒,才显示出来1970条数据
--方案二:执行了5分30秒,才显示出2421条数据.
--几乎同等的速度,因为在方案一执行过程期间,我进行过其他程序操作,占用了系统资源.


---第三次尝试  num<= 1 
---方案二:执行3秒,完毕,(10000 行受影响) 2011-10-13 23:49:13.753 - 2011-10-13 23:49:17.013
---方案一:执行3秒,完毕,(10000 行受影响) 2011-10-13 23:48:01.807 - 2011-10-13 23:48:04.990






select c.CustomerID from Customer c
left join sendInfo s on (s.CustomerId = c.CustomerID and datediff(d,SendDate,getdate()) <= 2)
group by c.CustomerID
having count(s.CustomerId) <= 1 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值