8.1 Over 子句
--聚合函数跟Over开窗函数一起使用的例子
--over() 后面没有Partition by 分区子句,则基于整个输入计算聚合
select orderid,custid,empid,val,
cast((val/sum(val) over() *100.) as numeric(4,2)) as pct,
cast(val-avg(val) over() as numeric(10,2)) as diff
from dbo.myordervalues
--over() 加Partition by 分区子句,基于分区结果计算聚合
--使用over子句相对于使用子查询,速度快很多。 需要多次聚合时,子查询解决方案必须使用多个子查询,每个子查询都回读取一次数据。而使用Over子句只读取以便数据,计算多个聚合值
select orderid,custid,empid,val,
cast((val/sum(val) over(partition by custid) *100.) as numeric(5,2)) as pct,
cast(val-avg(val) over(partition by custid) as numeric(10,2)) as diff
from dbo.myordervalues
8.2 决胜属性(Tiebreaker)
返回每个雇员最新的订单
子查询解决方案过于依赖索引,必须在分区列,排序列,决胜属性列上建有索引效率才能保证。
使用聚合技术的解法,不依赖索引也能获得好性能
解决方案比较复杂,不推荐
8.3 连续聚合
8.3.1 累计聚合
--设置索引的原则,在分组列,和排序列有键,包括覆盖列
--扩展压缩技术,直观性相对较差,计算两个以上聚合变量性能较号
select o1.empid,CONVERT(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
sum(o2.qty) as totalqty,cast(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
left join dbo.EmpOrders o2 on o2.empid=o1.empid and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth
--子查询技术,相对直观,但在计算两个以上聚合值时不划算,需要为每次聚合扫描一次数据
select o1.empid,convert(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
(select sum(o2.qty) from dbo.EmpOrders o2
where o2.empid=o1.empid and o2.ordmonth <=o1.ordmonth) as totalqty
from dbo.EmpOrders o1
order by o1.empid,o1.ordmonth
--返回每个员工累计达到某个目标时的各条数,目标值可以关联Targets表
select o1.empid,CONVERT(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
sum(o2.qty) as totalqty,cast(avg(o2.qty) as numeric(12,2)) as avgqty,count(*)
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
having sum(o2.qty)-o1.qty<1000
--and sum(o2.qty)>=1000 --增加该条件,只返回达到目标值当月的行
order by o1.empid,o1.ordmonth
8.3.2 滑动聚合(Sliding Aggregation)
--计算前三个月(含当前月)总和以及平均值
select o1.empid,convert(varchar(7),o1.ordmonth,121) as ordermonth,
o1.qty,sum(o2.qty) as totalqty, CAST(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid
and o2.ordmonth>DATEADD(month,-3,o1.ordmonth)
and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth
8.3.3 年初至今(YTD)
--计算每个员工年初至今的数量汇总,平均值
select o1.empid,convert(varchar(7),o1.ordmonth,121) as ordermonth,
o1.qty,sum(o2.qty) as totalqty, CAST(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid
and o2.ordmonth>cast(cast(year(o1.ordmonth) as char(4)) +'0101' as datetime)
and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth
8.4 透视转换(Pivoting)
8.4.1 透视转换属性
--分组-扩展-聚合
--case when 不带else时默认 else null
select objectid,
max(case when attribute='attr1' then value end) as attr1,
max(case when attribute='attr2' then value end) as attr2,
max(case when attribute='attr3' then value end) as attr3,
max(case when attribute='attr4' then value end) as attr4,
max(case when attribute='attr5' then value end) as attr5
from dbo.OpenSchema
group by objectid
--Pivot解决方法
select objectid,attr1,attr2,attr3,attr4
from dbo.OpenSchema
pivot(max(value) for attribute in (attr1,attr2,attr3,attr4)) as p
8.4.2 关系除法
--用透视转化来解决关系除法问题,例,返回同时包含几种产品(2,3,4)的订单
with OT as
(
select orderid,[1] as p1,[2] as p2,[3] as p3,[4] as p4
from dbo.OrderDetails
pivot (count(productid) for productid in ([1],[2],[3],[4])) as p
)
select orderid
from OT
where p2=1 and p3=1 and p4=1
8.4.3 聚合数据
select *
from(select custid,year(orderdate) as orderyear,qty from dbo.Orders) as a
pivot(sum(qty) for orderyear in ([2006],[2007],[2008])) as p
8.5 逆透视转换
--常规解法,通过手动生成副本,再用Case when
select custid,
case when n=1 then [2006]
when n=2 then [2007]
when n=3 then [2008] end as qty
from dbo.PvtCustOrders
cross join dbo.Nums where n<4
and case when n=1 then [2006]
when n=2 then [2007]
when n=3 then [2008] end is not null
order by custid
--Unpivot 解法
select custid,orderyear,qty--orderyear,qty源表中这两个属性不存在,分别标识源列旋转后的目标列名称,及源表列值的目标列名称
from dbo.PvtCustOrders
unpivot(qty for orderyear in ([2006],[2007],[2008])) as p
8.6 自定义聚合
没有内建聚合函数的聚合,如串联字符串,计算乘积,计算中值等
--用Pivot旋转后联接,串联字符串
--coalesce函数将Null替换成想要的字符,这里替换成''
select groupid,
[1]+coalesce(','+[2],'')
+coalesce(','+[3],'')
+coalesce(','+[4],'') as string
from(select groupid,string,ROW_NUMBER() over(partition by groupid order by groupid) as rn
from dbo.Groups ) as d
pivot(max(string) for rn in ([1],[2],[3],[4])) as p
--串联字符串的专用解法
select a.groupid,
stuff((select ','+string from dbo.Groups b where b.groupid=a.groupid for XML PATH('')) ,1,1,'')
as string
from dbo.Groups a
group by a.groupid
--用Pivot旋转后计算乘积
select groupid,
[1]*coalesce([2],1)
*coalesce([3],1)
*coalesce([4],1) as Prod
from(select groupid,val,ROW_NUMBER() over(partition by groupid order by groupid) as rn
from dbo.Groups ) as d
pivot(max(val) for rn in ([1],[2],[3],[4])) as p
--动态透视转换
--QUOTENAME()的主要作用就是在存储过程中,给列名、表名等加个[]、’’等以保证sql语句能正常执行。
-- Dynamic PIVOT
示例1:
DECLARE @sql AS NVARCHAR(1000);
SET @sql = N'SELECT *
FROM (SELECT custid, YEAR(orderdate) AS orderyear, qty
FROM dbo.Orders) AS D
PIVOT(SUM(qty) FOR orderyear IN(' +
STUFF(
(SELECT N',' + QUOTENAME(orderyear) AS [text()]
FROM (SELECT DISTINCT YEAR(orderdate) AS orderyear
FROM dbo.Orders) AS Years
ORDER BY orderyear
FOR XML PATH('')
), 1, 1, '')
+ N')) AS P;';
8.8 分组因子
--计算分组因子,返回<50且大于当前日期的第一个日期
select min(price) minprice,max(price) maxprice,count(1) as numdays,
min(dt) startday,max(dt) as endday
from(
select dt,price,
(select min(dt)
from dbo.Stocks b
where b.dt>a.dt and b.price<50) as grp
from dbo.Stocks a
where a.price>=50
) aa
group by grp
--计算行号,通过行号生成分组因子
select min(price) minprice,max(price) maxprice,count(1) as numdays,
min(dt) startday,max(dt) as endday
from(
select dt,price,
DATEADD(day,-1* row_number() over(order by dt),dt) as grp
from dbo.Stocks a
where a.price>=50
) aa
group by grp
8.9 分组集
--在group by中使用Grouping Sets从属子句,定义多个分组集
select custid,empid,year(orderdate) as orderyear,sum(qty) as qty
from dbo.Orders
group by grouping sets(
(custid,empid,year(orderdate)),
(custid,year(orderdate)),
(empid,year(orderdate)),
()
)
go
--CUBE从属子句,接受一列元素作为输入,定义这些元素上所有可能的分组集,包括空集
--CUBE从属子句一定程度上可以用group by grouping sets()子句来代替
select custid,empid,sum(qty) sumqty
from dbo.Orders
group by cube(custid,empid)
--ROLLUP 从属子句,认为输入的元素存在层次关系。给定n个元素,生成n+1个分组
select year(orderdate) as orderyear,
MONTH(orderdate) as ordermonth,
day(orderdate) as orderday,
sum(qty) as orderqty
from dbo.Orders
group by rollup(year(orderdate),month(orderdate),day(orderdate))