第8章 数据聚合和透视

8.1 Over 子句

--聚合函数跟Over开窗函数一起使用的例子
--over() 后面没有Partition by 分区子句,则基于整个输入计算聚合
select orderid,custid,empid,val,
	cast((val/sum(val) over() *100.) as numeric(4,2)) as pct,
	cast(val-avg(val) over() as numeric(10,2)) as  diff
from dbo.myordervalues

--over() 加Partition by 分区子句,基于分区结果计算聚合
--使用over子句相对于使用子查询,速度快很多。 需要多次聚合时,子查询解决方案必须使用多个子查询,每个子查询都回读取一次数据。而使用Over子句只读取以便数据,计算多个聚合值
select orderid,custid,empid,val,
	cast((val/sum(val) over(partition by custid) *100.) as numeric(5,2)) as pct,
	cast(val-avg(val) over(partition by custid) as numeric(10,2)) as  diff
from dbo.myordervalues

8.2 决胜属性(Tiebreaker)
返回每个雇员最新的订单
子查询解决方案过于依赖索引,必须在分区列,排序列,决胜属性列上建有索引效率才能保证。
使用聚合技术的解法,不依赖索引也能获得好性能
解决方案比较复杂,不推荐

8.3 连续聚合
8.3.1 累计聚合

--设置索引的原则,在分组列,和排序列有键,包括覆盖列
--扩展压缩技术,直观性相对较差,计算两个以上聚合变量性能较号
select o1.empid,CONVERT(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
	sum(o2.qty) as totalqty,cast(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
left join dbo.EmpOrders o2 on o2.empid=o1.empid and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth

--子查询技术,相对直观,但在计算两个以上聚合值时不划算,需要为每次聚合扫描一次数据
select o1.empid,convert(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
	(select sum(o2.qty) from dbo.EmpOrders o2
		where o2.empid=o1.empid and o2.ordmonth <=o1.ordmonth) as totalqty
from dbo.EmpOrders o1
order by o1.empid,o1.ordmonth

--返回每个员工累计达到某个目标时的各条数,目标值可以关联Targets表
select o1.empid,CONVERT(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
	sum(o2.qty) as totalqty,cast(avg(o2.qty) as numeric(12,2)) as avgqty,count(*)
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
having sum(o2.qty)-o1.qty<1000 
	--and sum(o2.qty)>=1000 --增加该条件,只返回达到目标值当月的行
order by o1.empid,o1.ordmonth

8.3.2 滑动聚合(Sliding Aggregation)

--计算前三个月(含当前月)总和以及平均值
select o1.empid,convert(varchar(7),o1.ordmonth,121) as ordermonth,
	o1.qty,sum(o2.qty) as totalqty, CAST(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid
	and o2.ordmonth>DATEADD(month,-3,o1.ordmonth) 
	and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth

8.3.3 年初至今(YTD)

--计算每个员工年初至今的数量汇总,平均值
select o1.empid,convert(varchar(7),o1.ordmonth,121) as ordermonth,
	o1.qty,sum(o2.qty) as totalqty, CAST(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid
	and o2.ordmonth>cast(cast(year(o1.ordmonth) as char(4)) +'0101' as datetime) 
	and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth

8.4 透视转换(Pivoting)
8.4.1 透视转换属性

--分组-扩展-聚合
--case when 不带else时默认 else null
select objectid,
	max(case when attribute='attr1' then value end) as attr1,
	max(case when attribute='attr2' then value end) as attr2,
	max(case when attribute='attr3' then value end) as attr3,
	max(case when attribute='attr4' then value end) as attr4,
	max(case when attribute='attr5' then value end) as attr5
from dbo.OpenSchema
group by objectid

--Pivot解决方法
select objectid,attr1,attr2,attr3,attr4
from dbo.OpenSchema
pivot(max(value) for attribute in (attr1,attr2,attr3,attr4)) as p

8.4.2 关系除法

--用透视转化来解决关系除法问题,例,返回同时包含几种产品(2,3,4)的订单
with OT as
(
select orderid,[1] as p1,[2] as p2,[3] as p3,[4] as p4
from dbo.OrderDetails
pivot (count(productid) for productid in ([1],[2],[3],[4])) as p
)
select orderid
from OT
where p2=1 and p3=1 and p4=1

8.4.3 聚合数据

select * 
from(select custid,year(orderdate) as orderyear,qty from dbo.Orders) as a
pivot(sum(qty) for orderyear in ([2006],[2007],[2008])) as p

8.5 逆透视转换

--常规解法,通过手动生成副本,再用Case when
select custid,
	case when n=1 then [2006] 
		 when n=2 then [2007]
		 when n=3 then [2008] end as qty
from dbo.PvtCustOrders
cross join dbo.Nums where n<4
and case when n=1 then [2006] 
		 when n=2 then [2007]
		 when n=3 then [2008] end is not null 
order by custid

--Unpivot 解法
select custid,orderyear,qty--orderyear,qty源表中这两个属性不存在,分别标识源列旋转后的目标列名称,及源表列值的目标列名称
from dbo.PvtCustOrders
unpivot(qty for orderyear in ([2006],[2007],[2008])) as p

8.6 自定义聚合
没有内建聚合函数的聚合,如串联字符串,计算乘积,计算中值等

--用Pivot旋转后联接,串联字符串
--coalesce函数将Null替换成想要的字符,这里替换成''
select groupid,
	[1]+coalesce(','+[2],'') 
	   +coalesce(','+[3],'')
	   +coalesce(','+[4],'') as string
from(select groupid,string,ROW_NUMBER() over(partition by groupid order by groupid) as rn
from dbo.Groups ) as d
pivot(max(string) for rn in ([1],[2],[3],[4])) as p

串联字符串:

--串联字符串的专用解法

select a.groupid,
   stuff((select ','+string from dbo.Groups b where b.groupid=a.groupid for XML PATH('')) ,1,1,'')
	as string
from dbo.Groups a
group by a.groupid
--用Pivot旋转后计算乘积
select groupid,
	[1]*coalesce([2],1) 
	   *coalesce([3],1)
	   *coalesce([4],1) as Prod
from(select groupid,val,ROW_NUMBER() over(partition by groupid order by groupid) as rn
from dbo.Groups ) as d
pivot(max(val) for rn in ([1],[2],[3],[4])) as p

--动态透视转换
--QUOTENAME()的主要作用就是在存储过程中,给列名、表名等加个[]、’’等以保证sql语句能正常执行。
-- Dynamic PIVOT
示例1:
DECLARE @sql AS NVARCHAR(1000);

SET @sql = N'SELECT *
FROM (SELECT custid, YEAR(orderdate) AS orderyear, qty
      FROM dbo.Orders) AS D
  PIVOT(SUM(qty) FOR orderyear IN(' +

 STUFF(
  (SELECT N',' + QUOTENAME(orderyear) AS [text()]
   FROM (SELECT DISTINCT YEAR(orderdate) AS orderyear
         FROM dbo.Orders) AS Years
   ORDER BY orderyear
   FOR XML PATH('')
   ), 1, 1, '') 
+ N')) AS P;';

8.8 分组因子

--计算分组因子,返回<50且大于当前日期的第一个日期
select min(price) minprice,max(price) maxprice,count(1) as numdays,
	min(dt) startday,max(dt) as endday
from(
select dt,price,
	(select min(dt) 
	 from dbo.Stocks b 
	 where b.dt>a.dt and b.price<50) as grp
from dbo.Stocks a
where a.price>=50
) aa
group by grp

--计算行号,通过行号生成分组因子
select min(price) minprice,max(price) maxprice,count(1) as numdays,
	min(dt) startday,max(dt) as endday
from(
select dt,price,
	DATEADD(day,-1* row_number() over(order by dt),dt) as grp
from dbo.Stocks a
where a.price>=50
) aa
group by grp

8.9 分组集

--在group by中使用Grouping Sets从属子句,定义多个分组集
select custid,empid,year(orderdate) as orderyear,sum(qty) as qty
from dbo.Orders
group by grouping sets(
	(custid,empid,year(orderdate)),
	(custid,year(orderdate)),
	(empid,year(orderdate)),
	()
)
go

--CUBE从属子句,接受一列元素作为输入,定义这些元素上所有可能的分组集,包括空集
--CUBE从属子句一定程度上可以用group by grouping sets()子句来代替
select custid,empid,sum(qty) sumqty
from dbo.Orders
group by cube(custid,empid)

--ROLLUP 从属子句,认为输入的元素存在层次关系。给定n个元素,生成n+1个分组
select year(orderdate) as orderyear,
	MONTH(orderdate) as ordermonth,
	day(orderdate) as orderday,
	sum(qty) as orderqty
from dbo.Orders
group by rollup(year(orderdate),month(orderdate),day(orderdate))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值