第8章数据聚合和透视

最新推荐文章于 2022-11-07 13:44:17 发布

qq_20227495

最新推荐文章于 2022-11-07 13:44:17 发布

阅读量235

点赞数

分类专栏： SQLServer 文章标签： SQL Server2008 技术内幕笔记

本文链接：https://blog.csdn.net/qq_20227495/article/details/85156102

版权

SQLServer 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

8.1 Over 子句

--聚合函数跟Over开窗函数一起使用的例子
--over() 后面没有Partition by 分区子句，则基于整个输入计算聚合
select orderid,custid,empid,val,
	cast((val/sum(val) over() *100.) as numeric(4,2)) as pct,
	cast(val-avg(val) over() as numeric(10,2)) as  diff
from dbo.myordervalues

--over() 加Partition by 分区子句，基于分区结果计算聚合
--使用over子句相对于使用子查询，速度快很多。 需要多次聚合时，子查询解决方案必须使用多个子查询，每个子查询都回读取一次数据。而使用Over子句只读取以便数据，计算多个聚合值
select orderid,custid,empid,val,
	cast((val/sum(val) over(partition by custid) *100.) as numeric(5,2)) as pct,
	cast(val-avg(val) over(partition by custid) as numeric(10,2)) as  diff
from dbo.myordervalues

8.2 决胜属性(Tiebreaker)
返回每个雇员最新的订单
子查询解决方案过于依赖索引，必须在分区列，排序列，决胜属性列上建有索引效率才能保证。
使用聚合技术的解法，不依赖索引也能获得好性能
解决方案比较复杂，不推荐

8.3 连续聚合
8.3.1 累计聚合

--设置索引的原则，在分组列，和排序列有键，包括覆盖列
--扩展压缩技术，直观性相对较差，计算两个以上聚合变量性能较号
select o1.empid,CONVERT(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
	sum(o2.qty) as totalqty,cast(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
left join dbo.EmpOrders o2 on o2.empid=o1.empid and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth

--子查询技术，相对直观，但在计算两个以上聚合值时不划算，需要为每次聚合扫描一次数据
select o1.empid,convert(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
	(select sum(o2.qty) from dbo.EmpOrders o2
		where o2.empid=o1.empid and o2.ordmonth <=o1.ordmonth) as totalqty
from dbo.EmpOrders o1
order by o1.empid,o1.ordmonth

--返回每个员工累计达到某个目标时的各条数，目标值可以关联Targets表
select o1.empid,CONVERT(varchar(7),o1.ordmonth,112) as ordermonth,o1.qty,
	sum(o2.qty) as totalqty,cast(avg(o2.qty) as numeric(12,2)) as avgqty,count(*)
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
having sum(o2.qty)-o1.qty<1000 
	--and sum(o2.qty)>=1000 --增加该条件，只返回达到目标值当月的行
order by o1.empid,o1.ordmonth

8.3.2 滑动聚合(Sliding Aggregation)

--计算前三个月(含当前月)总和以及平均值
select o1.empid,convert(varchar(7),o1.ordmonth,121) as ordermonth,
	o1.qty,sum(o2.qty) as totalqty, CAST(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid
	and o2.ordmonth>DATEADD(month,-3,o1.ordmonth) 
	and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth

8.3.3 年初至今(YTD)

--计算每个员工年初至今的数量汇总，平均值
select o1.empid,convert(varchar(7),o1.ordmonth,121) as ordermonth,
	o1.qty,sum(o2.qty) as totalqty, CAST(avg(o2.qty) as numeric(12,2)) as avgqty
from dbo.EmpOrders o1
inner join dbo.EmpOrders o2 on o2.empid=o1.empid
	and o2.ordmonth>cast(cast(year(o1.ordmonth) as char(4)) +'0101' as datetime) 
	and o2.ordmonth<=o1.ordmonth
group by o1.empid,o1.ordmonth,o1.qty
order by o1.empid,o1.ordmonth

8.4 透视转换(Pivoting)
8.4.1 透视转换属性

--分组-扩展-聚合
--case when 不带else时默认 else null
select objectid,
	max(case when attribute='attr1' then value end) as attr1,
	max(case when attribute='attr2' then value end) as attr2,
	max(case when attribute='attr3' then value end) as attr3,
	max(case when attribute='attr4' then value end) as attr4,
	max(case when attribute='attr5' then value end) as attr5
from dbo.OpenSchema
group by objectid

--Pivot解决方法
select objectid,attr1,attr2,attr3,attr4
from dbo.OpenSchema
pivot(max(value) for attribute in (attr1,attr2,attr3,attr4)) as p

8.4.2 关系除法

--用透视转化来解决关系除法问题，例，返回同时包含几种产品(2,3,4)的订单
with OT as
(
select orderid,[1] as p1,[2] as p2,[3] as p3,[4] as p4
from dbo.OrderDetails
pivot (count(productid) for productid in ([1],[2],[3],[4])) as p
)
select orderid
from OT
where p2=1 and p3=1 and p4=1

8.4.3 聚合数据

select * 
from(select custid,year(orderdate) as orderyear,qty from dbo.Orders) as a
pivot(sum(qty) for orderyear in ([2006],[2007],[2008])) as p

8.5 逆透视转换

--常规解法，通过手动生成副本，再用Case when
select custid,
	case when n=1 then [2006] 
		 when n=2 then [2007]
		 when n=3 then [2008] end as qty
from dbo.PvtCustOrders
cross join dbo.Nums where n<4
and case when n=1 then [2006] 
		 when n=2 then [2007]
		 when n=3 then [2008] end is not null 
order by custid

--Unpivot 解法
select custid,orderyear,qty--orderyear,qty源表中这两个属性不存在，分别标识源列旋转后的目标列名称，及源表列值的目标列名称
from dbo.PvtCustOrders
unpivot(qty for orderyear in ([2006],[2007],[2008])) as p

8.6 自定义聚合
没有内建聚合函数的聚合，如串联字符串，计算乘积，计算中值等

--用Pivot旋转后联接，串联字符串
--coalesce函数将Null替换成想要的字符,这里替换成''
select groupid,
	[1]+coalesce(','+[2],'') 
	   +coalesce(','+[3],'')
	   +coalesce(','+[4],'') as string
from(select groupid,string,ROW_NUMBER() over(partition by groupid order by groupid) as rn
from dbo.Groups ) as d
pivot(max(string) for rn in ([1],[2],[3],[4])) as p

串联字符串:

--串联字符串的专用解法

select a.groupid,
   stuff((select ','+string from dbo.Groups b where b.groupid=a.groupid for XML PATH('')) ,1,1,'')
	as string
from dbo.Groups a
group by a.groupid

--用Pivot旋转后计算乘积
select groupid,
	[1]*coalesce([2],1) 
	   *coalesce([3],1)
	   *coalesce([4],1) as Prod
from(select groupid,val,ROW_NUMBER() over(partition by groupid order by groupid) as rn
from dbo.Groups ) as d
pivot(max(val) for rn in ([1],[2],[3],[4])) as p

--动态透视转换
--QUOTENAME()的主要作用就是在存储过程中，给列名、表名等加个[]、’’等以保证sql语句能正常执行。
-- Dynamic PIVOT
示例1：
DECLARE @sql AS NVARCHAR(1000);

SET @sql = N'SELECT *
FROM (SELECT custid, YEAR(orderdate) AS orderyear, qty
      FROM dbo.Orders) AS D
  PIVOT(SUM(qty) FOR orderyear IN(' +

 STUFF(
  (SELECT N',' + QUOTENAME(orderyear) AS [text()]
   FROM (SELECT DISTINCT YEAR(orderdate) AS orderyear
         FROM dbo.Orders) AS Years
   ORDER BY orderyear
   FOR XML PATH('')
   ), 1, 1, '') 
+ N')) AS P;';

8.8 分组因子

--计算分组因子，返回<50且大于当前日期的第一个日期
select min(price) minprice,max(price) maxprice,count(1) as numdays,
	min(dt) startday,max(dt) as endday
from(
select dt,price,
	(select min(dt) 
	 from dbo.Stocks b 
	 where b.dt>a.dt and b.price<50) as grp
from dbo.Stocks a
where a.price>=50
) aa
group by grp

--计算行号，通过行号生成分组因子
select min(price) minprice,max(price) maxprice,count(1) as numdays,
	min(dt) startday,max(dt) as endday
from(
select dt,price,
	DATEADD(day,-1* row_number() over(order by dt),dt) as grp
from dbo.Stocks a
where a.price>=50
) aa
group by grp

8.9 分组集

--在group by中使用Grouping Sets从属子句，定义多个分组集
select custid,empid,year(orderdate) as orderyear,sum(qty) as qty
from dbo.Orders
group by grouping sets(
	(custid,empid,year(orderdate)),
	(custid,year(orderdate)),
	(empid,year(orderdate)),
	()
)
go

--CUBE从属子句，接受一列元素作为输入，定义这些元素上所有可能的分组集，包括空集
--CUBE从属子句一定程度上可以用group by grouping sets()子句来代替
select custid,empid,sum(qty) sumqty
from dbo.Orders
group by cube(custid,empid)

--ROLLUP 从属子句，认为输入的元素存在层次关系。给定n个元素，生成n+1个分组
select year(orderdate) as orderyear,
	MONTH(orderdate) as ordermonth,
	day(orderdate) as orderday,
	sum(qty) as orderqty
from dbo.Orders
group by rollup(year(orderdate),month(orderdate),day(orderdate))

qq_20227495

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
第8章数据聚合和透视

8.1 Over 子句--聚合函数跟Over开窗函数一起使用的例子--over() 后面没有Partition by 分区子句，则基于整个输入计算聚合select orderid,custid,empid,val, cast((val/sum(val) over() *100.) as numeric(4,2)) as pct, cast(val-avg(val) over() as n...
复制链接

扫一扫