中值有两种定义:
1、当组中包含奇数个元素时,我们将直接返回中间的值
2、当组中包含偶数个元素时,返回两个中间值的平均值
示例:
[color=blue]--构造Groups表[/color]
IF OBJECT_ID('dbo.Groups') IS NOT NULL
DROP TABLE dbo.Groups;
GO
CREATE TABLE dbo.Groups
(
groupid VARCHAR(10) NOT NULL,
memberid INT NOT NULL,
string VARCHAR(10) NOT NULL,
val INT NOT NULL,
PRIMARY KEY (groupid, memberid)
);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('a', 3, 'stra1', 6);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('a', 9, 'stra2', 7);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 2, 'strb1', 3);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 4, 'strb2', 7);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 5, 'strb3', 3);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 9, 'strb4', 11);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('c', 3, 'strc1', 8);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('c', 7, 'strc2', 10);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('c', 9, 'strc3', 12);
GO
[color=blue]--使用NTILE求中值[/color]
WITH Tiles AS
(
SELECT groupid, val,
NTILE(2) OVER(PARTITION BY groupid ORDER BY val) AS tile
FROM dbo.Groups
),
GroupedTiles AS
(
SELECT groupid, tile, COUNT(*) AS cnt,
CASE WHEN tile = 1 THEN MAX(val) ELSE MIN(val) END AS val
FROM Tiles
GROUP BY groupid, tile
)
SELECT groupid,
CASE WHEN MIN(cnt) = MAX(cnt) THEN AVG(1.*val)
ELSE MIN(val) END AS median
FROM GroupedTiles
GROUP BY groupid;
[color=blue]
--利用ROW_NUMBER求中值,参与中值计算的两个值得绝对差小于或等于1[/color]
WITH RN AS
(
SELECT groupid, val,
ROW_NUMBER()
OVER(PARTITION BY groupid ORDER BY val, memberid) AS rna,
ROW_NUMBER()
OVER(PARTITION BY groupid ORDER BY val DESC, memberid DESC) AS rnd
FROM dbo.Groups
)
SELECT groupid, AVG(1.*val) AS median
FROM RN
WHERE ABS(rna - rnd) <= 1
GROUP BY groupid;
[color=blue]--通过从第一个行号衍生出第二个行号,降序行号可以用组中的行数减去升序行号再加1得到[/color]
WITH RN AS
(
SELECT groupid, val,
ROW_NUMBER() OVER(PARTITION BY groupid ORDER BY val) AS rn,
COUNT(*) OVER(PARTITION BY groupid) AS cnt
FROM dbo.Groups
)
SELECT groupid, AVG(1.*val) AS median
FROM RN
WHERE ABS(2*rn - cnt - 1) <= 1
GROUP BY groupid;
[color=blue]--IN((cnt+1)/2, (cnt+2)/2)方法[/color]
WITH RN AS
(
SELECT groupid, val,
ROW_NUMBER() OVER(PARTITION BY groupid ORDER BY val) AS rn,
COUNT(*) OVER(PARTITION BY groupid) AS cnt
FROM dbo.Groups
)
SELECT groupid, AVG(1.*val) AS median
FROM RN
WHERE rn IN((cnt+1)/2, (cnt+2)/2)
GROUP BY groupid;
GO
[color=blue]--使用top的方法求中值[/color]SELECT DISTINCT groupid,
((SELECT MAX(val)
FROM (SELECT TOP 50 PERCENT val
FROM dbo.Groups AS H1
WHERE H1.groupid = G.groupid
ORDER BY val) AS M1)
+
(SELECT MIN(val)
FROM (SELECT TOP 50 PERCENT val
FROM dbo.Groups AS H2
WHERE H2.groupid = G.groupid
ORDER BY val DESC) AS M2))
/2. AS median
FROM dbo.Groups AS G;
1、当组中包含奇数个元素时,我们将直接返回中间的值
2、当组中包含偶数个元素时,返回两个中间值的平均值
示例:
[color=blue]--构造Groups表[/color]
IF OBJECT_ID('dbo.Groups') IS NOT NULL
DROP TABLE dbo.Groups;
GO
CREATE TABLE dbo.Groups
(
groupid VARCHAR(10) NOT NULL,
memberid INT NOT NULL,
string VARCHAR(10) NOT NULL,
val INT NOT NULL,
PRIMARY KEY (groupid, memberid)
);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('a', 3, 'stra1', 6);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('a', 9, 'stra2', 7);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 2, 'strb1', 3);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 4, 'strb2', 7);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 5, 'strb3', 3);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('b', 9, 'strb4', 11);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('c', 3, 'strc1', 8);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('c', 7, 'strc2', 10);
INSERT INTO dbo.Groups(groupid, memberid, string, val)
VALUES('c', 9, 'strc3', 12);
GO
[color=blue]--使用NTILE求中值[/color]
WITH Tiles AS
(
SELECT groupid, val,
NTILE(2) OVER(PARTITION BY groupid ORDER BY val) AS tile
FROM dbo.Groups
),
GroupedTiles AS
(
SELECT groupid, tile, COUNT(*) AS cnt,
CASE WHEN tile = 1 THEN MAX(val) ELSE MIN(val) END AS val
FROM Tiles
GROUP BY groupid, tile
)
SELECT groupid,
CASE WHEN MIN(cnt) = MAX(cnt) THEN AVG(1.*val)
ELSE MIN(val) END AS median
FROM GroupedTiles
GROUP BY groupid;
[color=blue]
--利用ROW_NUMBER求中值,参与中值计算的两个值得绝对差小于或等于1[/color]
WITH RN AS
(
SELECT groupid, val,
ROW_NUMBER()
OVER(PARTITION BY groupid ORDER BY val, memberid) AS rna,
ROW_NUMBER()
OVER(PARTITION BY groupid ORDER BY val DESC, memberid DESC) AS rnd
FROM dbo.Groups
)
SELECT groupid, AVG(1.*val) AS median
FROM RN
WHERE ABS(rna - rnd) <= 1
GROUP BY groupid;
[color=blue]--通过从第一个行号衍生出第二个行号,降序行号可以用组中的行数减去升序行号再加1得到[/color]
WITH RN AS
(
SELECT groupid, val,
ROW_NUMBER() OVER(PARTITION BY groupid ORDER BY val) AS rn,
COUNT(*) OVER(PARTITION BY groupid) AS cnt
FROM dbo.Groups
)
SELECT groupid, AVG(1.*val) AS median
FROM RN
WHERE ABS(2*rn - cnt - 1) <= 1
GROUP BY groupid;
[color=blue]--IN((cnt+1)/2, (cnt+2)/2)方法[/color]
WITH RN AS
(
SELECT groupid, val,
ROW_NUMBER() OVER(PARTITION BY groupid ORDER BY val) AS rn,
COUNT(*) OVER(PARTITION BY groupid) AS cnt
FROM dbo.Groups
)
SELECT groupid, AVG(1.*val) AS median
FROM RN
WHERE rn IN((cnt+1)/2, (cnt+2)/2)
GROUP BY groupid;
GO
[color=blue]--使用top的方法求中值[/color]SELECT DISTINCT groupid,
((SELECT MAX(val)
FROM (SELECT TOP 50 PERCENT val
FROM dbo.Groups AS H1
WHERE H1.groupid = G.groupid
ORDER BY val) AS M1)
+
(SELECT MIN(val)
FROM (SELECT TOP 50 PERCENT val
FROM dbo.Groups AS H2
WHERE H2.groupid = G.groupid
ORDER BY val DESC) AS M2))
/2. AS median
FROM dbo.Groups AS G;