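/*
K-median clustering:
1. Choose the number of initial centers (K) and pick the initial centers.
2. Compute the distance from every point to each center (Manhattan distance).
3. Assign each point to the center it is closest to.
4. Recompute each center as the median of the points assigned to it.
5. Repeat steps 2-4 until the centers no longer change.
*/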
-- Sample data set: one row per point to be clustered.
--   p : point label; later statements join on it, so it must be unique
--   x : x coordinate
--   y : y coordinate
--   j : id of the cluster the point is assigned to (NULL until assigned)
-- Drop a leftover copy first so the script can be re-run in the same session
-- after an aborted run.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T
(
    p VARCHAR(10)   NOT NULL PRIMARY KEY,
    x DECIMAL(18,6) NOT NULL,
    y DECIMAL(18,6) NOT NULL,
    j INT           NULL
);

-- The table name is spelled exactly as created (#T): identifier matching is
-- case-sensitive when tempdb uses a case-sensitive collation.
INSERT INTO #T (p, x, y)
VALUES
    ('p1',  7,  7),
    ('p2',  2,  3),
    ('p3',  6,  8),
    ('p4',  1,  4),
    ('p5',  1,  2),
    ('p6',  3,  1),
    ('p7',  6,  9),
    ('p8',  8,  8),
    ('p9',  9, 10),
    ('p10', 5,  5),
    ('p11', 7,  6),
    ('p12', 9,  3),
    ('p13', 2,  8),
    ('p14', 5, 11),
    ('p15', 5,  2);
-- Number of clusters to build.
DECLARE @K INT = 2;

-- Drop a leftover copy so the SELECT ... INTO below can be re-run in the same session.
IF OBJECT_ID('tempdb..#T2') IS NOT NULL
    DROP TABLE #T2;

-- Seed the centers table #T2 (p, x, y, j):
--   * order the points by x + y and split them into @K tiles with NTILE,
--   * take one point from each tile as the initial center,
--   * number the centers with IDENTITY; j is the cluster id used from here on.
-- Within a tile of m rows the row at position m / @K is taken. Integer
-- division yields 0 when a tile has fewer than @K rows (e.g. 15 points with
-- @K = 4), which matches no row and would silently lose a center, so the
-- position is clamped to at least 1.
-- p is added as a tie-breaker so points with equal x + y are tiled and picked
-- deterministically.
SELECT
    ranked.p,
    ranked.x,
    ranked.y,
    IDENTITY(INT, 1, 1) AS j
INTO #T2
FROM
(
    SELECT
        tiled.p,
        tiled.x,
        tiled.y,
        COUNT(*)     OVER (PARTITION BY tiled.i)                             AS m,  -- rows in this tile
        ROW_NUMBER() OVER (PARTITION BY tiled.i ORDER BY tiled.x + tiled.y, tiled.p) AS n   -- position inside the tile
    FROM
    (
        SELECT
            p,
            x,
            y,
            NTILE(@K) OVER (ORDER BY x + y, p) AS i
        FROM #T
    ) AS tiled
) AS ranked
WHERE ranked.n = CASE WHEN ranked.m / @K < 1 THEN 1 ELSE ranked.m / @K END
-- Main K-median iteration: alternate between assigning every point to its
-- nearest center and moving every center to the median of its points, until
-- no center moves.
DECLARE @centers_changed INT = 1;   -- centers moved by the last pass; 1 forces the first pass
DECLARE @iteration       INT = 0;
DECLARE @max_iterations  INT = 100; -- safety cap so a non-converging input cannot loop forever

WHILE @centers_changed > 0 AND @iteration < @max_iterations
BEGIN
    -- Assignment step: give each point the id of the center with the smallest
    -- Manhattan distance |dx| + |dy|. Ties go to the lowest center id so the
    -- assignment is deterministic.
    UPDATE pt
    SET j = nearest.j
    FROM #T AS pt
    INNER JOIN
    (
        SELECT
            a.p,
            c.j,
            ROW_NUMBER() OVER
            (
                PARTITION BY a.p
                ORDER BY ABS(a.x - c.x) + ABS(a.y - c.y), c.j
            ) AS rn
        FROM #T AS a
        CROSS JOIN #T2 AS c
    ) AS nearest
        ON nearest.p = pt.p
    WHERE nearest.rn = 1;

    -- Update step: move each center to the per-axis median of its cluster.
    -- PERCENTILE_CONT returns float, so the medians are cast to the
    -- DECIMAL(18,6) type of the stored coordinates before comparing; otherwise
    -- a median with more than six decimals would never compare equal to its
    -- stored (rounded) value and the loop would not terminate.
    -- A center counts as moved when EITHER coordinate differs.
    UPDATE c
    SET x = med.x,
        y = med.y
    FROM #T2 AS c
    INNER JOIN
    (
        SELECT DISTINCT
            j,
            CAST(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) OVER (PARTITION BY j) AS DECIMAL(18,6)) AS x,
            CAST(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY y) OVER (PARTITION BY j) AS DECIMAL(18,6)) AS y
        FROM #T
    ) AS med
        ON med.j = c.j
    WHERE c.x <> med.x
       OR c.y <> med.y;

    -- Must be read immediately after the UPDATE: any later statement resets @@ROWCOUNT.
    SET @centers_changed = @@ROWCOUNT;
    SET @iteration += 1;
END
-- Report every point with its assigned cluster id, grouped by cluster.
SELECT
    p,
    x,
    y,
    j
FROM #T
ORDER BY j;

-- Remove both work tables.
DROP TABLE #T, #T2;
-- K-Median clustering algorithm
-- First published 2024-04-12 17:01:46