--预备步骤
SELECT t1.X
,t1.Y
,CASE WHEN t1.A<t1.B AND t1.A<t1.C THEN 'A'
WHEN t1.B<t1.A AND t1.B<t1.C THEN 'B'
WHEN t1.C<t1.A AND t1.C<t1.B THEN 'C' END Distance
,1 AS Ver
from
(SELECT X
,Y
,SQRT(power(x-1,2)+POWER(y-1,2)) A
,SQRT(power(x-4,2)+POWER(y-4,2)) B
,SQRT(power(x-7,2)+POWER(y-7,2)) C
FROM dbo.[K-means]) t1;
--存储过程
alter PROC proc_KMeans
AS
DECLARE @AvgX DEC(10,6),@AvgY DEC(10,6);
DECLARE @AvgX2 DEC(10,6),@AvgY2 DEC(10,6);
DECLARE @AvgX3 DEC(10,6),@AvgY3 DEC(10,6);
DECLARE @i INT=2;
WHILE @i<=30
BEGIN
SELECT @AvgX=AVG(k.X),@AvgY=AVG(k.Y) FROM dbo.[K-means] k WHERE Ver=@i-1 AND k.Distance='A';
SELECT @AvgX2=AVG(k.X),@AvgY2=AVG(k.Y) FROM dbo.[K-means] k WHERE Ver=@i-1 AND k.Distance='B';
SELECT @AvgX3=AVG(k.X),@AvgY3=AVG(k.Y) FROM dbo.[K-means] k WHERE Ver=@i-1 AND k.Distance='C';
INSERT INTO dbo.[K-means]
SELECT t1.X
,t1.Y
,CASE WHEN t1.A<t1.B AND t1.A<t1.C THEN 'A'
WHEN t1.B<t1.A AND t1.B<t1.C THEN 'B'
WHEN t1.C<t1.A AND t1.C<t1.B THEN 'C' END Distance
,@i AS Ver
from
(SELECT X
,Y
,SQRT(power(x-@AvgX,2)+POWER(y-@AvgY,2)) A
,SQRT(power(x-@AvgX2,2)+POWER(y-@AvgY2,2)) B
,SQRT(power(x-@AvgX3,2)+POWER(y-@AvgY3,2)) C
FROM dbo.[K-means] WHERE Ver=@i-1) t1;
SET @i=@i+1;
END
SELECT * FROM dbo.[K-means] WHERE Ver=30;
大约在22代的时候收敛。
--这个是查看收敛情况的
SELECT k.Ver,k.Distance,COUNT(*) c,AVG(k.X) X,AVG(k.Y) Y FROM dbo.[K-means] k
GROUP BY k.Ver,k.Distance
ORDER BY k.Ver,k.Distance;