using System;
using System.Collections.Generic;
using System.Text;
using System.Drawing;

namespace Clustering.Demo
{
    /// <summary>
    /// Demonstrates the k-means algorithm on point coordinates (two-dimensional vectors).
    /// </summary>
    internal class Kmeans
    {
        /// <summary>
        /// Coordinates of the sample points, one { x, y } pair per row.
        /// </summary>
        private static readonly int[,] coords = new int[,]
        {
            { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, { 6, 6 }, { 7, 6 },
            { 8, 6 }, { 6, 7 }, { 7, 7 }, { 8, 7 }, { 9, 7 }, { 7, 8 }, { 8, 8 }, { 9, 8 }, { 8, 9 }, { 9, 9 },
            // Additional samples that were disabled in the original data set:
            // { 12, 12 }, { 12, 13 }, { 13, 14 }, { 14, 14 }, { 14, 15 }, { 15, 16 }, { 16, 14 }, { 17, 15 }, { 16, 18 }, { 16, 17 },
        };

        /// <summary>
        /// Number of clusters.
        /// </summary>
        private static readonly int k = 2;

        /// <summary>
        /// Largest per-axis movement of a centre that still counts as "unchanged".
        /// </summary>
        private const float ConvergenceTolerance = 0.001f;

        /// <summary>
        /// Runs k-means on the built-in sample points, printing the initial data and the
        /// result of every iteration to the console. Stops once no centre moves.
        /// </summary>
        public static void Cluster()
        {
            Point[] points = GetPoints();                  // raw data points
            PointF[] means = new PointF[k];                // current cluster centres
            int[] pointAssigns = new int[points.Length];   // cluster index of each point

            InitMeans(points, k, means);                   // pick k distinct samples as centres
            PrintInit(points, k, means);                   // print the initial data

            int iter = 0;                                  // number of printed iterations
            while (true)
            {
                // Assign every point to its nearest centre.
                Classify(points, k, means, pointAssigns);

                int conv = 0;                              // number of centres that did not move
                for (int j = 0; j < k; j++)
                {
                    // Recompute the centre; an empty cluster keeps its current centre.
                    PointF updated = CalcMeans(points, pointAssigns, j, means[j]);
                    if (Compare(updated, means[j]))
                    {
                        conv++;
                    }
                    means[j] = updated;
                }

                // All centres unchanged means the clustering has reached equilibrium.
                // At least one iteration is always printed so the memberships are shown
                // even when the initial centres already happen to be stable.
                if (conv == k && iter > 0)
                {
                    break;
                }

                iter++;
                PrintIter(points, k, means, pointAssigns, iter);   // print this iteration
            }
        }

        /// <summary>
        /// Builds the point array from the hard-coded coordinate table.
        /// </summary>
        /// <returns>One <see cref="Point"/> per row of <c>coords</c>.</returns>
        private static Point[] GetPoints()
        {
            int len = coords.GetLength(0);
            Point[] points = new Point[len];
            for (int i = 0; i < len; i++)
            {
                points[i] = new Point(coords[i, 0], coords[i, 1]);
            }
            return points;
        }

        /// <summary>
        /// Randomly picks k distinct samples as the initial cluster centres.
        /// </summary>
        /// <param name="points">Sample points to choose from.</param>
        /// <param name="k">Number of centres to pick; must be between 1 and <c>points.Length</c>.</param>
        /// <param name="means">Receives the chosen centres in its first k slots.</param>
        /// <exception cref="ArgumentOutOfRangeException">k is less than 1 or exceeds the number of points.</exception>
        private static void InitMeans(Point[] points, int k, PointF[] means)
        {
            if (k < 1 || k > points.Length)
            {
                throw new ArgumentOutOfRangeException("k", "k must be between 1 and the number of points.");
            }

            int[] indices = new int[points.Length];
            for (int i = 0; i < indices.Length; i++)
            {
                indices[i] = i;
            }

            // Partial Fisher-Yates shuffle: sampling without replacement guarantees that
            // no sample is chosen twice, which would otherwise create an empty cluster.
            Random random = new Random();
            for (int i = 0; i < k; i++)
            {
                int pick = random.Next(i, indices.Length);
                int swap = indices[i];
                indices[i] = indices[pick];
                indices[pick] = swap;

                means[i] = points[indices[i]];
            }
        }

        /// <summary>
        /// Assigns every point to the cluster whose centre is nearest.
        /// </summary>
        /// <param name="points">Sample points.</param>
        /// <param name="k">Number of clusters.</param>
        /// <param name="means">Current cluster centres.</param>
        /// <param name="pointAssigns">Receives, per point, the index of its cluster.</param>
        private static void Classify(Point[] points, int k, PointF[] means, int[] pointAssigns)
        {
            for (int i = 0; i < points.Length; i++)
            {
                double minDist = double.MaxValue;   // shortest distance seen so far
                for (int j = 0; j < k; j++)
                {
                    double dist = Distance(points[i], means[j]);
                    // Strict comparison: on a tie the lower cluster index wins.
                    if (dist < minDist)
                    {
                        minDist = dist;
                        pointAssigns[i] = j;
                    }
                }
            }
        }

        #region Euclidean distance

        /// <summary>
        /// Computes the Euclidean distance between two points (the Pythagorean theorem
        /// extended to n dimensions): distance = sqrt(d1^2 + d2^2 + ... + dn^2).
        /// For 2-D points p1(x1, y1) and p2(x2, y2): dist = sqrt((x1-x2)^2 + (y1-y2)^2).
        /// </summary>
        /// <param name="p1">First point.</param>
        /// <param name="p2">Second point.</param>
        /// <returns>The straight-line distance between the two points.</returns>
        private static double Distance(PointF p1, PointF p2)
        {
            double dx = p1.X - p2.X;
            double dy = p1.Y - p2.Y;
            return Math.Sqrt(dx * dx + dy * dy);
        }

        #endregion

        /// <summary>
        /// Computes the new centre (mean) of one cluster:
        /// meanX = (x1 + x2 + ... + xn) / n, meanY = (y1 + y2 + ... + yn) / n.
        /// </summary>
        /// <param name="points">Sample points.</param>
        /// <param name="pointAssigns">Cluster index of each point.</param>
        /// <param name="j">Index of the cluster whose centre is computed.</param>
        /// <param name="fallback">Centre returned when no point belongs to cluster j.</param>
        /// <returns>The mean of the cluster's members, or <paramref name="fallback"/> for an empty cluster.</returns>
        private static PointF CalcMeans(Point[] points, int[] pointAssigns, int j, PointF fallback)
        {
            float sumX = 0f;
            float sumY = 0f;
            int n = 0;
            for (int i = 0; i < points.Length; i++)
            {
                if (pointAssigns[i] == j)
                {
                    sumX += points[i].X;
                    sumY += points[i].Y;
                    n++;
                }
            }

            // Dividing by zero would produce a NaN centre; keep the previous one instead.
            if (n == 0)
            {
                return fallback;
            }

            return new PointF(sumX / n, sumY / n);
        }

        /// <summary>
        /// Tells whether two cluster centres are equal within <c>ConvergenceTolerance</c>.
        /// </summary>
        /// <param name="a">First centre.</param>
        /// <param name="b">Second centre.</param>
        /// <returns><c>true</c> when both coordinates differ by less than the tolerance.</returns>
        private static bool Compare(PointF a, PointF b)
        {
            return Math.Abs(a.X - b.X) < ConvergenceTolerance
                && Math.Abs(a.Y - b.Y) < ConvergenceTolerance;
        }

        #region Print data

        /// <summary>
        /// Prints the samples and the initial cluster centres.
        /// </summary>
        /// <param name="points">Sample points.</param>
        /// <param name="k">Number of clusters.</param>
        /// <param name="means">Initial cluster centres.</param>
        private static void PrintInit(Point[] points, int k, PointF[] means)
        {
            Console.WriteLine("总共{0}个样本:", points.Length);
            for (int i = 0; i < points.Length; i++)
            {
                Console.WriteLine("{0}, {1}", points[i].X, points[i].Y);
            }

            Console.WriteLine("\n初始化时随机选取k个样本作为聚类中心:");
            for (int i = 0; i < k; i++)
            {
                Console.WriteLine("聚类{0}的中心: {1}, {2}", i, means[i].X, means[i].Y);
            }
        }

        /// <summary>
        /// Prints the members and the centre of every cluster for one iteration.
        /// </summary>
        /// <param name="points">Sample points.</param>
        /// <param name="k">Number of clusters.</param>
        /// <param name="means">Current cluster centres.</param>
        /// <param name="pointAssigns">Cluster index of each point.</param>
        /// <param name="iter">Iteration number shown in the heading.</param>
        private static void PrintIter(Point[] points, int k, PointF[] means, int[] pointAssigns, int iter)
        {
            Console.WriteLine("\n\n--------第{0}次迭代的结果--------", iter);
            for (int j = 0; j < k; j++)
            {
                Console.WriteLine("\n第{0}个类的成员:", j);
                for (int i = 0; i < points.Length; i++)
                {
                    if (pointAssigns[i] == j)
                    {
                        Console.WriteLine("{0}, {1}", points[i].X, points[i].Y);
                    }
                }
                Console.WriteLine("均值(聚类中心): {0}, {1}", means[j].X, means[j].Y);
            }
        }

        #endregion
    }
}