using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Wellcomm.BLL.Geometric;
namespace Wellcomm.BLL.InternalInterference
{
/// <summary>
/// 2-D clustering of <c>Point</c> instances with k-means and bisecting k-means.
/// Only the X and Y coordinates take part in the distance computation; generated
/// centroids get Z = 0.
/// </summary>
public class Cluster
{
    // Random source used to seed the initial centroids.
    readonly Random r;

    /// <summary>Creates a clusterer with its own random number generator.</summary>
    public Cluster()
    {
        r = new Random();
    }

    /// <summary>Euclidean distance between two points in the X/Y plane.</summary>
    double distEclud(Point p1, Point p2)
    {
        double dx = p1.X - p2.X;
        double dy = p1.Y - p2.Y;
        return Math.Sqrt(dx * dx + dy * dy);
    }

    /// <summary>Returns a random value between <paramref name="min"/> and <paramref name="max"/>.</summary>
    double randVal(double min, double max)
    {
        return r.NextDouble() * (max - min) + min;
    }

    /// <summary>
    /// Builds <paramref name="k"/> random centroids inside the axis-aligned bounding box
    /// of <paramref name="dataSet"/>.
    /// </summary>
    /// <param name="dataSet">Points to cover; must contain at least one element.</param>
    /// <param name="k">Number of centroids to generate.</param>
    /// <returns>A new list with <paramref name="k"/> centroids (Z = 0).</returns>
    List<Point> randCnt(ref List<Point> dataSet, int k)
    {
        List<Point> centoids = new List<Point>();

        // Bounding box of the data.
        double minX = dataSet[0].X;
        double minY = dataSet[0].Y;
        double maxX = dataSet[0].X;
        double maxY = dataSet[0].Y;
        for (int i = 1; i < dataSet.Count; i++)
        {
            if (minX > dataSet[i].X)
                minX = dataSet[i].X;
            if (minY > dataSet[i].Y)
                minY = dataSet[i].Y;
            if (maxX < dataSet[i].X)
                maxX = dataSet[i].X;
            if (maxY < dataSet[i].Y)
                maxY = dataSet[i].Y;
        }

        // Random centroids inside the box.
        for (int i = 0; i < k; i++)
        {
            Point p = new Point();
            p.X = randVal(minX, maxX);
            p.Y = randVal(minY, maxY);
            p.Z = 0;
            centoids.Add(p);
        }
        return centoids;
    }

    /// <summary>
    /// Standard k-means: alternates "assign each point to its nearest centroid" and
    /// "move each centroid to the mean of its points" until no assignment changes.
    /// </summary>
    /// <param name="dataSet">Points to cluster.</param>
    /// <param name="k">Number of centroids in <paramref name="centroid"/>.</param>
    /// <param name="centroid">Initial centroids; updated in place.</param>
    /// <param name="clusterAssment">
    /// Output matrix with one row per point: column 0 receives the centroid index,
    /// column 1 the squared distance to that centroid.
    /// </param>
    void kMeans(List<Point> dataSet, int k, ref List<Point> centroid, ref double[,] clusterAssment)
    {
        int m = dataSet.Count;

        // Mark every point as unassigned so that the first pass always registers a
        // change; otherwise points whose nearest centroid is index 0 would look
        // "unchanged" against a zero-initialised matrix and the loop could stop early.
        for (int i = 0; i < m; i++)
        {
            clusterAssment[i, 0] = -1;
        }

        bool clusterChanged = true;
        while (clusterChanged)
        {
            clusterChanged = false;

            // Assignment step.
            for (int i = 0; i < m; i++)
            {
                double minDis = Double.MaxValue;
                int minIndex = -1;
                for (int j = 0; j < k; j++)
                {
                    double dist = distEclud(dataSet[i], centroid[j]);
                    if (dist < minDis)
                    {
                        minDis = dist;
                        minIndex = j;
                    }
                }

                if (clusterAssment[i, 0] != minIndex)
                {
                    clusterChanged = true;
                }
                clusterAssment[i, 0] = minIndex;
                clusterAssment[i, 1] = minDis * minDis;
            }

            // Update step.
            for (int cent = 0; cent < k; cent++)
            {
                double sumX = 0, sumY = 0;
                int cnt = 0;
                for (int i = 0; i < m; i++)
                {
                    if (clusterAssment[i, 0] == cent)
                    {
                        sumX += dataSet[i].X;
                        sumY += dataSet[i].Y;
                        cnt++;
                    }
                }

                // An empty cluster keeps its previous centroid; dividing by zero
                // would turn the centroid into NaN.
                if (cnt > 0)
                {
                    centroid[cent].X = sumX / cnt;
                    centroid[cent].Y = sumY / cnt;
                }
            }
        }
    }

    /// <summary>
    /// Bisecting k-means: starts with one cluster holding every point and repeatedly
    /// splits, with 2-means, the cluster whose split yields the lowest total squared
    /// error, until <paramref name="k"/> clusters exist.
    /// </summary>
    /// <param name="dataSet">Points to cluster. If empty, nothing is produced.</param>
    /// <param name="k">Desired number of clusters.</param>
    /// <param name="clusterAssment">
    /// Caller-allocated matrix with at least <c>dataSet.Count</c> rows and 2 columns.
    /// On return column 0 holds the cluster index of each point (an index into
    /// <paramref name="centList"/>) and column 1 its squared distance to that centroid.
    /// </param>
    /// <param name="centList">
    /// Receives the centroids. Cluster indices are positions in this list, so any
    /// existing content is discarded first.
    /// </param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="clusterAssment"/> is too small.</exception>
    public void biKeans(ref List<Point> dataSet, int k, ref double[,] clusterAssment, ref List<Point> centList)
    {
        if (dataSet == null)
            throw new ArgumentNullException("dataSet");
        if (clusterAssment == null)
            throw new ArgumentNullException("clusterAssment");
        if (centList == null)
            throw new ArgumentNullException("centList");

        int m = dataSet.Count;
        if (clusterAssment.GetLength(0) < m || clusterAssment.GetLength(1) < 2)
            throw new ArgumentException("clusterAssment must have at least dataSet.Count rows and 2 columns.", "clusterAssment");

        centList.Clear();
        if (m == 0)
        {
            // Nothing to cluster; avoids a NaN centroid from 0 / 0.
            return;
        }

        // First centroid: mean of the whole data set.
        double centroidX = 0, centroidY = 0;
        for (int i = 0; i < m; i++)
        {
            centroidX += dataSet[i].X;
            centroidY += dataSet[i].Y;
        }
        Point cent = new Point(centroidX / m, centroidY / m, 0);
        centList.Add(cent);

        // Every point starts in cluster 0. Column 1 stores the SQUARED distance so it
        // is comparable with the values kMeans writes later.
        for (int j = 0; j < m; j++)
        {
            double d = distEclud(dataSet[j], cent);
            clusterAssment[j, 0] = 0;
            clusterAssment[j, 1] = d * d;
        }

        // Keep splitting until the requested number of clusters is reached.
        while (centList.Count < k)
        {
            double lowestSSE = Double.MaxValue;
            int bestCentertoSplit = -1;
            double[,] bestClusAss = null;
            List<Point> bestNewCenter = null;
            int cnt = centList.Count;

            // Try splitting each existing cluster.
            for (int i = 0; i < cnt; i++)
            {
                // Collect the points of cluster i and, in the same pass, the error
                // contributed by all other clusters.
                List<Point> ptsInCurrCluster = new List<Point>();
                double sseNotSplit = 0;
                for (int j = 0; j < m; j++)
                {
                    if (clusterAssment[j, 0] == i)
                        ptsInCurrCluster.Add(dataSet[j]);
                    else
                        sseNotSplit += clusterAssment[j, 1];
                }

                // An empty cluster cannot be split (randCnt needs at least one point).
                int n = ptsInCurrCluster.Count;
                if (n == 0)
                    continue;

                // Split the cluster in two.
                List<Point> centroidMat = randCnt(ref ptsInCurrCluster, 2);
                double[,] splitClustAss = new double[n, 2];
                kMeans(ptsInCurrCluster, 2, ref centroidMat, ref splitClustAss);

                double sseSplit = 0;
                for (int j = 0; j < n; j++)
                    sseSplit += splitClustAss[j, 1];

                // Remember the split with the lowest total error.
                double totalSSE = sseSplit + sseNotSplit;
                if (totalSSE < lowestSSE)
                {
                    lowestSSE = totalSSE;
                    bestCentertoSplit = i;
                    bestNewCenter = centroidMat;
                    bestClusAss = splitClustAss;
                }
            }

            // No candidate (e.g. errors are NaN/infinite): stop instead of
            // dereferencing null or looping forever.
            if (bestClusAss == null)
                break;

            // Map the local indices 0/1 of the winning split to global cluster
            // indices: 0 keeps the split cluster's index, 1 becomes the new cluster.
            int newIndex = centList.Count;
            int bestN = bestClusAss.GetLength(0);
            for (int i = 0; i < bestN; i++)
            {
                bestClusAss[i, 0] = bestClusAss[i, 0] == 0 ? bestCentertoSplit : newIndex;
            }

            // Update the centroid list.
            centList[bestCentertoSplit] = bestNewCenter[0];
            centList.Add(bestNewCenter[1]);

            // Write the new assignments/errors back; rows of bestClusAss are in the
            // same order in which the cluster's points were collected from dataSet.
            int kk = 0;
            for (int i = 0; i < m; i++)
            {
                if (clusterAssment[i, 0] == bestCentertoSplit)
                {
                    clusterAssment[i, 0] = bestClusAss[kk, 0];
                    clusterAssment[i, 1] = bestClusAss[kk, 1];
                    ++kk;
                }
            }
        }
    }
}
}
对圆的交点进行聚类
圆为蓝色,聚类结果为黑色。
调用
// Cluster the intersection points
int K = 3; // number of clusters
double[,] clusterAssment = new double[crossPts.Count, 2]; // column 0 stores each point's cluster assignment, column 1 stores its squared error
List<Wellcomm.BLL.Geometric.Point> centList = new List<Wellcomm.BLL.Geometric.Point>(); // holds all centroids
Cluster cluster = new Cluster();
cluster.biKeans(ref crossPts, K, ref clusterAssment, ref centList);
参考
《机器学习实战》(Machine Learning in Action)第 10 章