C#---聚类

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Wellcomm.BLL.Geometric;

namespace Wellcomm.BLL.InternalInterference
{
    /// <summary>
    /// Bisecting k-means clustering of points in the XY plane.
    /// Only the X and Y coordinates of each <c>Point</c> are used; generated
    /// centroids always get Z = 0.
    /// </summary>
    public class Cluster
    {
        // Maximum number of random re-initialisations tried when a 2-means
        // split leaves one of the two halves empty.
        private const int MaxSplitAttempts = 10;

        private readonly Random r;

        public Cluster()
        {
            r = new Random();
        }

        // Euclidean distance between two points in the XY plane.
        double distEclud(Point p1, Point p2)
        {
            double dx = p1.X - p2.X;
            double dy = p1.Y - p2.Y;
            return Math.Sqrt(dx * dx + dy * dy);
        }

        // Uniform random value in [min, max).
        double randVal(double min, double max)
        {
            return r.NextDouble() * (max - min) + min;
        }

        // Builds k random centroids inside the bounding box of dataSet.
        // dataSet must contain at least one point.
        List<Point> randCnt(List<Point> dataSet, int k)
        {
            List<Point> centoids = new List<Point>();

            double minX = dataSet[0].X;
            double minY = dataSet[0].Y;
            double maxX = dataSet[0].X;
            double maxY = dataSet[0].Y;
            for (int i = 1; i < dataSet.Count; i++)
            {
                minX = Math.Min(minX, dataSet[i].X);
                minY = Math.Min(minY, dataSet[i].Y);
                maxX = Math.Max(maxX, dataSet[i].X);
                maxY = Math.Max(maxY, dataSet[i].Y);
            }

            // Random centroids
            for (int i = 0; i < k; i++)
            {
                Point p = new Point();
                p.X = randVal(minX, maxX);
                p.Y = randVal(minY, maxY);
                p.Z = 0;
                centoids.Add(p);
            }

            return centoids;
        }

        // Plain k-means (assign -> recompute centroids, repeated until no
        // assignment changes).
        //   centroid       : k initial centroids, updated in place.
        //   clusterAssment : at least dataSet.Count x 2; on return column 0
        //                    holds the cluster index of each point and
        //                    column 1 its squared distance to that centroid.
        void kMeans(List<Point> dataSet, int k, List<Point> centroid, double[,] clusterAssment)
        {
            int m = dataSet.Count;  // number of data points

            // Mark every point as unassigned. Without this the zero-initialised
            // array looks like "already in cluster 0" and a first pass that puts
            // every point into cluster 0 would be mistaken for convergence.
            for (int i = 0; i < m; i++)
            {
                clusterAssment[i, 0] = -1;
            }

            bool clusterChanged = true;
            while (clusterChanged)
            {
                clusterChanged = false;
                for (int i = 0; i < m; i++)
                {
                    double minDis = Double.MaxValue;
                    int minIndex = -1;

                    // Find the nearest centroid
                    for (int j = 0; j < k; j++)
                    {
                        double dist = distEclud(dataSet[i], centroid[j]);
                        if (dist < minDis)
                        {
                            minDis = dist;
                            minIndex = j;
                        }
                    }

                    if ((int)clusterAssment[i, 0] != minIndex)
                    {
                        clusterChanged = true;
                    }

                    clusterAssment[i, 0] = minIndex;
                    clusterAssment[i, 1] = minDis * minDis;
                }

                // Move each centroid to the mean of its members
                for (int cent = 0; cent < k; cent++)
                {
                    double sumX = 0, sumY = 0;
                    int cnt = 0;
                    for (int i = 0; i < m; i++)
                    {
                        if ((int)clusterAssment[i, 0] == cent)
                        {
                            sumX += dataSet[i].X;
                            sumY += dataSet[i].Y;
                            cnt++;
                        }
                    }

                    // An empty cluster keeps its previous position; dividing by
                    // zero here would turn the centroid into NaN.
                    if (cnt == 0)
                    {
                        continue;
                    }

                    centroid[cent].X = sumX / cnt;
                    centroid[cent].Y = sumY / cnt;
                }
            }
        }

        // Tries to split points into two non-empty clusters with 2-means,
        // re-drawing the random initial centroids up to MaxSplitAttempts times.
        // Returns false when every attempt left one half empty (for example
        // when all points coincide).
        bool trySplit(List<Point> points, out List<Point> centroids, out double[,] assignment)
        {
            centroids = null;
            assignment = null;

            for (int attempt = 0; attempt < MaxSplitAttempts; attempt++)
            {
                List<Point> candidateCents = randCnt(points, 2);
                double[,] candidateAss = new double[points.Count, 2];
                kMeans(points, 2, candidateCents, candidateAss);

                int inFirst = 0;
                for (int j = 0; j < points.Count; j++)
                {
                    if ((int)candidateAss[j, 0] == 0)
                    {
                        inFirst++;
                    }
                }

                if (inFirst > 0 && inFirst < points.Count)
                {
                    centroids = candidateCents;
                    assignment = candidateAss;
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Bisecting k-means: starts with one cluster containing every point and
        /// repeatedly splits the cluster whose 2-means split gives the lowest
        /// total squared error, until <paramref name="k"/> clusters exist or no
        /// cluster can be split any further.
        /// </summary>
        /// <param name="dataSet">Points to cluster. Not modified.</param>
        /// <param name="k">Requested number of clusters. Values below 2 yield a single cluster.</param>
        /// <param name="clusterAssment">
        /// Result matrix with at least <c>dataSet.Count</c> rows and 2 columns:
        /// column 0 receives the cluster index of each point, column 1 its
        /// squared distance to that cluster's centroid. Previous contents are
        /// overwritten. Allocated when null.
        /// </param>
        /// <param name="centList">
        /// Receives the centroids; index i is the centroid of cluster i. The
        /// list is cleared first. Allocated when null. May end up with fewer
        /// than <paramref name="k"/> entries when the data cannot be split further.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="dataSet"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="clusterAssment"/> is too small.</exception>
        public void biKeans(ref List<Point> dataSet, int k, ref double[,] clusterAssment, ref List<Point> centList)
        {
            if (dataSet == null)
            {
                throw new ArgumentNullException("dataSet");
            }

            int m = dataSet.Count;

            if (clusterAssment == null)
            {
                clusterAssment = new double[m, 2];
            }
            else if (clusterAssment.GetLength(0) < m || clusterAssment.GetLength(1) < 2)
            {
                throw new ArgumentException("clusterAssment must have at least dataSet.Count rows and 2 columns.", "clusterAssment");
            }

            if (centList == null)
            {
                centList = new List<Point>();
            }
            else
            {
                // Cluster indices refer to positions in centList, so it must start empty.
                centList.Clear();
            }

            if (m == 0)
            {
                return;  // nothing to cluster; avoids a NaN centroid from 0 / 0
            }

            // First centroid: mean of all points
            double centroidX = 0, centroidY = 0;
            for (int i = 0; i < m; i++)
            {
                centroidX += dataSet[i].X;
                centroidY += dataSet[i].Y;
            }
            Point cent = new Point(centroidX / m, centroidY / m, 0);
            centList.Add(cent);

            // Every point starts in cluster 0; store the squared error so that
            // column 1 is consistent with what kMeans writes.
            for (int j = 0; j < m; j++)
            {
                double d = distEclud(dataSet[j], cent);
                clusterAssment[j, 0] = 0;
                clusterAssment[j, 1] = d * d;
            }

            // Keep splitting until the requested number of clusters is reached
            while (centList.Count < k)
            {
                double lowestSSE = Double.MaxValue;
                int bestCentertoSplit = -1;
                double[,] bestClusAss = null;
                List<Point> bestNewCenter = null;
                int cnt = centList.Count;

                // Try splitting each existing cluster
                for (int i = 0; i < cnt; i++)
                {
                    // Collect the points of cluster i and the error of all others
                    List<Point> ptsInCurrCluster = new List<Point>();
                    double sseNotSplit = 0;
                    for (int j = 0; j < m; j++)
                    {
                        if ((int)clusterAssment[j, 0] == i)
                        {
                            ptsInCurrCluster.Add(dataSet[j]);
                        }
                        else
                        {
                            sseNotSplit += clusterAssment[j, 1];
                        }
                    }

                    // A cluster with fewer than two points cannot be bisected
                    if (ptsInCurrCluster.Count < 2)
                    {
                        continue;
                    }

                    List<Point> centroidMat;
                    double[,] splitClustAss;
                    if (!trySplit(ptsInCurrCluster, out centroidMat, out splitClustAss))
                    {
                        continue;
                    }

                    double sseSplit = 0;
                    for (int j = 0; j < ptsInCurrCluster.Count; j++)
                    {
                        sseSplit += splitClustAss[j, 1];
                    }

                    // Remember the split with the lowest total error
                    double totalSSE = sseSplit + sseNotSplit;
                    if (totalSSE < lowestSSE)
                    {
                        lowestSSE = totalSSE;
                        bestCentertoSplit = i;
                        bestNewCenter = centroidMat;
                        bestClusAss = splitClustAss;
                    }
                }

                // No cluster could be split (e.g. fewer distinct points than k)
                if (bestCentertoSplit < 0)
                {
                    break;
                }

                // Update the centroid list: the first half keeps the old index,
                // the second half gets a new one at the end.
                int newIndex = centList.Count;
                centList[bestCentertoSplit] = bestNewCenter[0];
                centList.Add(bestNewCenter[1]);

                // Write the split result back. Points of the split cluster are
                // visited in the same order in which they were collected above,
                // so row kk of bestClusAss belongs to the kk-th matching point.
                int kk = 0;
                for (int i = 0; i < m; i++)
                {
                    if ((int)clusterAssment[i, 0] == bestCentertoSplit)
                    {
                        clusterAssment[i, 0] = ((int)bestClusAss[kk, 0] == 0) ? bestCentertoSplit : newIndex;
                        clusterAssment[i, 1] = bestClusAss[kk, 1];
                        ++kk;
                    }
                }
            }
        }
    }
}

对圆的交点进行聚类
圆为蓝色,聚类结果为黑色

调用

// Cluster the intersection points
int K = 3;  // number of clusters
double[,] clusterAssment = new double[crossPts.Count, 2];   // column 0 stores the cluster assignment, column 1 stores the squared error
List<Wellcomm.BLL.Geometric.Point> centList = new List<Wellcomm.BLL.Geometric.Point>();      // stores all centroids
Cluster cluster = new Cluster();
cluster.biKeans(ref crossPts, K, ref clusterAssment, ref centList);

(图:圆的交点聚类结果——圆为蓝色,聚类结果为黑色)

参考
《机器学习实战》(Machine Learning in Action,Peter Harrington 著)第 10 章:利用 K-均值聚类算法对未标注数据分组(含二分 K-均值算法)

  • 2
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值