机器学习——聚类算法的C++简易实现(k=3)

#include <math.h>

#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
using namespace std;


// The data set to be clustered: 30 samples with 2 values each.
// The clustering routines treat column 0 as the x coordinate and column 1 as
// the y coordinate of a sample.
double dataSet[30][2] = {
    {0.697,0.460},
    {0.774,0.376},
    {0.634,0.264},
    {0.608,0.318},
    {0.556,0.215},
    {0.403,0.237},
    {0.481,0.149},
    {0.437,0.211},
    {0.666,0.091},
    {0.243,0.267},
    {0.245,0.057},
    {0.343,0.099},
    {0.639,0.161},
    {0.657,0.198},
    {0.360,0.370},
    {0.593,0.042},
    {0.719,0.103},
    {0.359,0.188},
    {0.339,0.241},
    {0.282,0.257},
    {0.748,0.232},
    {0.714,0.346},
    {0.483,0.312},
    {0.478,0.437},
    {0.525,0.369},
    {0.751,0.489},
    {0.532,0.472},
    {0.473,0.376},
    {0.725,0.445},
    {0.446,0.459}
};


// Cluster label of every sample, indexed like dataSet.
// The clustering routines store 1.0, 2.0 or 3.0 here; the initial value 0
// means "no label assigned yet".
double cata[30][1] = { 0 };

// Current cluster centers: (xx1, yy1) is the center of class 1,
// (xx2, yy2) of class 2 and (xx3, yy3) of class 3.
// They are first set by Center() and then overwritten by each k-means pass.
double xx1;
double xx2 ;
double xx3 ;
double yy1 ;
double yy2 ;
double yy3 ;


// Indices (rows of dataSet) of the three samples used as initial centers.
int h1, h2, h3;

// Picks three pairwise distinct sample indices with rand() and copies the
// coordinates of those samples into the global center variables.
void Center() {
    // Draw the indices one after another, redrawing on any collision so that
    // the three starting centers are different samples.
    h1 = rand() % 30;
    do {
        h2 = rand() % 30;
    } while (h2 == h1);
    do {
        h3 = rand() % 30;
    } while (h3 == h1 || h3 == h2);

    // Initialise each center from its chosen sample.
    const double* first = dataSet[h1];
    const double* second = dataSet[h2];
    const double* third = dataSet[h3];
    xx1 = first[0];
    yy1 = first[1];
    xx2 = second[0];
    yy2 = second[1];
    xx3 = third[0];
    yy3 = third[1];
}




// Returns the Euclidean distance between the points (x1, y1) and (x2, y2).
double EucD(double x1, double y1, double x2, double y2) {
    const double dx = x1 - x2;
    const double dy = y1 - y2;
    const double squaredLength = dx * dx + dy * dy;
    return sqrt(squaredLength);
}

// Given the distances of one sample to the three centers, returns the class
// (1.0, 2.0 or 3.0) of the nearest center. Only a strictly smaller distance
// replaces the current best, so on a tie the lower class number is returned.
double C(double t1, double t2, double t3) {
    const bool secondBeatsFirst = t2 < t1;
    const double nearestOfFirstTwo = secondBeatsFirst ? t2 : t1;

    if (t3 < nearestOfFirstTwo) {
        return 3.0;
    }
    return secondBeatsFirst ? 2.0 : 1.0;
}


// One k-means iteration over all 30 samples.
//
// Every sample is assigned to the class of its nearest current center (the
// label is stored in cata), then each center is moved to the mean of the
// samples assigned to it.
//
// A class that received no sample keeps its previous center: dividing the
// empty sums would yield 0.0/0.0 = NaN, and a NaN center makes every later
// distance comparison false.
void Kmeans() {
    // Per-class coordinate sums and member counts; slot k belongs to class k+1.
    double sumX[3] = { 0.0, 0.0, 0.0 };
    double sumY[3] = { 0.0, 0.0, 0.0 };
    int members[3] = { 0, 0, 0 };

    for (int i = 0; i < 30; i++) {
        const double px = dataSet[i][0];
        const double py = dataSet[i][1];

        // Distance of the sample to each of the three centers.
        const double t1 = EucD(xx1, yy1, px, py);
        const double t2 = EucD(xx2, yy2, px, py);
        const double t3 = EucD(xx3, yy3, px, py);

        // Record the class of the nearest center for sample i.
        const double label = C(t1, t2, t3);
        cata[i][0] = label;

        // C() returns 1.0, 2.0 or 3.0, so this is a valid slot 0..2.
        const int k = static_cast<int>(label) - 1;
        sumX[k] += px;
        sumY[k] += py;
        members[k] += 1;
    }

    // Move every non-empty class center to the mean of its members.
    if (members[0] > 0) {
        xx1 = sumX[0] / members[0];
        yy1 = sumY[0] / members[0];
    }
    if (members[1] > 0) {
        xx2 = sumX[1] / members[1];
        yy2 = sumY[1] / members[1];
    }
    if (members[2] > 0) {
        xx3 = sumX[2] / members[2];
        yy3 = sumY[2] / members[2];
    }
}



//第一次迭代,迭代的特例,在分类过程中不考虑三个中心点
void Kmeans1() {
    double t1, t2, t3;
    double sum11 = 0.0, sum12 = 0.0, sum13 = 0.0;
    double sum21 = 0.0, sum22 = 0.0, sum23 = 0.0;//记录标签值的累加
    double s1 = 0.0, s2 = 0.0, s3 = 0.0;//记录这一个类有多少个样本
    for (int i = 0; i < 30; i++) { //计算每一个样本与三个中心的欧式距离,分到最近的那一类
        if (i != 5 && i != 13 && i != 17) {
            t1 = EucD(xx1, yy1, dataSet[i][0], dataSet[i][1]);
            t2 = EucD(xx2, yy2, dataSet[i][0], dataSet[i][1]);
            t3 = EucD(xx3, yy3, dataSet[i][0], dataSet[i][1]);
            cata[i][0] = C(t1, t2, t3);   //记录第i个样本是第几类

            //根据样本的类别对标签值进行累加计算
            if (cata[i][0] == 1.0) {
                sum11 = sum11 + dataSet[i][0];
                sum21 = sum21 + dataSet[i][1];
                s1 = s1 + 1.0;
            }
            if (cata[i][0] == 2.0) {
                sum12 = sum12 + dataSet[i][0];
                sum22 = sum22 + dataSet[i][1];
                s2 = s2 + 1.0;
            }
            if (cata[i][0] == 3.0) {
                sum13 = sum13 + dataSet[i][0];
                sum23 = sum23 + dataSet[i][1];
                s3 = s3 + 1.0;
            }
        }
    }
    xx1 = sum11 / s1;
    xx2 = sum12 / s2;
    xx3 = sum13 / s3;
    yy1 = sum21 / s1;
    yy2 = sum22 / s2;
    yy3 = sum23 / s3;
    cata[h1][0] = 1.0;
    cata[h2][0] = 2.0;
    cata[h3][0] = 3.0;
}



// Program entry point.
//
// Chooses three random initial centers, runs the special first iteration and
// then repeats Kmeans() until a pass leaves every sample's label unchanged.
// Finally prints, for each of the three classes, the 1-based numbers of the
// samples assigned to it.
int main() {
    double cata1[30][1]; // labels as they were before the latest pass

    // Seed the generator; without this rand() produces the same sequence on
    // every run and the "random" initial centers would never change.
    std::srand(static_cast<unsigned int>(std::time(nullptr)));

    Center();  // choose the initial centers
    Kmeans1(); // first iteration

    while (true) {
        // Keep a copy of the current labels to compare against afterwards.
        std::memcpy(cata1, cata, sizeof cata);
        Kmeans();

        // Converged when no sample changed its class during this pass.
        bool changed = false;
        for (int i = 0; i < 30; i++) {
            if (cata1[i][0] != cata[i][0]) {
                changed = true;
                break;
            }
        }
        if (!changed) break;
    }

    // Print the members of each class; sample numbers are 1-based.
    const char* const headers[3] = { "第一个类有:", "第二个类有:", "第三个类有:" };
    const double labels[3] = { 1.0, 2.0, 3.0 };
    for (int k = 0; k < 3; k++) {
        cout << headers[k];
        for (int i = 0; i < 30; i++) {
            if (cata[i][0] == labels[k]) {
                cout << i + 1 << ",";
            }
        }
        cout << '\n';
    }

    return 0;
}
  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值