聚类算法Kmeans

C++实现Kmeans算法

最近在做一个医学影像处理相关的项目,用到了聚类算法,记录一下

聚类算法Kmeans

K-means 算法会将数据集划分为 K 个簇,每个簇用簇内所有样本的均值来表示,该均值称为“质心”。
优点:原理简单,容易实现;聚类速度快;
缺点:易受初始质心的影响;算法可能收敛到局部最优或不收敛;

改进

选择初始质心:初始的聚类中心之间的相互距离要尽可能的远。
1、从样本中选择 1 个点作为初始质心(完全随机)
2、对于任意一个非质心样本 x,计算x与现有最近质心距离 D(x)
3、基于 D(x) 计算概率(D(x) 越大,被选中的概率越大),据此选择下一个质心,即优先选择距离现有质心较远的点。注:下文代码做了简化,直接选取到所有已有质心距离之和最大的样本作为下一个质心
4、重复步骤 2 与 3 ,直到选择 k 个质心为止。

Kmeans.h

#pragma once

#ifndef K_MEANS_H
#define K_MEANS_H
#include<vector>
using namespace std;


/**
 * One sample to be clustered: a point described by three float coordinates.
 *
 * The dimension of a cluster object is fixed at three here; data with more
 * dimensions could be represented with a dynamic array instead of named fields.
 * The type stays a plain aggregate so it can be brace-initialized as {x, y, z}.
 */
struct Tuple
{
	float x; ///< first coordinate
	float y; ///< second coordinate
	float z; ///< third coordinate
};

class K_means
{
private:
	vector<Tuple> basic_data;//initialization data
	int mean_number;//k number	
	vector<vector<Tuple>> result_data;//{ mean_number };//The resulting data is also used to store intermediate clustering data
	vector<Tuple> means;//The cluster center of the intermediate computation store
	vector<float> mea_square_error;//Mean square deviation of each cluster


public:
	K_means(vector<Tuple> basic_data, int mean_number) : result_data(mean_number), mea_square_error(mean_number)
	{
		this->basic_data = basic_data;
		this->mean_number = mean_number;
	}
	//Calculate the final clustering result
	vector<Tuple> get_fina_result_data();
	
private:
	//Threshold selection
	//vector<Tuple> threshold_selection(int x, int y, int z);
	//Calculate the distance between two points
	float get_distxyz(Tuple tuple1, Tuple tuple2);
	//Initialize k cluster centers
	void prime_k_meansbase();
	//Initialize to get K clusters
	void get_k_means();
	//Get the current class center of each cluster
	void get_basemean();
	//Get new cluster
	void get_new_k_means();
	//You get the variance of each of these clusters
	void get_k_mean_erro();
};
#endif

Kmeans.cpp

#include "kmeans.h"

#include <io.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;

// Largest finite single-precision value (the literal matches the usual FLT_MAX from <cfloat>).
// NOTE(review): not referenced by any code visible in this file - confirm whether it is still needed.
#define FLT_MAXX 3.402823466e+38F

/**
 * @brief Euclidean distance between two three-dimensional points.
 *
 * @param tuple1 first point
 * @param tuple2 second point
 * @return sqrt(dx^2 + dy^2 + dz^2) as a float
 *
 * The previous implementation stored the result in an `int`, which truncated
 * every distance toward zero (e.g. 0.9 became 0) and distorted both the
 * nearest-center search and the per-cluster variance. The value is now
 * returned without truncation.
 */
float K_means::get_distxyz(Tuple tuple1, Tuple tuple2)
{
    // Work in double, as the original pow-based expression did, and square by
    // multiplication instead of calling the general-purpose pow().
    const double dx = tuple1.x - tuple2.x;
    const double dy = tuple1.y - tuple2.y;
    const double dz = tuple1.z - tuple2.z;
    return static_cast<float>(std::sqrt(dx * dx + dy * dy + dz * dz));
}
//Initialize k center points    
void K_means::prime_k_meansbase()
{   
    
    struct timeb timeSeed;
    ftime(&timeSeed);
    srand(timeSeed.time * 1000 + timeSeed.millitm);
    //srand((unsigned)time(0));
    int max_size = basic_data.size();
    int rand1 = rand() % max_size;
    cout << "index0: " << rand1 << endl;
    means.push_back(basic_data[rand1]);

    for (int t = 1; t != mean_number; t++)
    {
        int c_size = t;
        float maxs = 0;
        int index = -1;
        for (int i = 0; i < basic_data.size(); i++) {
            float sum = 0;
            for (int j = 0; j < c_size; j++) {
                sum += pow(pow((basic_data[i].x - means[j].x), 2) + pow((basic_data[i].y - means[j].y), 2)
                    + pow((basic_data[i].z - means[j].z), 2), 0.5);
            }

            if (sum > maxs) {
                maxs = sum;
                index = i;
            }
        }
        //cout << "index: " << index << " ";
        means.push_back(basic_data[index]);


    }
}

//get k clusters
void K_means::get_k_means()
{
    for (int i = 0; i != basic_data.size(); ++i)
    {
        float min_distance = 0x3f3f3f3f;
        int static temp;
        for (int j = 0; j != mean_number; ++j)
        {
            if (min_distance > get_distxyz(basic_data[i], means[j]))
            {
                min_distance = get_distxyz(basic_data[i], means[j]);
                temp = j;
                //cout << temp << endl;;
            }
        }
        result_data[temp].push_back(basic_data[i]);
    }
}

//Compute the center of the cluster
void K_means::get_basemean()
{
    vector<Tuple> A_mean;
    for (int j = 0; j < mean_number; j++)
    {
        float sumx = 0;
        float sumy = 0;
        float sumz = 0;
        int num = result_data[j].size();
        A_mean = result_data[j];
        //cout << num << endl;
        for (int i = 0; i < num; i++)
        {
            sumx += A_mean[i].x;
            sumy += A_mean[i].y;
            sumz += A_mean[i].z;
        }
        Tuple basemean = { sumx / num,sumy / num,sumz / num };
        means[j] = basemean;
    }

}

//Recalculated cluster
void K_means::get_new_k_means()
{
    result_data.clear();
    result_data = vector<vector<Tuple>>(mean_number);
    for (int i = 0; i != basic_data.size(); ++i)
    {
        float min_distance = 0x3f3f3f3f;
        int static temp2;
        for (int j = 0; j != mean_number; ++j)
        {
            if (min_distance > get_distxyz(basic_data[i], means[j]))
            {
              
                min_distance = get_distxyz(basic_data[i], means[j]);
                temp2 = j;
            }
        }
        result_data[temp2].push_back(basic_data[i]);
    }
}

//Calculate the variance
void K_means::get_k_mean_erro()
{
    vector<float> square_error;
    for (int j = 0; j < mean_number; j++)
    {
        float sum = 0;
        int num = result_data[j].size();

        vector<Tuple> A_mean = result_data[j];
        Tuple basemean = means[j];

        for (int i = 0; i < num; i++)
        {
            sum += pow(get_distxyz(A_mean[i], basemean), 2);
        }
        float result = sum / num;
        mea_square_error[j] = result;
    }
}




// Runs the complete clustering and returns the cluster centers.
//
// Flow:
//   1. Choose initial centers, build the first clusters and compute their errors.
//   2. Repeat: recompute the centers, reassign the samples, recompute the errors.
//        - after 2000 iterations: log a failure and return `appr_mean`;
//        - on every other 200th iteration: restart from freshly chosen centers;
//        - otherwise, when the error vector is exactly equal to the one from
//          before this iteration: accept the result if every cluster error is
//          below 100, else remember the centers in `appr_mean` (subject to the
//          `temp` check below) and restart from freshly chosen centers.
//   3. Return `means` once a result has been accepted.
//
// Returns: `means` on success; `appr_mean` when the iteration limit is hit.
//          `appr_mean` is only assigned inside the restart branch, so it is
//          still empty if that branch was never taken.

vector<Tuple> K_means::get_fina_result_data()
{
    //Initialize
    prime_k_meansbase();
   
    get_k_means();
  
    get_k_mean_erro();
    
    int i = 1;        // loop flag: 1 = keep iterating, 0 = result accepted
    int number = 0;   // iterations executed so far
    int temp = -1;    // count_noerror seen at the most recent restart-after-convergence
    vector<Tuple> appr_mean;   // fallback centers returned when the iteration limit is reached

    while (i)
    {   
        number++;
        // Snapshot of the errors before this iteration, used for the convergence test.
        vector<float> last_k_mean_erro = mea_square_error;
       
        get_basemean();
        get_new_k_means();
        get_k_mean_erro();
        
        int count_noerror = 0;   // clusters whose error is below the threshold of 100

        if (number == 2000) {
            // NOTE(review): neither LOG4CXX_DEBUG nor VBNLogger is declared in the
            // code visible here - presumably they come from a log4cxx setup elsewhere; confirm.
            LOG4CXX_DEBUG(VBNLogger, "K-means Algorithm failure");
            return appr_mean;
        }
        else if (number %200 == 0) {

            // Periodic restart: drop the centers and begin again from a new initial choice.
            means.clear();
            prime_k_meansbase();
            get_k_means();
            get_k_mean_erro();
            cout << "number:" << number << endl;
         
        }
        else{
            // Converged when this iteration left every cluster error unchanged
            // (exact float comparison of the two vectors).
            if (mea_square_error == last_k_mean_erro) {
                i = 0;   // tentatively accept; reset to 1 below if the result is rejected
                for (int j = 0; j != mean_number; ++j)
                {
                    
                    if (last_k_mean_erro[j] < 100) {
                        count_noerror++;
                    }
                }
               

                // At least one cluster has an error of 100 or more: reject and restart.
                if (count_noerror != mean_number) {
                    if (temp < count_noerror) {
                        appr_mean = means;
                        //temp = count_noerror;
                    }

                    // NOTE(review): temp is overwritten unconditionally, so the check above
                    // compares against the previous restart, not against the best count seen
                    // so far - confirm which of the two is intended.
                    temp = count_noerror;
                    means.clear();
                    /*result_data.clear();
                    mea_square_error.clear();*/
                    prime_k_meansbase();
                    get_k_means();
                    get_k_mean_erro();
                    i = 1;
                }
            }
        }
    }
    return means;

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值