聚类算法的学习研究与实现

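所有代码均已测试,复制粘贴即可运行(注意:头文件中引用的 common.h 未在本文给出,需自行提供;从代码用法看,它应包含 <iostream>、<fstream>、<vector>、<cmath> 等标准头文件以及 using namespace std;)。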

头文件(cluster.h)如下:

#ifndef CLASTER_H
#define CLASTER_H

#include "common.h"

//聚类算法编写
//	http://blog.csdn.net/cai0538/article/details/7061922
//	http://blog.csdn.net/qll125596718/article/details/8243404
// A point in the 2-D plane; the unit of data handled by the clustering code.
struct Tuple
{
	float axis_x;	// x coordinate
	float axis_y;	// y coordinate
};

// K-means clustering over a fixed set of 2-D points.
// An instance holds the cluster count and its own copy of the points;
// KMeans() performs the clustering and prints the clusters to standard output.
class clasterOne
{
private:
	const int k;	// number of clusters; fixed at construction
	vector<Tuple> tuples;	// the points to be clustered
public:
	// Stores the cluster count and a copy of the points.
	clasterOne(int,vector<Tuple>);
	// Euclidean distance between points a and b.
	float getDistXY( Tuple a, Tuple b );
	// Index of the entry in means[] that lies closest to tuple;
	// means[] is read at indices 0..k-1.
	int clusterOfTuple( Tuple means[], Tuple tuple );
	// Sum, over the k clusters, of the distance from every member to means[i]
	// of its cluster.
	float getVar( vector<Tuple> clusters[], Tuple means[] );
	// Coordinate-wise average of the points in cluster.
	Tuple getMeans( vector<Tuple> cluster );
	// Clusters the stored points and prints the clusters after every round.
	void KMeans( );	
};

void clasterOneTest();




#endif

CPP文件如下:

#include "cluster.h"

void clasterOneTest()
{
	char fname[256] = "data.txt";
	int K;
	//cout<<"Please input the file name: ";
	//cin>>fname;
	cout<<endl;
	cout<<"Please input the k num:";
	cin>>K;
	cout<<endl;
	ifstream infile;
	infile.open(fname,ios::in);
	if( !infile ){
		cout<<"Cannot open the file-"<<fname<<endl;
		return;
	}
	int count = 0;
	vector<Tuple> tuples;
	Tuple tuple;
	while( !infile.eof() )
	{
		count++;
		if( count%2 == 1 )
			infile>>tuple.axis_x;
		else{
			infile>>tuple.axis_y;
			tuples.push_back(tuple);
		}
	}
	cout<<"Now, you have the following numbers:"<<endl;
	for( vector<Tuple>::size_type ix = 0; ix != tuples.size(); ++ix ){
		cout<<"("<<tuples[ix].axis_x<<","<<tuples[ix].axis_y<<")"<<"	";
	}
	cout<<endl;
	clasterOne cOne( K, tuples );
	cout<<"Begin to cluster:"<<endl;
	cOne.KMeans();
	
	return;
}


// Records the requested cluster count and keeps a copy of the input points.
clasterOne::clasterOne( int k, vector<Tuple> vT )
	: k( k ),
	  tuples( vT )
{
}

// Returns the straight-line (Euclidean) distance between points a and b.
float clasterOne::getDistXY( Tuple a, Tuple b )
{
	const float dx = a.axis_x - b.axis_x;
	const float dy = a.axis_y - b.axis_y;
	return sqrt( dx*dx + dy*dy );
}

// Returns the index of the centre in means[] (indices 0..k-1) that lies
// closest to tuple. When several centres are equally close, the one with the
// lowest index is returned, because only a strictly smaller distance replaces
// the current best.
int clasterOne::clusterOfTuple( Tuple means[], Tuple tuple )
{
	int nearest = 0;
	float best = getDistXY( means[0], tuple );
	int idx = 1;
	while( idx < k )
	{
		const float candidate = getDistXY( means[idx], tuple );
		if( candidate < best )
		{
			best = candidate;
			nearest = idx;
		}
		++idx;
	}
	return nearest;
}

float clasterOne::getVar( vector<Tuple> clusters[], Tuple means[] )
{
	float var = 0;
	for( int i = 0; i < k; i++ )
	{
		vector<Tuple> t = clusters[i];
		for( int j = 0; j < t.size(); j++ ){
			var += getDistXY( t[j], means[i] );
		}
	}
	return var;
}

// Returns the centre of cluster: the average of the x coordinates and the
// average of the y coordinates of its points. The sums are accumulated in
// double and narrowed to float when stored in the result.
// NOTE(review): for an empty cluster both divisions are 0.0 / 0, which on
// IEEE-754 platforms presumably yields NaN coordinates - callers should not
// pass an empty cluster.
Tuple clasterOne::getMeans( vector<Tuple> cluster )
{
	const int count = cluster.size();
	double sumX = 0;
	double sumY = 0;
	for( vector<Tuple>::const_iterator it = cluster.begin(); it != cluster.end(); ++it ){
		sumX += it->axis_x;
		sumY += it->axis_y;
	}
	Tuple centre;
	centre.axis_x = sumX / count;
	centre.axis_y = sumY / count;
	return centre;
}

void clasterOne::KMeans( )
{
	vector<Tuple> *clusters= new vector<Tuple>[k]; // 向量数组,一个数组元素是一个vector向量
	Tuple *means = new Tuple[k];   // 用于保存各个簇的中心(均值)
	int i =0;
	for( i = 0; i < k; i++ ){	// 初始并没有簇,那么均值设置为前k个向量的值,意思是默认地,将前K个向量作为簇中心
		means[i].axis_x = tuples[i].axis_x;
		means[i].axis_y = tuples[i].axis_y;
	}
	int lable = 0;
	for( i = 0; i!=tuples.size(); i++ ){	//	以初始簇中心,依次判断每个向量属于哪一个簇
		lable = clusterOfTuple(means, tuples[i] );
		//cout<<"push_back the: "<<"("<<tuples[i].axis_x<<","<<tuples[i].axis_y<<")"<<endl;
		clusters[lable].push_back(tuples[i]); // 将当前已经做了判断的点,放入相应的簇中
		//cout<<"push_back the: "<<"("<<(*(clusters[lable].end()-1)).axis_x<<","<<(*(clusters[lable].end()-1)).axis_y<<")"<<endl;
	}
	for( lable = 0; lable < k; lable++ ) // 打印初始聚类
	{
		cout<<"the "<<lable+1<<"th cluster:"<<endl;
		cout<<"the means is: "<<means[lable].axis_x<<","<<means[lable].axis_y<<endl;
		vector<Tuple> t = clusters[lable];
		for( i = 0; i < t.size(); i++ ){
			cout<<"("<<t[i].axis_x<<","<<t[i].axis_y<<")"<<" ";
		}
		cout<<endl;
	}
	float oldVar = -1;
	float newVar = getVar(clusters, means);
	while( abs(newVar - oldVar ) >= 1 )
	{
		for( i = 0; i < k; i++ ){
			means[i] = getMeans(clusters[i]);
		}
		oldVar = newVar;
		newVar = getVar( clusters, means );
		for( i = 0; i < k; i++ ){
			clusters[i].clear();
			if( clusters[i].size() == 0 )
				cout<<"Now,the cluster is cleared"<<endl;
		}
		for( i = 0; i != tuples.size(); ++i ){
			lable = clusterOfTuple( means, tuples[i] );
			clusters[lable].push_back(tuples[i]);
		}
		for( lable = 0; lable < k; lable++ ){
			cout<<"The "<<lable+1<<"th cluster: "<<endl;
			vector<Tuple> t = clusters[lable];
			for( i = 0; i < t.size(); i++ ){
				cout<<"("<<t[i].axis_x<<","<<t[i].axis_y<<")"<<" ";
			}
			cout<<endl;
		}
	}
}


数据(保存为 data.txt,每行一个点的 x、y 坐标)如下:

1 1
2 1
1 2
2 2
4 3
5 3
4 4
5 4
9 9
10 10
12 12
12 11
11 10
12 11
11 12
100 100


运行结果如下:





  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值