所有代码均已测试,复制粘贴即可运行。
头文件(cluster.h)如下:
#ifndef CLASTER_H
#define CLASTER_H
#include "common.h"
//聚类算法编写
// http://blog.csdn.net/cai0538/article/details/7061922
// http://blog.csdn.net/qll125596718/article/details/8243404
// A 2-D sample point used by the clustering code.
struct Tuple
{
    float axis_x; // x coordinate
    float axis_y; // y coordinate
};
// K-means clustering over a fixed set of 2-D points.
class clasterOne
{
public:
    // Stores the cluster count and a copy of the points to be clustered.
    clasterOne( int clusterCount, vector<Tuple> points );

    // Distance between the two given points.
    float getDistXY( Tuple a, Tuple b );

    // Index (0-based) of the center in `means` that is closest to `tuple`.
    int clusterOfTuple( Tuple means[], Tuple tuple );

    // Accumulated distance of every clustered point to its cluster's center.
    float getVar( vector<Tuple> clusters[], Tuple means[] );

    // Center (per-axis average) of the points in `cluster`.
    Tuple getMeans( vector<Tuple> cluster );

    // Runs the clustering and prints the clusters to standard output.
    void KMeans( );

private:
    const int k;          // number of clusters
    vector<Tuple> tuples; // points to be clustered
};
// Interactive demo: asks for k on standard input, loads the points from
// "data.txt" and runs clasterOne::KMeans on them.
void clasterOneTest();
#endif
CPP文件如下:
#include "cluster.h"
void clasterOneTest()
{
char fname[256] = "data.txt";
int K;
//cout<<"Please input the file name: ";
//cin>>fname;
cout<<endl;
cout<<"Please input the k num:";
cin>>K;
cout<<endl;
ifstream infile;
infile.open(fname,ios::in);
if( !infile ){
cout<<"Cannot open the file-"<<fname<<endl;
return;
}
int count = 0;
vector<Tuple> tuples;
Tuple tuple;
while( !infile.eof() )
{
count++;
if( count%2 == 1 )
infile>>tuple.axis_x;
else{
infile>>tuple.axis_y;
tuples.push_back(tuple);
}
}
cout<<"Now, you have the following numbers:"<<endl;
for( vector<Tuple>::size_type ix = 0; ix != tuples.size(); ++ix ){
cout<<"("<<tuples[ix].axis_x<<","<<tuples[ix].axis_y<<")"<<" ";
}
cout<<endl;
clasterOne cOne( K, tuples );
cout<<"Begin to cluster:"<<endl;
cOne.KMeans();
return;
}
// Stores the requested number of clusters and a copy of the input points.
clasterOne::clasterOne( int clusterCount, vector<Tuple> points )
    : k( clusterCount ),
      tuples( points )
{
}
// Returns the Euclidean (straight-line) distance between points a and b.
float clasterOne::getDistXY( Tuple a, Tuple b )
{
    const float dx = a.axis_x - b.axis_x;
    const float dy = a.axis_y - b.axis_y;
    return sqrt( dx*dx + dy*dy );
}
// Returns the index of the center in `means` (k entries) nearest to `tuple`.
// On a tie the lowest index wins, because a later center only replaces the
// current best when it is strictly closer.
int clasterOne::clusterOfTuple( Tuple means[], Tuple tuple )
{
    int nearest = 0;
    float best = getDistXY( means[0], tuple );
    for( int idx = 1; idx < k; ++idx )
    {
        const float candidate = getDistXY( means[idx], tuple );
        if( !(candidate < best) )
            continue;
        best = candidate;
        nearest = idx;
    }
    return nearest;
}
// Returns the clustering cost used as the convergence measure: the sum, over
// all k clusters, of the distance from each member point to that cluster's
// center. Note that plain distances are summed (not squared distances).
//
// clusters: array of k point lists, one per cluster.
// means:    array of k centers; means[i] is the center of clusters[i].
float clasterOne::getVar( vector<Tuple> clusters[], Tuple means[] )
{
    float var = 0;
    for( int i = 0; i < k; i++ )
    {
        // Bind by reference: the cluster is only read, so copying it is waste.
        const vector<Tuple>& members = clusters[i];
        for( vector<Tuple>::size_type j = 0; j < members.size(); j++ ){
            var += getDistXY( members[j], means[i] );
        }
    }
    return var;
}
// Returns the center of `cluster`: the average of the x coordinates and the
// average of the y coordinates, accumulated in double precision.
// An empty cluster divides by zero, so the result is not a finite point
// (NaN on IEEE-754 platforms); callers should avoid passing one.
Tuple clasterOne::getMeans( vector<Tuple> cluster )
{
    double sumX = 0;
    double sumY = 0;
    for( vector<Tuple>::const_iterator it = cluster.begin(); it != cluster.end(); ++it ){
        sumX += it->axis_x;
        sumY += it->axis_y;
    }

    const int count = cluster.size();
    Tuple center;
    center.axis_x = sumX / count;
    center.axis_y = sumY / count;
    return center;
}
void clasterOne::KMeans( )
{
vector<Tuple> *clusters= new vector<Tuple>[k]; // 向量数组,一个数组元素是一个vector向量
Tuple *means = new Tuple[k]; // 用于保存各个簇的中心(均值)
int i =0;
for( i = 0; i < k; i++ ){ // 初始并没有簇,那么均值设置为前k个向量的值,意思是默认地,将前K个向量作为簇中心
means[i].axis_x = tuples[i].axis_x;
means[i].axis_y = tuples[i].axis_y;
}
int lable = 0;
for( i = 0; i!=tuples.size(); i++ ){ // 以初始簇中心,依次判断每个向量属于哪一个簇
lable = clusterOfTuple(means, tuples[i] );
//cout<<"push_back the: "<<"("<<tuples[i].axis_x<<","<<tuples[i].axis_y<<")"<<endl;
clusters[lable].push_back(tuples[i]); // 将当前已经做了判断的点,放入相应的簇中
//cout<<"push_back the: "<<"("<<(*(clusters[lable].end()-1)).axis_x<<","<<(*(clusters[lable].end()-1)).axis_y<<")"<<endl;
}
for( lable = 0; lable < k; lable++ ) // 打印初始聚类
{
cout<<"the "<<lable+1<<"th cluster:"<<endl;
cout<<"the means is: "<<means[lable].axis_x<<","<<means[lable].axis_y<<endl;
vector<Tuple> t = clusters[lable];
for( i = 0; i < t.size(); i++ ){
cout<<"("<<t[i].axis_x<<","<<t[i].axis_y<<")"<<" ";
}
cout<<endl;
}
float oldVar = -1;
float newVar = getVar(clusters, means);
while( abs(newVar - oldVar ) >= 1 )
{
for( i = 0; i < k; i++ ){
means[i] = getMeans(clusters[i]);
}
oldVar = newVar;
newVar = getVar( clusters, means );
for( i = 0; i < k; i++ ){
clusters[i].clear();
if( clusters[i].size() == 0 )
cout<<"Now,the cluster is cleared"<<endl;
}
for( i = 0; i != tuples.size(); ++i ){
lable = clusterOfTuple( means, tuples[i] );
clusters[lable].push_back(tuples[i]);
}
for( lable = 0; lable < k; lable++ ){
cout<<"The "<<lable+1<<"th cluster: "<<endl;
vector<Tuple> t = clusters[lable];
for( i = 0; i < t.size(); i++ ){
cout<<"("<<t[i].axis_x<<","<<t[i].axis_y<<")"<<" ";
}
cout<<endl;
}
}
}
数据(data.txt,每行一个点:x y)如下:
1 1
2 1
1 2
2 2
4 3
5 3
4 4
5 4
9 9
10 10
12 12
12 11
11 10
12 11
11 12
100 100
运行结果如下: