[algorithm,c++] 基于c++的二维k-means代码实现

16 篇文章 0 订阅

利用上课期间打的草稿,在午饭期间写好的。这里仅仅实现了数据输入,

后续的 k-means 算法已经写好 ,等到第一节课下课之后再编码实现。

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <vector>

using namespace std ;

/**
 * A point in the 2-D plane; the element type that the program reads
 * and clusters.
 */
struct obj
{
	float _x ;   // first coordinate
	float _y ;   // second coordinate

	// Builds a point from explicit coordinates.
	obj ( float x , float y ):_x(x) , _y(y) {}

	// Builds the origin (0, 0), so a default-constructed point never
	// carries indeterminate coordinates.
	obj ():_x(0.0f) , _y(0.0f) {}
} ;

// Alias used by the rest of the program.
typedef obj object ;



/**
 * Reads the clustering problem interactively from standard input.
 *
 * Order of the dialogue: k, then k initial center points, then n, then
 * n data points. Every point is allocated with new and appended to the
 * matching vector; the caller owns the pointers afterwards.
 *
 * @param k               receives the number of center points actually stored
 * @param n               receives the number of data points actually stored
 * @param dataList        receives the data points
 * @param curCenterPoint  receives the initial center points
 *
 * If a value cannot be parsed (or a count is negative) reading stops,
 * and *k / *n report how many points were stored up to that moment.
 */
void input ( int *k , int *n , vector<object*> &dataList , vector<object*> &curCenterPoint )
{
    float x = 0.0f ;
    float y = 0.0f ;

    // Start from a defined state so the caller never sees indeterminate counts.
    *k = 0 ;
    *n = 0 ;

    int centerCount = 0 ;
    cout<<"input value of k "<<endl;
    if ( scanf ("%d" , &centerCount) != 1 || centerCount < 0 )
        return ;

    printf("input k center points , initializing \n") ;

    for ( int i = 0 ; i < centerCount ; i++ )
    {
        printf("%d center point\n", (i+1)) ;
        printf("x:\n");
        if ( scanf("%f", &x) != 1 )
            return ;
        printf("y:\n") ;
        if ( scanf("%f", &y) != 1 )
            return ;

        curCenterPoint.push_back( new object(x, y ) ) ;
        *k = i + 1 ;
    }

    int pointCount = 0 ;
    cout<<"input value of n "<<endl ;
    if ( scanf("%d" , &pointCount) != 1 || pointCount < 0 )
        return ;

    cout<<"input n points into dataList "<<endl ;

    // The loop index is declared here: the index of the loop above is
    // out of scope under standard C++ for-scope rules.
    for ( int i = 0 ; i < pointCount ; i++ )
    {
        printf("x:\n") ;
        if ( scanf("%f" , &x) != 1 )
            return ;
        printf("y:\n") ;
        if ( scanf("%f" , &y) != 1 )
            return ;

        dataList.push_back( new object(x,  y)) ;
        *n = i + 1 ;
    }
}

int main ( int argc , char *argv [] )
{
	vector<object*> datalist ;
	vector<object*> curCenterPoint ;
	vector<object*> lastCenterPoint ;
	vector<vector<int> > cluster ;
	int k , n ;

    input (&k , &n , datalist, curCenterPoint ) ;

   return 0 ;
}


//这个是下课之后实现的k-means,目前还没有运行大型的数据集,但是算法中的各个子模块都已经经过测试,确定没有问题。

//稍后找一个数据集合测试一下,然后尽量改成递归的方式实现调用


#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <math.h>
#include <vector>


using namespace std ;


/**
 * A point in the 2-D plane; the element type that is clustered.
 *
 * Comparisons treat two coordinates as the same value when they differ
 * by less than 0.000001.
 */
struct obj
{
    float _x ;   // first coordinate
    float _y ;   // second coordinate

    // Builds a point from explicit coordinates.
    obj ( float x , float y ):_x(x) , _y(y) {}

    // Builds the origin (0, 0), so a default-constructed point never
    // carries indeterminate coordinates.
    obj ():_x(0.0f) , _y(0.0f) {}

    // True when both coordinates match within the tolerance.
    bool operator==(const obj&other) const
    {
        return ( std::fabs(_x - other._x) < 0.000001 )
            && ( std::fabs(_y - other._y) < 0.000001 ) ;
    }

    // Lexicographic order on (x, y) using the same tolerance as
    // operator==. A coordinate only decides the order when it differs
    // by at least the tolerance; this keeps the relation asymmetric
    // (a < b and b < a can never both hold) and makes points that
    // compare equal unordered with respect to each other.
    bool operator<(const obj&other) const
    {
        if ( !( std::fabs(_x - other._x) < 0.000001 ) )
            return _x < other._x ;
        if ( !( std::fabs(_y - other._y) < 0.000001 ) )
            return _y < other._y ;
        return false ;
    }

    // Prints the point to standard output as "x: <x>  y: <y>".
    void showObject() const
    {
        std::cout<<"x: "<<_x<<"  y: "<<_y<<std::endl;
    }
} ;


// Alias used by the rest of the program.
typedef obj object ;

/**
 * City-block (Manhattan) distance between two points: the sum of the
 * absolute differences of their coordinates.
 *
 * @param x  first point
 * @param y  second point
 * @return   |x._x - y._x| + |x._y - y._y|
 */
float CityBlock( object &x ,object &y)
{
    // Floating-point addition is commutative, so the operand order does
    // not affect the value; the result is narrowed to float on return.
    return fabs(x._y - y._y) + fabs(x._x - y._x) ;
}


/**
 * Euclidean distance between two points.
 *
 * @param x  first point
 * @param y  second point
 * @return   square root of the sum of the squared coordinate differences
 */
float Eclid(object &x , object &y  )
{
    // Squared length first, stored as float exactly as before, then its root.
    float squaredLength = pow( fabs(x._x - y._x) , 2.0 ) + pow( fabs(x._y - y._y) , 2.0 ) ;

    return sqrtf( squaredLength ) ;
}




// Table of distance functions taking two points. initiFuncEntryPoint()
// assigns slot 0 (CityBlock) and slot 1 (Eclid); slot 2 is not assigned
// anywhere in this file.
float (*fp_table[3]) (object &x, object &y ) ;  
// Cluster membership: cluster[i] holds the indices (into the data list)
// of the objects attributed to the i-th cluster.
vector<vector<int> > cluster ;


/**
 * Fills the distance-function table: slot 0 refers to CityBlock and
 * slot 1 to Eclid. Must run before the table is used for dispatch.
 */
void initiFuncEntryPoint ()
{
    fp_table[1] = &Eclid ;
    fp_table[0] = &CityBlock ;
}




/**
 * Reads the clustering problem interactively from standard input.
 *
 * Order of the dialogue: k, n, the k initial center points, the n data
 * points, and finally the distance method. Every point is allocated
 * with new and appended to the matching vector; the caller owns the
 * pointers afterwards.
 *
 * @param k               receives the number of center points actually stored
 * @param n               receives the number of data points actually stored
 * @param p               receives the distance method entered by the user
 *                        (0 when it could not be read)
 * @param dataList        receives the data points
 * @param curCenterPoint  receives the initial center points
 *
 * If a value cannot be parsed (or a count is negative) a message is
 * written to stderr and reading stops; *k and *n then report how many
 * points were stored up to that moment.
 */
void input ( int *k , int *n ,int *p , vector<object*> &dataList , vector<object*> &curCenterPoint )
{
    // Fill the function pointer table first so it is ready even when
    // reading stops early.
    initiFuncEntryPoint() ;

    // Start from a defined state so the caller never sees indeterminate values.
    *k = 0 ;
    *n = 0 ;
    *p = 0 ;

    float x = 0.0f ;
    float y = 0.0f ;
    int centerCount = 0 ;
    int pointCount = 0 ;

    cout<<"input value of k "<<endl;
    if ( scanf ("%d" , &centerCount) != 1 || centerCount < 0 )
    {
        fprintf( stderr , "input: invalid value for k\n" ) ;
        return ;
    }

    cout<<"input value of n "<<endl ;
    if ( scanf("%d" , &pointCount) != 1 || pointCount < 0 )
    {
        fprintf( stderr , "input: invalid value for n\n" ) ;
        return ;
    }

    printf("input k center points , initializing \n") ;

    for ( int i = 0 ; i < centerCount ; i++ )
    {
        printf("%d center point\n", (i+1)) ;
        printf("x:\n");
        if ( scanf("%f", &x) != 1 )
        {
            fprintf( stderr , "input: invalid center coordinate\n" ) ;
            return ;
        }
        printf("y:\n") ;
        if ( scanf("%f", &y) != 1 )
        {
            fprintf( stderr , "input: invalid center coordinate\n" ) ;
            return ;
        }

        curCenterPoint.push_back( new object(x, y ) ) ;
        *k = i + 1 ;
    }

    cout<<"input n points into dataList "<<endl ;

    for ( int i = 0 ; i < pointCount ; i++ )
    {
        printf("x:\n") ;
        if ( scanf("%f" , &x) != 1 )
        {
            fprintf( stderr , "input: invalid point coordinate\n" ) ;
            return ;
        }
        printf("y:\n") ;
        if ( scanf("%f" , &y) != 1 )
        {
            fprintf( stderr , "input: invalid point coordinate\n" ) ;
            return ;
        }

        dataList.push_back( new object(x,  y)) ;
        *n = i + 1 ;
    }

    cout<<"input distance calculate method : 1 oujilide \n, 0:manhatun \n ,value more than 1 ,is the p value of minkonski"<<endl;

    int method = 0 ;
    if ( scanf("%d" , &method) != 1 )
    {
        fprintf( stderr , "input: invalid distance method\n" ) ;
        return ;
    }
    *p = method ;
}


/**
method : dis
this method is used to calculate the distance between the ith object which is 
stored in dataList , and jth object which is stored in dataList .


parameter list :

int i is the location of the ith element in dataList
int j is the location of the jth element in dataList
int p is the location of the pth element in function pointer array list 
vector<object*> dataList is the dataList in which stores all the data 
from the data set D .
*/
float dis ( int i , int j ,int p , vector<object*> dataList , vector<object*> curCenterPoint ) 
{
 	 
 	 if ( p ==0 )
 		 return CityBlock( (*dataList[i]) , (*curCenterPoint[j])) ;
 	 else if ( p == 1)
 		 return Eclid((*dataList[i]) , (*curCenterPoint[j])) ;
 	 else 
 		 return 0 ;
 	 
 	 //return  	  (*fp_table[p])( *(dataList[i]) , *(curCenterPoint[j]) ) ;
    
}


/**
method : posOfMin
Returns the position of the smallest value in an array of distances,
printing a trace of the search to standard output.

parameter list :
float *distance
  array of k distances (the distances between one object and the k
  center points); element 0 is always read
int k
  number of elements in the array

return value
  index of the minimum. A later element only replaces the current
  minimum when it is smaller by more than 0.000001, so among values
  that are closer together than that the earliest one wins.
*/

int posOfMin( float *distance , int k ) //tested
{
    // Trace: dump every candidate distance first.
    int idx = 0 ;
    while ( idx < k )
    {
        std::cout<<"distance"<<idx<<"  :"<<distance[idx]<<std::endl;
        ++idx ;
    }

    int bestPos = 0 ;
    float smallest = distance[0] ;

    for ( idx = 1 ; idx < k ; ++idx )
    {
        std::cout<<" outer current pos "<<bestPos<<" min "<<smallest<<" distance[i] "<<distance[idx]<<std::endl;

        // Floats are compared with a tolerance instead of a plain '<'.
        const bool clearlySmaller = (smallest - distance[idx]) > 0.000001 ;
        if ( !clearlySmaller )
            continue ;

        smallest = distance[idx] ;
        bestPos = idx ;
        std::cout<<"current pos "<<bestPos<<" min "<<smallest<<" distance[i] "<<distance[idx]<<std::endl;
    }

    return bestPos ;
}
bool isEqual( vector<object*> curCenterPoint , vector<object*>lastCenterPoint ) //tested
{
 	 bool flag = true ;


 	 sort(curCenterPoint.begin()  , curCenterPoint.end() ) ;
 	 sort(lastCenterPoint.begin() , lastCenterPoint.end() ) ;
 	 
 	 for (int i = 0 ; i < curCenterPoint.size() ; i++ )
 	 {
 		 if (!((*curCenterPoint[i])==(*lastCenterPoint[i])))
 		 {
 			 cout<<"show value "<<endl;
 			 printf("(%.2f , %.2f )\n"  , curCenterPoint[i]->_x , curCenterPoint[i]->_y) ;
 			 printf("(%.2f, %.2f) \n"  , lastCenterPoint[i]->_x ,lastCenterPoint[i]->_y) ;
 			 flag = false ;
 			 break ;
 		 }
 	 }


 	 return flag ;
}


void k_means_2 (int k ,int n , int p , vector<object*>&curCenterPoint , vector<object*> dataList )
{
 	 int counter ,k_kind;
 	 float *distance = new float [k] ;
 	 bool flag = true ;
 	 
 	 vector<object*> lastCenterPoint ;
 	 


 	 /**
 	  if we set  pp = cluster[i][x]  this statement's meaning is , 
 	  the pp-th elements stores in the dataset : dataList is belong to
 	  the i-th cluster ,and there are x objects in the dataset belong 
 	  to the i-th cluster.
 	 */
 	 
 	 while (flag )
 	 {
 		 //here we use the lastCenterPoint to store the curCenterPoint
 		 //because the latter one's value will be updated during the 
 		 //processing of re-build k centerPoints


 		 lastCenterPoint = curCenterPoint ;


 		 /**
 		 two-circly is used to calculate the distance between 
 		 i-th object stored in dataList and the j-th centerPoint (0<=j<=k-1),
 		 and stores the distance value into the distance float array ,
 		 and then find the minimum distance in distance array ,
 		 return its position in curCenterPoint by value pos 
 		 that means the i-th object is attributed to the pos-th cluster 
 		 */


 		 for ( int i = 0 ; i < n ; i++ )
 		 {
 			 counter = 0 ;
 			 memset(distance , 0 , k*sizeof(float )) ;
 			 for ( int j = 0 ; j < k  ; j++ )
 			 {
 				 if (i==j) 
 					 distance[j] = 99999;
 				 else
 				 distance[j] = dis(i , j , p ,dataList , curCenterPoint  ) ;
 			 }


 			 k_kind = posOfMin(distance  , k ) ;
 			 cluster[k_kind][counter++] = i;

 		 }//finish one clustering 


 		 /**
 		  next , we are going to re-build the k centerPoints in each new-built
 		  k clusters ,which can be implements in the following steps :
 		  1. 
 		  */


 		 for (int i = 0 ; i < k ; i++ )
 		 {
 			 float sumX = 0 ;
 			 float sumY = 0 ;
 			 for ( int j = 0 ; j < cluster[i].size() ; j++ )
 			 {
 				 //this is a little strange , but do not forget 
 				 //the dataList here is the vector of object pointers 
 				 sumX += dataList[cluster[i][j]]->_x;
 				 sumY += dataList[cluster[i][j]]->_y;
 			 }


 			 sumX /= k ;
 			 sumY /= k ;


 			 curCenterPoint.clear();
 			 curCenterPoint.push_back( new object(sumX , sumY)  ) ;


 			 flag = isEqual(curCenterPoint , lastCenterPoint ) ;
 		 }
 	 }


 	 cout<<"get the result "<<endl;
}


/**
 method:showResult

*/


void showResult (vector<object*> list )
{
 	 for ( int i = 0 ; i < cluster.size()  ;i++ )
 	 {
 		 cout<<"here is the objects attributed to "<<i<<" cluster"<<endl ;
 		 for ( int j = 0 ; j <cluster[i].size() ; j++ )
 		 {
 			 list[cluster[i][j]]->showObject() ;
 		 }
 	 }
}


int main ( int argc , char *argv [] )
{
 	 vector<object*> datalist ;
 	 vector<object*> curCenterPoint ;
 	 
 	 int k , n ,p;
 	  
    input (&k , &n ,&p, curCenterPoint,datalist ) ;


 	 /**
 	 if (!isEqual(datalist , curCenterPoint) )
 		 cout<<"works well the isEqual"<<endl;


 	 
 	 float distance[] ={3.1 , 1.81, 1.8} ;
 	 //here , we are going to testify the posMin function


 	 initiFuncEntryPoint() ; //if we want to use the function pointer array
 	 //we must call this method first 
 		 cout<<"dis is "<<dis(0,0,p, datalist, curCenterPoint)<<endl;
   */
 	 k_means_2(k,n,p,datalist,curCenterPoint) ;
 	 showResult(datalist) ;


   system("pause") ;
   return 0 ;
}





不足之处:

还没有实现支持多维的数据,并且通常情况下,数据集合中的数据都是通过文件的方式作为输入的。

应该在程序中添加一个数据模块,用于从文件读取数据,并将结果数据输出到文件中。

当然,本篇文章的格式处理的有些草率,过后会进行修改的。

这个是最近写的算是比较大的程序了。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值