这段代码是根据上课时打的草稿、在午饭期间写成的,这里仅实现了数据输入部分;
后续的 k-means 算法已经设计好,等第一节课下课之后再编码实现。
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <vector>
using namespace std ;
/**
 * A point in the 2-D plane; the unit of data that the input routine
 * reads and stores.
 */
struct obj
{
    float _x ;   // first coordinate
    float _y ;   // second coordinate

    obj ( float x , float y ) : _x(x) , _y(y) {}

    // Zero-initialise both coordinates so a default-constructed point
    // never carries indeterminate values.
    obj () : _x(0.0f) , _y(0.0f) {}
} ;
typedef obj object ;   // alias used by the rest of the file
/**
 * Interactively reads the clustering input from stdin.
 *
 * Order of the input: k, then k centre points (x then y each), then n,
 * then n data points (x then y each). Every point is allocated with new
 * and appended to the matching vector; the caller owns those objects.
 *
 * @param k               receives the number of centre points stored
 * @param n               receives the number of data points stored
 * @param dataList        receives the n data points
 * @param curCenterPoint  receives the k initial centre points
 *
 * If a count cannot be parsed or is negative it is treated as 0. If a
 * coordinate cannot be parsed, reading of that list stops and the count
 * is lowered to the number of points actually stored.
 */
void input ( int *k , int *n , vector<object*> &dataList , vector<object*> &curCenterPoint )
{
    float x = 0 , y = 0 ;

    cout<<"input value of k "<<endl;
    if ( scanf ("%d" , k) != 1 || *k < 0 )
        *k = 0 ;

    printf("input k center points , initializing \n") ;
    for ( int i = 0 ; i < *k ; i++ )
    {
        printf("%d center point\n", (i+1)) ;
        printf("x:\n");
        if ( scanf("%f", &x) != 1 )
        {
            *k = i ;   // keep the count consistent with what was stored
            break ;
        }
        printf("y:\n") ;
        if ( scanf("%f", &y) != 1 )
        {
            *k = i ;
            break ;
        }
        curCenterPoint.push_back( new object(x, y ) ) ;
    }

    cout<<"input value of n "<<endl ;
    if ( scanf("%d" , n) != 1 || *n < 0 )
        *n = 0 ;

    cout<<"input n points into dataList "<<endl ;
    // The index is declared here: the loop variable of the first loop is
    // out of scope at this point in standard C++.
    for ( int i = 0 ; i < *n ; i++ )
    {
        printf("x:\n") ;
        if ( scanf("%f" , &x) != 1 )
        {
            *n = i ;
            break ;
        }
        printf("y:\n") ;
        if ( scanf("%f" , &y) != 1 )
        {
            *n = i ;
            break ;
        }
        dataList.push_back( new object(x, y)) ;
    }
}
int main ( int argc , char *argv [] )
{
vector<object*> datalist ;
vector<object*> curCenterPoint ;
vector<object*> lastCenterPoint ;
vector<vector<int> > cluster ;
int k , n ;
input (&k , &n , datalist, curCenterPoint ) ;
return 0 ;
}
//这是下课之后实现的 k-means。目前还没有在大型数据集上运行过,但算法中的各个子模块都已经过测试,确认没有问题。
//稍后会找一个数据集测试一下,然后尽量改成以递归的方式实现调用。
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <math.h>
#include <algorithm>
using namespace std ;
/**
 * A point in the 2-D plane. Comparisons treat two coordinates as equal
 * when they differ by less than 0.000001.
 */
struct obj
{
    float _x ;   // first coordinate
    float _y ;   // second coordinate

    obj ( float x , float y ) : _x(x) , _y(y) {}

    // Zero-initialise so a default-constructed point never carries
    // indeterminate coordinates.
    obj () : _x(0.0f) , _y(0.0f) {}

    // True when a and b differ by less than the comparison tolerance.
    static bool nearlyEqual ( float a , float b )
    {
        return fabs(a - b) < 0.000001 ;
    }

    // Two points are equal when both coordinates are within tolerance.
    bool operator==(const obj&other) const
    {
        return nearlyEqual(_x , other._x) && nearlyEqual(_y , other._y) ;
    }

    // Lexicographic order on (x, y). The x values decide only when they
    // differ by at least the tolerance; otherwise the y values decide.
    // This guarantees that a < b and b < a are never both true, and that
    // neither holds exactly when a == b.
    bool operator<(const obj&other) const
    {
        if ( !nearlyEqual(_x , other._x) )
            return _x < other._x ;
        if ( !nearlyEqual(_y , other._y) )
            return _y < other._y ;
        return false ;
    }

    // Prints the point to stdout as "x: <x> y: <y>".
    void showObject() const
    {
        cout<<"x: "<<_x<<" y: "<<_y<<endl;
    }
} ;
typedef obj object ;   // alias used by the rest of the file
/**
 * City-block (Manhattan) distance between two points: the sum of the
 * absolute differences of their coordinates.
 */
float CityBlock( object &x ,object &y)
{
    return fabs(x._x - y._x) + fabs(x._y - y._y) ;
}
/**
 * Euclidean distance between two points.
 *
 * The coordinate differences are squared by plain multiplication rather
 * than through pow(); the squares are still formed in double precision
 * (as the replaced pow(..., 2.0) calls were) and the sum is narrowed to
 * float before the square root is taken.
 */
float Eclid(object &x , object &y )
{
    const double dx = x._x - y._x ;
    const double dy = x._y - y._y ;
    const float squared = static_cast<float>( dx * dx + dy * dy ) ;
    return sqrtf(squared) ;
}
float (*fp_table[3]) (object &x, object &y ) ;
vector<vector<int> > cluster ;
/**
 * Fills the distance-function table: slot 0 is the city-block distance,
 * slot 1 the Euclidean distance. Must run before fp_table is used.
 */
void initiFuncEntryPoint ()
{
    fp_table[1] = &Eclid ;
    fp_table[0] = &CityBlock ;
}
/**
 * Interactively reads the clustering input from stdin.
 *
 * Order of the input: k, n, then k centre points (x then y each), then
 * n data points (x then y each), then the distance selector p. Every
 * point is allocated with new and appended to the matching vector; the
 * caller owns those objects. Finally the distance-function table is
 * initialised.
 *
 * @param k               receives the number of centre points stored
 * @param n               receives the number of data points stored
 * @param p               receives the distance selector
 *                        (0: city-block, 1: Euclidean)
 * @param dataList        receives the n data points
 * @param curCenterPoint  receives the k initial centre points
 *
 * If a count cannot be parsed or is negative it is treated as 0. If a
 * coordinate cannot be parsed, reading of that list stops and the count
 * is lowered to the number of points actually stored. If p cannot be
 * parsed it falls back to 1 (Euclidean).
 */
void input ( int *k , int *n ,int *p , vector<object*> &dataList , vector<object*> &curCenterPoint )
{
    float x = 0 , y = 0 ;

    cout<<"input value of k "<<endl;
    if ( scanf ("%d" , k) != 1 || *k < 0 )
        *k = 0 ;

    cout<<"input value of n "<<endl ;
    if ( scanf("%d" , n) != 1 || *n < 0 )
        *n = 0 ;

    printf("input k center points , initializing \n") ;
    for ( int i = 0 ; i < *k ; i++ )
    {
        printf("%d center point\n", (i+1)) ;
        printf("x:\n");
        if ( scanf("%f", &x) != 1 )
        {
            *k = i ;   // keep the count consistent with what was stored
            break ;
        }
        printf("y:\n") ;
        if ( scanf("%f", &y) != 1 )
        {
            *k = i ;
            break ;
        }
        curCenterPoint.push_back( new object(x, y ) ) ;
    }

    cout<<"input n points into dataList "<<endl ;
    for ( int i = 0 ; i < *n ; i++ )
    {
        printf("x:\n") ;
        if ( scanf("%f" , &x) != 1 )
        {
            *n = i ;
            break ;
        }
        printf("y:\n") ;
        if ( scanf("%f" , &y) != 1 )
        {
            *n = i ;
            break ;
        }
        dataList.push_back( new object(x, y)) ;
    }

    cout<<"input distance calculate method : 1 oujilide \n, 0:manhatun \n ,value more than 1 ,is the p value of minkonski"<<endl;
    if ( scanf("%d" , p) != 1 )
        *p = 1 ;

    // The function-pointer table must be filled before anything uses it.
    initiFuncEntryPoint() ;
}
/**
method : dis
this method is used to calculate the distance between the ith object which is
stored in dataList , and jth object which is stored in dataList .
parameter list :
int i is the location of the ith element in dataList
int j is the location of the jth element in dataList
int p is the location of the pth element in function pointer array list
vector<object*> dataList is the dataList in which stores all the data
from the data set D .
*/
float dis ( int i , int j ,int p , vector<object*> dataList , vector<object*> curCenterPoint )
{
if ( p ==0 )
return CityBlock( (*dataList[i]) , (*curCenterPoint[j])) ;
else if ( p == 1)
return Eclid((*dataList[i]) , (*curCenterPoint[j])) ;
else
return 0 ;
//return (*fp_table[p])( *(dataList[i]) , *(curCenterPoint[j]) ) ;
}
/**
method : posOfMin
    Finds the position of the minimum value in an array of distances.
parameter list :
    float *distance
        points to an array of k floats: the distances between the
        current object and each of the k centre points
    int k
        number of elements in that array
return value :
    the index of the minimum, or -1 when distance is null or k <= 0.
note :
    floats are compared with a tolerance: a later element replaces the
    current minimum only when it is smaller by more than 0.000001, so of
    several (nearly) equal minima the first one wins.
    The array and every comparison step are traced to stdout.
*/
int posOfMin( float *distance , int k ) //tested
{
    // Nothing to search: avoid reading distance[0] of an empty array.
    if ( !distance || k <= 0 )
        return -1 ;

    for ( int i = 0 ; i < k ; i++ )
    {
        cout<<"distance"<<i<<" :"<<distance[i]<<endl;
    }

    int pos = 0 ;
    float best = distance[0] ;
    for ( int i = 1 ; i < k ; i++ )
    {
        cout<<" outer current pos "<<pos<<" min "<<best<<" distance[i] "<<distance[i]<<endl;
        if ( (best - distance[i]) > 0.000001 )
        {
            best = distance[i] ;
            pos = i ;
            cout<<"current pos "<<pos<<" min "<<best<<" distance[i] "<<distance[i]<<endl;
        }
    }
    return pos ;
}
bool isEqual( vector<object*> curCenterPoint , vector<object*>lastCenterPoint ) //tested
{
bool flag = true ;
sort(curCenterPoint.begin() , curCenterPoint.end() ) ;
sort(lastCenterPoint.begin() , lastCenterPoint.end() ) ;
for (int i = 0 ; i < curCenterPoint.size() ; i++ )
{
if (!((*curCenterPoint[i])==(*lastCenterPoint[i])))
{
cout<<"show value "<<endl;
printf("(%.2f , %.2f )\n" , curCenterPoint[i]->_x , curCenterPoint[i]->_y) ;
printf("(%.2f, %.2f) \n" , lastCenterPoint[i]->_x ,lastCenterPoint[i]->_y) ;
flag = false ;
break ;
}
}
return flag ;
}
/**
 * Runs k-means clustering on dataList, starting from the centre points
 * in curCenterPoint.
 *
 * Each round assigns every data object to its nearest centre point and
 * then moves every centre point to the mean of the objects assigned to
 * it. The rounds stop when no centre point moves any more (compared
 * with obj::operator==) or after a fixed maximum number of rounds.
 *
 * Result: the global `cluster` is rebuilt so that cluster[c] holds the
 * indices (into dataList) of the objects belonging to cluster c, and
 * the objects pointed to by curCenterPoint hold the final centres.
 *
 * @param k               number of clusters; lowered to
 *                        curCenterPoint.size() if larger
 * @param n               number of data objects; lowered to
 *                        dataList.size() if larger
 * @param p               distance selector handed to dis()
 * @param curCenterPoint  initial centre points, updated in place
 * @param dataList        the data objects (not modified)
 */
void k_means_2 (int k ,int n , int p , vector<object*>&curCenterPoint , vector<object*> dataList )
{
    // Never index past the end of either container.
    if ( k > static_cast<int>(curCenterPoint.size()) )
        k = static_cast<int>(curCenterPoint.size()) ;
    if ( n > static_cast<int>(dataList.size()) )
        n = static_cast<int>(dataList.size()) ;

    if ( k <= 0 )
    {
        // Without centre points there is nothing to cluster.
        cluster.clear() ;
        cout<<"get the result "<<endl;
        return ;
    }

    // The mean update is not guaranteed to settle for every distance
    // function, so the number of rounds is bounded.
    const int maxRounds = 1000 ;

    vector<float> distance ( k ) ;
    bool moved = true ;

    for ( int round = 0 ; moved && round < maxRounds ; round++ )
    {
        // Step 1: assign every object to its nearest centre point.
        cluster.assign ( k , vector<int>() ) ;
        for ( int i = 0 ; i < n ; i++ )
        {
            for ( int j = 0 ; j < k ; j++ )
                distance[j] = dis(i , j , p ,dataList , curCenterPoint ) ;

            const int nearest = posOfMin( &distance[0] , k ) ;
            if ( nearest < 0 || nearest >= k )
                continue ;
            cluster[nearest].push_back( i ) ;
        }

        // Step 2: move every centre point to the mean of its cluster.
        moved = false ;
        for ( int c = 0 ; c < k ; c++ )
        {
            // An empty cluster has no mean; its centre stays where it is.
            if ( cluster[c].empty() )
                continue ;

            float sumX = 0 ;
            float sumY = 0 ;
            for ( size_t m = 0 ; m < cluster[c].size() ; m++ )
            {
                // cluster stores indices; dataList stores object pointers
                sumX += dataList[cluster[c][m]]->_x;
                sumY += dataList[cluster[c][m]]->_y;
            }

            const float count = static_cast<float>( cluster[c].size() ) ;
            const object updated ( sumX / count , sumY / count ) ;

            if ( !( updated == *curCenterPoint[c] ) )
                moved = true ;

            // Update in place: no allocation, so nothing leaks.
            *curCenterPoint[c] = updated ;
        }
    }
    cout<<"get the result "<<endl;
}
/**
method:showResult
*/
void showResult (vector<object*> list )
{
for ( int i = 0 ; i < cluster.size() ;i++ )
{
cout<<"here is the objects attributed to "<<i<<" cluster"<<endl ;
for ( int j = 0 ; j <cluster[i].size() ; j++ )
{
list[cluster[i][j]]->showObject() ;
}
}
}
int main ( int argc , char *argv [] )
{
vector<object*> datalist ;
vector<object*> curCenterPoint ;
int k , n ,p;
input (&k , &n ,&p, curCenterPoint,datalist ) ;
/**
if (!isEqual(datalist , curCenterPoint) )
cout<<"works well the isEqual"<<endl;
float distance[] ={3.1 , 1.81, 1.8} ;
//here , we are going to testify the posMin function
initiFuncEntryPoint() ; //if we want to use the function pointer array
//we must call this method first
cout<<"dis is "<<dis(0,0,p, datalist, curCenterPoint)<<endl;
*/
k_means_2(k,n,p,datalist,curCenterPoint) ;
showResult(datalist) ;
system("pause") ;
return 0 ;
}
不足之处:
目前还不支持多维数据;另外,通常情况下数据集中的数据都是以文件的形式作为输入的,
因此应该在程序中添加一个数据模块,用于读取文件并将结果数据输出到文件中。
当然,本篇文章的格式处理得有些草率,过后会进行修改。
这算是我最近写的比较大的一个程序了。