K均值尝试找到数据的自然类别。用户设置类别个数,K均值迅速找到“好的”类别中心。“好的”意味着聚类的中心位于数据的自然类别中心。
void cvKMeans2( const CvArr* samples, int cluster_count,
CvArr* labels, CvTermCriteria termcrit );
samples
输入样本的浮点矩阵,每个样本一行。
cluster_count
所给定的聚类数目
labels
输出整数向量:每个样本对应的类别标识,在 cluster_count 内。
termcrit
指定聚类的最大迭代次数和/或精度(两次迭代引起的聚类中心的移动距离)
函数 cvKMeans2 执行 k-means 算法 搜索 cluster_count 个类别的中心并对样本进行分类,输出 labels(i) 为样本 i 的类别标识。
#include "stdafx.h"
#include "cxcore.h"
#include "highgui.h" //包含highgui.h来使用窗口输出
int _tmain(int argc, _TCHAR* argv[])
{
#define MAX_CLUSTERS 5 //设置类别的颜色,个数(《=5)
CvScalar color_tab[MAX_CLUSTERS];
IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 );
CvRNG rng = cvRNG(0xffffffff);
color_tab[0] = CV_RGB(255,0,0);
color_tab[1] = CV_RGB(0,255,0);
color_tab[2] = CV_RGB(100,100,255);
color_tab[3] = CV_RGB(255,0,255);
color_tab[4] = CV_RGB(255,255,0);
cvNamedWindow( "clusters", 1 );
for(;;)
{
int k, cluster_count = cvRandInt(&rng)%MAX_CLUSTERS + 1;
int i, sample_count = cvRandInt(&rng)%1000 + 1;
CvMat* points = cvCreateMat( sample_count, 1, CV_32FC2 );
CvMat* clusters = cvCreateMat( sample_count, 1, CV_32SC1 );
/* generate random sample from multivariate Gaussian distribution */
for( k = 0; k < cluster_count; k++ )
{
CvPoint center;
CvMat point_chunk;
center.x = cvRandInt(&rng)%img->width;
center.y = cvRandInt(&rng)%img->height;
cvGetRows( points, &point_chunk, k*sample_count/cluster_count,
k == cluster_count - 1 ? sample_count :
(k+1)*sample_count/cluster_count );
cvRandArr( &rng, &point_chunk, CV_RAND_NORMAL,
cvScalar(center.x,center.y,0,0),
cvScalar(img->width/6, img->height/6,0,0) );
}
/* shuffle samples */
for( i = 0; i < sample_count/2; i++ )
{
CvPoint2D32f* pt1 = (CvPoint2D32f*)points->data.fl +
cvRandInt(&rng)%sample_count;
CvPoint2D32f* pt2 = (CvPoint2D32f*)points->data.fl +
cvRandInt(&rng)%sample_count;
CvPoint2D32f temp;
CV_SWAP( *pt1, *pt2, temp );
}
//使用cvKMeans2(),直到聚类中心的最大移动小于1
cvKMeans2( points, cluster_count, clusters,
cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ));
cvZero( img );
for( i = 0; i < sample_count; i++ )
{
CvPoint2D32f pt = ((CvPoint2D32f*)points->data.fl)[i];
int cluster_idx = clusters->data.i[i];
cvCircle( img, cvPointFrom32f(pt), 2,
color_tab[cluster_idx], CV_FILLED );
}
cvReleaseMat( &points );
cvReleaseMat( &clusters );
cvShowImage( "clusters", img );
int key = cvWaitKey(0);
if( key == 27 ) // 'ESC',press then exit
break;
}
return 0;
}
两个聚集中心
三个聚集中心
五个聚集中心