// 输入:_data图像,K:聚类中心数(类别数量)
// labels:类别矩阵
double cv::kmeans( InputArray _data, int K,
InputOutputArray _bestLabels,
TermCriteria criteria, int attempts,
int flags, OutputArray _centers )
{
CV_INSTRUMENT_REGION();
const int SPP_TRIALS = 3;
Mat data0 = _data.getMat(); //矩阵转图像
const bool isrow = data0.rows == 1; //只有一行
const int N = isrow ? data0.cols : data0.rows; //只有一行时,使用列作为N,否则以行做N
const int dims = (isrow ? 1 : data0.cols)*data0.channels(); //维数,只有一行时,维数为3,否则维数为列*3
const int type = data0.depth(); //图像的type
attempts = std::max(attempts, 1); //迭代次数
CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); //K大于0,1维或者2维矩阵,32位float图像
CV_CheckGE(N, K, "Number of clusters should be more than number of elements"); //簇数量K必须大于元素数N???
Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast<size_t>(data0.step)); //重新转换成结构化的数据
_bestLabels.create(N, 1, CV_32S, -1, true); //labels
Mat _labels, best_labels = _bestLabels.getMat(); //矩阵转换成图像
//! k-Means flags
KMEANS_RANDOM_CENTERS = 0, // 每次迭代随机生成簇中心
KMEANS_PP_CENTERS = 2, // [Arthur2007].Arthur and Vassilvitskii族中心初始化方法
KMEANS_USE_INITIAL_LABELS = 1 // 先使用用户提供的标签(而不是计算初始化中心),再进一步尝试,使用随机或半随机中心。
if (flags & CV_KMEANS_USE_INITIAL_LABELS)
{
CV_Assert( (best_labels.cols == 1 || best_labels.rows == 1) &&
best_labels.cols*best_labels.rows == N &&
best_labels.type() == CV_32S &&
best_labels.isContinuous());
best_labels.reshape(1, N).copyTo(_labels);
for (int i = 0; i < N; i++)
{
CV_Assert((unsigned)_labels.at<int>(i) < (unsigned)K);
}
}
else
{
//行和列不能同时等于1,元素数量必须等于N,必须是32bit的UINT型数据,数据必须是连续的。
if (!((best_labels.cols == 1 || best_labels.rows == 1) &&
best_labels.cols*best_labels.rows == N &&
best_labels.type() == CV_32S &&
best_labels.isContinuous()))
{
_bestLabels.create(N, 1, CV_32S);
best_labels = _bestLabels.getMat(); //赋值
}
_labels.create(best_labels.size(), best_labels.type());
}
int* labels = _labels.ptr<int>();
Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); //聚类中心、老化的聚类中心、临时聚类中心
cv::AutoBuffer<int, 64> counters(K); //申请内存(计数器)
cv::AutoBuffer<double, 64> dists(N); //申请内存(距离)
RNG& rng = theRNG(); //opencv产生随机数接口
if (criteria.type & TermCriteria::EPS)
criteria.epsilon = std::max(criteria.epsilon, 0.);
else
criteria.epsilon = FLT_EPSILON;
criteria.epsilon *= criteria.epsilon;
if (criteria.type & TermCriteria::COUNT)
criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);
else
criteria.maxCount = 100;
if (K == 1)
{
attempts = 1; //聚类中心数为1,则只需要迭代一次就可以
criteria.maxCount = 2; //最大计数为2
}
cv::AutoBuffer<Vec2f, 64> box(dims); //
if (!(flags & KMEANS_PP_CENTERS)) //Arthur and Vassilvitskii聚类中心初始方式
{
{
const float* sample = data.ptr<float>(0);
for (int j = 0; j < dims; j++)
box[j] = Vec2f(sample[j], sample[j]);
}
for (int i = 1; i < N; i++)
{
const float* sample = data.ptr<float>(i);
for (int j = 0; j < dims; j++)
{
float v = sample[j];
box[j][0] = std::min(box[j][0], v);
box[j][1] = std::max(box[j][1], v);
}
}
}
double best_compactness = DBL_MAX;
for (int a = 0; a < attempts; a++) //迭代
{
double compactness = 0;
for (int iter = 0; ;)
{
double max_center_shift = iter == 0 ? DBL_MAX : 0.0;
swap(centers, old_centers);
if (iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)))
{
if (flags & KMEANS_PP_CENTERS)
generateCentersPP(data, centers, K, rng, SPP_TRIALS);
else
{
for (int k = 0; k < K; k++)
generateRandomCenter(dims, box.data(), centers.ptr<float>(k), rng);
}
}
else
{
// compute centers
centers = Scalar(0);
for (int k = 0; k < K; k++)
counters[k] = 0;
for (int i = 0; i < N; i++)
{
const float* sample = data.ptr<float>(i);
int k = labels[i];
float* center = centers.ptr<float>(k);
for (int j = 0; j < dims; j++)
center[j] += sample[j];
counters[k]++;
}
for (int k = 0; k < K; k++)
{
if (counters[k] != 0)
continue;
// if some cluster appeared to be empty then:
// 1. find the biggest cluster
// 2. find the farthest from the center point in the biggest cluster
// 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster.
int max_k = 0;
for (int k1 = 1; k1 < K; k1++)
{
if (counters[max_k] < counters[k1])
max_k = k1;
}
double max_dist = 0;
int farthest_i = -1;
float* base_center = centers.ptr<float>(max_k);
float* _base_center = temp.ptr<float>(); // normalized
float scale = 1.f/counters[max_k];
for (int j = 0; j < dims; j++)
_base_center[j] = base_center[j]*scale;
for (int i = 0; i < N; i++)
{
if (labels[i] != max_k)
continue;
const float* sample = data.ptr<float>(i);
double dist = hal::normL2Sqr_(sample, _base_center, dims);
if (max_dist <= dist)
{
max_dist = dist;
farthest_i = i;
}
}
counters[max_k]--;
counters[k]++;
labels[farthest_i] = k;
const float* sample = data.ptr<float>(farthest_i);
float* cur_center = centers.ptr<float>(k);
for (int j = 0; j < dims; j++)
{
base_center[j] -= sample[j];
cur_center[j] += sample[j];
}
}
for (int k = 0; k < K; k++)
{
float* center = centers.ptr<float>(k);
CV_Assert( counters[k] != 0 );
float scale = 1.f/counters[k];
for (int j = 0; j < dims; j++)
center[j] *= scale;
if (iter > 0)
{
double dist = 0;
const float* old_center = old_centers.ptr<float>(k);
for (int j = 0; j < dims; j++)
{
double t = center[j] - old_center[j];
dist += t*t;
}
max_center_shift = std::max(max_center_shift, dist);
}
}
}
bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon);
if (isLastIter)
{
// don't re-assign labels to avoid creation of empty clusters
parallel_for_(Range(0, N), KMeansDistanceComputer<true>(dists.data(), labels, data, centers), (double)divUp((size_t)(dims * N), CV_KMEANS_PARALLEL_GRANULARITY));
compactness = sum(Mat(Size(N, 1), CV_64F, &dists[0]))[0];
break;
}
else
{
// assign labels
parallel_for_(Range(0, N), KMeansDistanceComputer<false>(dists.data(), labels, data, centers), (double)divUp((size_t)(dims * N * K), CV_KMEANS_PARALLEL_GRANULARITY));
}
}
if (compactness < best_compactness)
{
best_compactness = compactness;
if (_centers.needed())
{
if (_centers.fixedType() && _centers.channels() == dims)
centers.reshape(dims).copyTo(_centers);
else
centers.copyTo(_centers);
}
_labels.copyTo(best_labels);
}
}
return best_compactness; //返回最佳简洁的labels矩阵
}
opencv源码阅读:K均值聚类算法接口kmeans()
最新推荐文章于 2023-02-23 11:59:27 发布