在看源码前最好看下
模式识别与机器学习 (Bishop) 暂无中文版
这本书的第9章,有详细的公式推导,斯坦福大学公开课NG的课件,可以两者结合看(Bishop的书和NG的课)
NG的课件翻译版本(EM课件) http://blog.csdn.net/yeyang911/article/details/28095153
源码解读:主要是E-step和M-step
EM算法的这种思想是值得借鉴的
EM算法的简单例子http://www.cnblogs.com/zhangchaoyang/articles/2623364.html
E-step 源码:
void EM::eStep()
{
// Compute probs_ik from means_k, covs_k and weights_k.
trainProbs.create(trainSamples.rows, nclusters, CV_64FC1);
//概率矩阵(样本个数,聚类个数,数据类型)
trainLabels.create(trainSamples.rows, 1, CV_32SC1);
//标签向量(样本个数,1,数据类型)
trainLogLikelihoods.create(trainSamples.rows, 1, CV_64FC1);
//log后的似然(样本个数,1,数据类型)
computeLogWeightDivDet(); //计算 LogWeightDivDet
CV_DbgAssert(trainSamples.type() == CV_64FC1); //数据类型检测
CV_DbgAssert(means.type() == CV_64FC1); //数据类型检测
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
{
Mat sampleProbs = trainProbs.row(sampleIndex);
Vec2d res = computeProbabilities(trainSamples.row(sampleIndex), &sampleProbs);
//计算概率
trainLogLikelihoods.at<double>(sampleIndex) = res[0];
trainLabels.at<int>(sampleIndex) = static_cast<int>(res[1]);
}
}
其中涉及了 computeLogWeightDivDet() computeProbabilities() 两个函数
computeLogWeightDivDet():
void EM::computeLogWeightDivDet()//计算log 的权值
{
CV_Assert(!covsEigenValues.empty());
Mat logWeights;
cv::max(weights, DBL_MIN, weights);//防止数据过小
log(weights, logWeights); //weights(聚类的数据占样本总数的的比例)转到log
logWeightDivDet.create(1, nclusters, CV_64FC1);
// note: logWeightDivDet = log(weight_k) - 0.5 * log(|det(cov_k)|)
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
double logDetCov = 0.;
const int evalCount = static_cast<int>(covsEigenValues[clusterIndex].total()); //有多少特征值
for(int di = 0; di < evalCount; di++)
logDetCov += std::log(covsEigenValues[clusterIndex].at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0));
// logDetCov为Cov的值的和的log
logWeightDivDet.at<double>(clusterIndex) = logWeights.at<double>(clusterIndex) - 0.5 * logDetCov;
// logWeightDivDet 意味着 logWeight div(-) det(Sum(logcov))
}
}
computeProbabilities() :
注意上面的公式,转换到log后,相乘除的变为相加减
Vec2d EM::computeProbabilities(const Mat& sample, Mat* probs) const
{
// L_ik = log(weight_k) - 0.5 * log(|det(cov_k)|) - 0.5 *(x_i - mean_k)' cov_k^(-1) (x_i - mean_k)]
// q = arg(max_k(L_ik))
// probs_ik = exp(L_ik - L_iq) / (1 + sum_j!=q (exp(L_ij - L_iq))
// see Alex Smola's blog http://blog.smola.org/page/2 for
// details on the log-sum-exp trick
CV_Assert(!means.empty());
CV_Assert(sample.type() == CV_64FC1);
CV_Assert(sample.rows == 1);
CV_Assert(sample.cols == means.cols);
int dim = sample.cols;
Mat L(1, nclusters, CV_64FC1); //L 1*nclusters
int label = 0;
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
const Mat centeredSample = sample - means.row(clusterIndex); //减去均值
Mat rotatedCenteredSample = covMatType != EM::COV_MAT_GENERIC ?
centeredSample : centeredSample * covsRotateMats[clusterIndex];
double Lval = 0;
for(int di = 0; di < dim; di++)
{
double w = invCovsEigenValues[clusterIndex].at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0);
//对角线上的值或者每行第一个值
double val = rotatedCenteredSample.at<double>(di);
Lval += w * val * val;//方差乘以权值 协方差矩阵的倒数的平方
}
CV_DbgAssert(!logWeightDivDet.empty());
L.at<double>(clusterIndex) = logWeightDivDet.at<double>(clusterIndex) - 0.5 * Lval;
// note: logWeightDivDet = log(weight_k) - 0.5 * log(|det(cov_k)|)
// note: L.at<double>(clusterIndex) = log(weight_k) - 0.5 * log(|det(cov_k)|)-0.5 * Lval
if(L.at<double>(clusterIndex) > L.at<double>(label))
label = clusterIndex; //求似然最大的label值
}
double maxLVal = L.at<double>(label); //
Mat expL_Lmax = L; // exp(L_ij - L_iq) //L 1*nclusters
for(int i = 0; i < L.cols; i++)
expL_Lmax.at<double>(i) = std::exp(L.at<double>(i) - maxLVal);
double expDiffSum = sum(expL_Lmax)[0]; // sum_j(exp(L_ij - L_iq))
if(probs) //probs
{
probs->create(1, nclusters, CV_64FC1);
double factor = 1./expDiffSum;
expL_Lmax *= factor;
expL_Lmax.copyTo(*probs);
}
Vec2d res;
res[0] = std::log(expDiffSum) + maxLVal - 0.5 * dim * CV_LOG2PI; //dim样本维数 CV_LOG2PI (1.8378770664093454835606594728112)
//
res[1] = label;
return res;
}
M-step:
void EM::mStep()
{
// Update means_k, covs_k and weights_k from probs_ik
int dim = trainSamples.cols;
// Update weights
// not normalized first
reduce(trainProbs, weights, 0, CV_REDUCE_SUM); //计算每列的概率和
/*
cvReduce( const CvArr* src, CvArr* dst, int dim, int op=CV_REDUCE_SUM);
src
输入矩阵
dst
输出的通过处理输入矩阵的所有行/列而得到的单行/列向量
dim
矩阵被简化后的维数索引.0意味着矩阵被处理成一行,1意味着矩阵被处理成为一列,-1时维数将根据输出向量的大小自动选择.
op
简化操作的方式,可以有以下几种取值:
CV_REDUCE_SUM-输出是矩阵的所有行/列的和.
CV_REDUCE_AVG-输出是矩阵的所有行/列的平均向量.
CV_REDUCE_MAX-输出是矩阵的所有行/列的最大值.
CV_REDUCE_MIN-输出是矩阵的所有行/列的最小值.
*/
// Update means
means.create(nclusters, dim, CV_64FC1);
means = Scalar(0);
const double minPosWeight = trainSamples.rows * DBL_EPSILON; //小概率
double minWeight = DBL_MAX;
int minWeightClusterIndex = -1;
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(weights.at<double>(clusterIndex) <= minPosWeight)
//概率过小跳过
continue;
if(weights.at<double>(clusterIndex) < minWeight)//求最小概率值
{
minWeight = weights.at<double>(clusterIndex);
minWeightClusterIndex = clusterIndex;//得到聚类索引值
}
Mat clusterMean = means.row(clusterIndex);
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
clusterMean += trainProbs.at<double>(sampleIndex, clusterIndex) * trainSamples.row(sampleIndex);
clusterMean /= weights.at<double>(clusterIndex);
}
// Update covsEigenValues and invCovsEigenValues
covs.resize(nclusters);
covsEigenValues.resize(nclusters);
if(covMatType == EM::COV_MAT_GENERIC)
covsRotateMats.resize(nclusters);
invCovsEigenValues.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(weights.at<double>(clusterIndex) <= minPosWeight)
//概率过小跳过
continue;
if(covMatType != EM::COV_MAT_SPHERICAL)
covsEigenValues[clusterIndex].create(1, dim, CV_64FC1);
else
covsEigenValues[clusterIndex].create(1, 1, CV_64FC1);
if(covMatType == EM::COV_MAT_GENERIC)
covs[clusterIndex].create(dim, dim, CV_64FC1);
Mat clusterCov = covMatType != EM::COV_MAT_GENERIC ?
covsEigenValues[clusterIndex] : covs[clusterIndex];
clusterCov = Scalar(0);
Mat centeredSample;
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
{
centeredSample = trainSamples.row(sampleIndex) - means.row(clusterIndex);
//centeredSample = 样本值-平均值的差值 //向量
获取带概率的协方差矩阵--begin
if(covMatType == EM::COV_MAT_GENERIC)
clusterCov += trainProbs.at<double>(sampleIndex, clusterIndex) * centeredSample.t() * centeredSample;
else
{
double p = trainProbs.at<double>(sampleIndex, clusterIndex);
for(int di = 0; di < dim; di++ )
{
double val = centeredSample.at<double>(di);
clusterCov.at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0) += p*val*val;
}
}
/获取带概率的协方差矩阵--end///
}
if(covMatType == EM::COV_MAT_SPHERICAL)
clusterCov /= dim;
clusterCov /= weights.at<double>(clusterIndex);
// Update covsRotateMats for EM::COV_MAT_GENERIC only
if(covMatType == EM::COV_MAT_GENERIC)
{
SVD svd(covs[clusterIndex], SVD::MODIFY_A + SVD::FULL_UV);
covsEigenValues[clusterIndex] = svd.w; //特征值矩阵
covsRotateMats[clusterIndex] = svd.u; //旋转矩阵
}
max(covsEigenValues[clusterIndex], minEigenValue, covsEigenValues[clusterIndex]);
//minEigenValue 正数最小值 略大于0
// update invCovsEigenValues
invCovsEigenValues[clusterIndex] = 1./covsEigenValues[clusterIndex];
//取矩阵的倒数
}
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(weights.at<double>(clusterIndex) <= minPosWeight)
{
Mat clusterMean = means.row(clusterIndex);
means.row(minWeightClusterIndex).copyTo(clusterMean);//更新最小权值的均值赋与均值
covs[minWeightClusterIndex].copyTo(covs[clusterIndex]);//更新最小权值的Covs
covsEigenValues[minWeightClusterIndex].copyTo(covsEigenValues[clusterIndex]);//更新最小权值的CovsEigenValues
if(covMatType == EM::COV_MAT_GENERIC)//只有 COV_MAT_GENERIC 更新旋转矩阵
covsRotateMats[minWeightClusterIndex].copyTo(covsRotateMats[clusterIndex]);
invCovsEigenValues[minWeightClusterIndex].copyTo(invCovsEigenValues[clusterIndex]);//更新最小权值的invCovsEigenValues
}
}
// Normalize weights
weights /= trainSamples.rows;
}
下面是全部源码:
/*M///
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright( C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
//(including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort(including negligence or otherwise) arising in any way out of
// the use of this software, even ifadvised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
namespace cv
{
const double minEigenValue = DBL_EPSILON;
///
EM::EM(int _nclusters, int _covMatType, const TermCriteria& _termCrit)
{
nclusters = _nclusters;
covMatType = _covMatType;
maxIters = (_termCrit.type & TermCriteria::MAX_ITER) ? _termCrit.maxCount : DEFAULT_MAX_ITERS;
epsilon = (_termCrit.type & TermCriteria::EPS) ? _termCrit.epsilon : 0;
}
EM::~EM()
{
//clear();
}
void EM::clear()
{
trainSamples.release();
trainProbs.release();
trainLogLikelihoods.release();
trainLabels.release();
weights.release();
means.release();
covs.clear();
covsEigenValues.clear();
invCovsEigenValues.clear();
covsRotateMats.clear();
logWeightDivDet.release();
}
bool EM::train(InputArray samples,
OutputArray logLikelihoods,
OutputArray labels,
OutputArray probs)
{
Mat samplesMat = samples.getMat();
setTrainData(START_AUTO_STEP, samplesMat, 0, 0, 0, 0); //1.设置训练数据
return doTrain(START_AUTO_STEP, logLikelihoods, labels, probs); //训练
}
bool EM::trainE(InputArray samples,
InputArray _means0,
InputArray _covs0,
InputArray _weights0,
OutputArray logLikelihoods,
OutputArray labels,
OutputArray probs)
{
Mat samplesMat = samples.getMat();
std::vector<Mat> covs0;
_covs0.getMatVector(covs0);
Mat means0 = _means0.getMat(), weights0 = _weights0.getMat();
setTrainData(START_E_STEP, samplesMat, 0, !_means0.empty() ? &means0 : 0,
!_covs0.empty() ? &covs0 : 0, !_weights0.empty() ? &weights0 : 0);
return doTrain(START_E_STEP, logLikelihoods, labels, probs);
}
bool EM::trainM(InputArray samples,
InputArray _probs0,
OutputArray logLikelihoods,
OutputArray labels,
OutputArray probs)
{
Mat samplesMat = samples.getMat();
Mat probs0 = _probs0.getMat();
setTrainData(START_M_STEP, samplesMat, !_probs0.empty() ? &probs0 : 0, 0, 0, 0);
return doTrain(START_M_STEP, logLikelihoods, labels, probs);
}
Vec2d EM::predict(InputArray _sample, OutputArray _probs) const
{
Mat sample = _sample.getMat();
CV_Assert(isTrained());
CV_Assert(!sample.empty());
if(sample.type() != CV_64FC1)
{
Mat tmp;
sample.convertTo(tmp, CV_64FC1);
sample = tmp;
}
sample.reshape(1, 1);
Mat probs;
if( _probs.needed() )
{
_probs.create(1, nclusters, CV_64FC1);
probs = _probs.getMat();
}
return computeProbabilities(sample, !probs.empty() ? &probs : 0);
}
bool EM::isTrained() const
{
return !means.empty();
}
static
void checkTrainData(int startStep, const Mat& samples,
int nclusters, int covMatType, const Mat* probs, const Mat* means,
const std::vector<Mat>* covs, const Mat* weights)
{
// Check samples.
CV_Assert(!samples.empty()); //训练样本不为空
CV_Assert(samples.channels() == 1); //训练样本是否为单通道数据
int nsamples = samples.rows;//按照行排列数据 ,一行一个数据
int dim = samples.cols;
// Check training params.
CV_Assert(nclusters > 0);
CV_Assert(nclusters <= nsamples);
CV_Assert(startStep == EM::START_AUTO_STEP ||
startStep == EM::START_E_STEP ||
startStep == EM::START_M_STEP);
CV_Assert(covMatType == EM::COV_MAT_GENERIC ||
covMatType == EM::COV_MAT_DIAGONAL ||
covMatType == EM::COV_MAT_SPHERICAL);
CV_Assert(!probs ||
(!probs->empty() &&
probs->rows == nsamples && probs->cols == nclusters &&
(probs->type() == CV_32FC1 || probs->type() == CV_64FC1)));
CV_Assert(!weights ||
(!weights->empty() &&
(weights->cols == 1 || weights->rows == 1) && static_cast<int>(weights->total()) == nclusters &&
(weights->type() == CV_32FC1 || weights->type() == CV_64FC1)));
CV_Assert(!means ||
(!means->empty() &&
means->rows == nclusters && means->cols == dim &&
means->channels() == 1));
CV_Assert(!covs ||
(!covs->empty() &&
static_cast<int>(covs->size()) == nclusters));
if(covs)
{
const Size covSize(dim, dim);
for(size_t i = 0; i < covs->size(); i++)
{
const Mat& m = (*covs)[i];
CV_Assert(!m.empty() && m.size() == covSize && (m.channels() == 1));
}
}
if(startStep == EM::START_E_STEP)
{
CV_Assert(means);
}
else if(startStep == EM::START_M_STEP)
{
CV_Assert(probs);
}
}
static
void preprocessSampleData(const Mat& src, Mat& dst, int dstType, bool isAlwaysClone) //转变类型
{
if(src.type() == dstType && !isAlwaysClone)
dst = src;
else
src.convertTo(dst, dstType);
}
static
void preprocessProbability(Mat& probs) //得到各个样本的概率
{
max(probs, 0., probs); //保证probs大于等于0
const double uniformProbability = (double)(1./probs.cols);
for(int y = 0; y < probs.rows; y++)
{
Mat sampleProbs = probs.row(y);
double maxVal = 0;
minMaxLoc(sampleProbs, 0, &maxVal); //获取每行的最大值
if(maxVal < FLT_EPSILON)
sampleProbs.setTo(uniformProbability); //小于FLOAT 最小值 就置为 uniformProbability 的值
else
normalize(sampleProbs, sampleProbs, 1, 0, NORM_L1); //NORM_L1 各个样本在总样本的概率
}
}
void EM::setTrainData(int startStep, const Mat& samples,
const Mat* probs0,
const Mat* means0,
const std::vector<Mat>* covs0,
const Mat* weights0)
{
clear();
checkTrainData(startStep, samples, nclusters, covMatType, probs0, means0, covs0, weights0); //1(1)检查训练数据
bool isKMeansInit = (startStep == EM::START_AUTO_STEP) || (startStep == EM::START_E_STEP && (covs0 == 0 || weights0 == 0));
//是否为Kmeans初始化的数据
// Set checked data
preprocessSampleData(samples, trainSamples, isKMeansInit ? CV_32FC1 : CV_64FC1, false);
//根据isKMeansInit 选择输出的数据类型
// set probs
if(probs0 && startStep == EM::START_M_STEP)
{
preprocessSampleData(*probs0, trainProbs, CV_64FC1, true); //数据类型转换
preprocessProbability(trainProbs);//获取样本的概率
}
// set weights
if(weights0 && (startStep == EM::START_E_STEP && covs0))
{
weights0->convertTo(weights, CV_64FC1);
weights.reshape(1,1);
preprocessProbability(weights);//获取样本权值
}
// set means EM::START_E_STEP
//You need to provide means a_k of mixture components to use this option.
//Optionally you can pass weights pi_k and covariance matrices S_k of mixture components
if(means0 && (startStep == EM::START_E_STEP/* || startStep == EM::START_AUTO_STEP*/))
means0->convertTo(means, isKMeansInit ? CV_32FC1 : CV_64FC1); //设置数据格式
// set covs
if(covs0 && (startStep == EM::START_E_STEP && weights0))
{
covs.resize(nclusters);
for(size_t i = 0; i < covs0->size(); i++)
(*covs0)[i].convertTo(covs[i], CV_64FC1); //设置数据格式
}
}
void EM::decomposeCovs() //分解
{
CV_Assert(!covs.empty());
covsEigenValues.resize(nclusters); //特征值
if(covMatType == EM::COV_MAT_GENERIC)
covsRotateMats.resize(nclusters); //旋转矩阵
invCovsEigenValues.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
CV_Assert(!covs[clusterIndex].empty());
SVD svd(covs[clusterIndex], SVD::MODIFY_A + SVD::FULL_UV); //对covs[clusterIndex]SVD分解
//SVD::MODIFY_A use the algorithm to modify the decomposed matrix; it can save space
//and speed up processing.
//SVD::NO_UV indicates that only a vector of singular values w is to be processed, while
//u and vt will be set to empty matrices.
//SVD::FULL_UV when the matrix is not square, by default the algorithm produces u
//and vt matrices of sufficiently large size for the further A reconstruction; if, however,
//FULL_UV flag is specified, u and vt will be full-size square orthogonal matrices.
if(covMatType == EM::COV_MAT_SPHERICAL)
{
double maxSingularVal = svd.w.at<double>(0);
covsEigenValues[clusterIndex] = Mat(1, 1, CV_64FC1, Scalar(maxSingularVal));
}
else if(covMatType == EM::COV_MAT_DIAGONAL)
{
covsEigenValues[clusterIndex] = svd.w; //U W V_t V为转置的
}
else //EM::COV_MAT_GENERIC
{
covsEigenValues[clusterIndex] = svd.w; //得到W= 特征值矩阵
covsRotateMats[clusterIndex] = svd.u; //得到U= 旋转矩阵
}
max(covsEigenValues[clusterIndex], minEigenValue, covsEigenValues[clusterIndex]);
//minEigenValue为double 正数最小值 略大于0
invCovsEigenValues[clusterIndex] = 1./covsEigenValues[clusterIndex];
//取倒数
}
}
void EM::clusterTrainSamples()//用Kmeans进行聚类
{
int nsamples = trainSamples.rows;
// Cluster samples, compute/update means
// Convert samples and means to 32F, because kmeans requires this type.
Mat trainSamplesFlt, meansFlt;
if(trainSamples.type() != CV_32FC1)
trainSamples.convertTo(trainSamplesFlt, CV_32FC1);
else
trainSamplesFlt = trainSamples;
if(!means.empty())
{
if(means.type() != CV_32FC1)
means.convertTo(meansFlt, CV_32FC1);
else
meansFlt = means;
}
Mat labels;
//Kmeans 只接受32FC1数据类型
kmeans(trainSamplesFlt, nclusters, labels, TermCriteria(TermCriteria::COUNT, means.empty() ? 10 : 1, 0.5), 10, KMEANS_PP_CENTERS, meansFlt);
// Convert samples and means back to 64F.
CV_Assert(meansFlt.type() == CV_32FC1);
if(trainSamples.type() != CV_64FC1)
{
Mat trainSamplesBuffer;
trainSamplesFlt.convertTo(trainSamplesBuffer, CV_64FC1);
trainSamples = trainSamplesBuffer;
}
meansFlt.convertTo(means, CV_64FC1); //变回64FC1
// Compute weights and covs
weights = Mat(1, nclusters, CV_64FC1, Scalar(0));
covs.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
Mat clusterSamples;
for(int sampleIndex = 0; sampleIndex < nsamples; sampleIndex++) //按照标签顺序加入数据
{
if(labels.at<int>(sampleIndex) == clusterIndex)
{
const Mat sample = trainSamples.row(sampleIndex);
clusterSamples.push_back(sample); // 按照标签顺序加入数据
}
}
CV_Assert(!clusterSamples.empty());
//计算相关矩阵
calcCovarMatrix(clusterSamples, covs[clusterIndex], means.row(clusterIndex), //计算相关矩阵
CV_COVAR_NORMAL + CV_COVAR_ROWS + CV_COVAR_USE_AVG + CV_COVAR_SCALE, CV_64FC1);
weights.at<double>(clusterIndex) = static_cast<double>(clusterSamples.rows)/static_cast<double>(nsamples);
//计算权值 -- 占样本总数的的比例
}
decomposeCovs();
}
void EM::computeLogWeightDivDet()//计算log 的权值
{
CV_Assert(!covsEigenValues.empty());
Mat logWeights;
cv::max(weights, DBL_MIN, weights);//防止数据过小
log(weights, logWeights); //weights(聚类的数据占样本总数的的比例)转到log
logWeightDivDet.create(1, nclusters, CV_64FC1);
// note: logWeightDivDet = log(weight_k) - 0.5 * log(|det(cov_k)|)
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
double logDetCov = 0.;
const int evalCount = static_cast<int>(covsEigenValues[clusterIndex].total()); //有多少特征值
for(int di = 0; di < evalCount; di++)
logDetCov += std::log(covsEigenValues[clusterIndex].at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0));
// logDetCov为Cov的值的和的log
logWeightDivDet.at<double>(clusterIndex) = logWeights.at<double>(clusterIndex) - 0.5 * logDetCov;
// logWeightDivDet 意味着 logWeight div(-) det(Sum(logcov))
}
}
bool EM::doTrain(int startStep, OutputArray logLikelihoods, OutputArray labels, OutputArray probs)
{
int dim = trainSamples.cols; //dim 样本的特征维数
// Precompute the empty initial train data in the cases of EM::START_E_STEP and START_AUTO_STEP
if(startStep != EM::START_M_STEP)
{
if(covs.empty())
{
CV_Assert(weights.empty());
clusterTrainSamples(); //用Kmeans聚类
}
}
if(!covs.empty() && covsEigenValues.empty() )
{
CV_Assert(invCovsEigenValues.empty());
decomposeCovs(); //得到特征值矩阵
}
if(startStep == EM::START_M_STEP) //You need to provide initial probabilities p_i;k to use this option.
mStep(); //M-Step
double trainLogLikelihood, prevTrainLogLikelihood = 0.;
for(int iter = 0; ; iter++)
{
eStep(); //E-Step
trainLogLikelihood = sum(trainLogLikelihoods)[0];
if(iter >= maxIters - 1)
break; // 大于迭代次数
double trainLogLikelihoodDelta = trainLogLikelihood - prevTrainLogLikelihood;
if( iter != 0 &&
(trainLogLikelihoodDelta < -DBL_EPSILON ||
trainLogLikelihoodDelta < epsilon * std::fabs(trainLogLikelihood))) //终止条件
break;
mStep(); //M-Step
prevTrainLogLikelihood = trainLogLikelihood;
}
if( trainLogLikelihood <= -DBL_MAX/10000. )
{
clear();
return false;
}
// postprocess covs
covs.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(covMatType == EM::COV_MAT_SPHERICAL)
{
covs[clusterIndex].create(dim, dim, CV_64FC1);
setIdentity(covs[clusterIndex], Scalar(covsEigenValues[clusterIndex].at<double>(0)));
}
else if(covMatType == EM::COV_MAT_DIAGONAL)
{
covs[clusterIndex] = Mat::diag(covsEigenValues[clusterIndex]);
}
}
if(labels.needed())
trainLabels.copyTo(labels);
if(probs.needed())
trainProbs.copyTo(probs);
if(logLikelihoods.needed())
trainLogLikelihoods.copyTo(logLikelihoods);
trainSamples.release();
trainProbs.release();
trainLabels.release();
trainLogLikelihoods.release();
return true;
}
Vec2d EM::computeProbabilities(const Mat& sample, Mat* probs) const
{
// L_ik = log(weight_k) - 0.5 * log(|det(cov_k)|) - 0.5 *(x_i - mean_k)' cov_k^(-1) (x_i - mean_k)]
// q = arg(max_k(L_ik))
// probs_ik = exp(L_ik - L_iq) / (1 + sum_j!=q (exp(L_ij - L_iq))
// see Alex Smola's blog http://blog.smola.org/page/2 for
// details on the log-sum-exp trick
CV_Assert(!means.empty());
CV_Assert(sample.type() == CV_64FC1);
CV_Assert(sample.rows == 1);
CV_Assert(sample.cols == means.cols);
int dim = sample.cols;
Mat L(1, nclusters, CV_64FC1); //L 1*nclusters
int label = 0;
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
const Mat centeredSample = sample - means.row(clusterIndex); //减去均值
Mat rotatedCenteredSample = covMatType != EM::COV_MAT_GENERIC ?
centeredSample : centeredSample * covsRotateMats[clusterIndex];
double Lval = 0;
for(int di = 0; di < dim; di++)
{
double w = invCovsEigenValues[clusterIndex].at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0);
//对角线上的值或者每行第一个值
double val = rotatedCenteredSample.at<double>(di);
Lval += w * val * val;//方差乘以权值 协方差矩阵的倒数的平方
}
CV_DbgAssert(!logWeightDivDet.empty());
L.at<double>(clusterIndex) = logWeightDivDet.at<double>(clusterIndex) - 0.5 * Lval;
// note: logWeightDivDet = log(weight_k) - 0.5 * log(|det(cov_k)|)
// note: L.at<double>(clusterIndex) = log(weight_k) - 0.5 * log(|det(cov_k)|)-0.5 * Lval
if(L.at<double>(clusterIndex) > L.at<double>(label))
label = clusterIndex; //求最大label值
}
double maxLVal = L.at<double>(label); //
Mat expL_Lmax = L; // exp(L_ij - L_iq) //L 1*nclusters
for(int i = 0; i < L.cols; i++)
expL_Lmax.at<double>(i) = std::exp(L.at<double>(i) - maxLVal);
double expDiffSum = sum(expL_Lmax)[0]; // sum_j(exp(L_ij - L_iq))
if(probs) //probs
{
probs->create(1, nclusters, CV_64FC1);
double factor = 1./expDiffSum;
expL_Lmax *= factor;
expL_Lmax.copyTo(*probs);
}
Vec2d res;
res[0] = std::log(expDiffSum) + maxLVal - 0.5 * dim * CV_LOG2PI; //dim样本维数 CV_LOG2PI (1.8378770664093454835606594728112)
//
res[1] = label;
return res;
}
void EM::eStep()
{
// Compute probs_ik from means_k, covs_k and weights_k.
trainProbs.create(trainSamples.rows, nclusters, CV_64FC1);
//概率矩阵(样本个数,聚类个数,数据类型)
trainLabels.create(trainSamples.rows, 1, CV_32SC1);
//标签向量(样本个数,1,数据类型)
trainLogLikelihoods.create(trainSamples.rows, 1, CV_64FC1);
//log后的似然(样本个数,1,数据类型)
computeLogWeightDivDet(); //计算 LogWeightDivDet
CV_DbgAssert(trainSamples.type() == CV_64FC1); //数据类型检测
CV_DbgAssert(means.type() == CV_64FC1); //数据类型检测
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
{
Mat sampleProbs = trainProbs.row(sampleIndex);
Vec2d res = computeProbabilities(trainSamples.row(sampleIndex), &sampleProbs);
//计算概率
trainLogLikelihoods.at<double>(sampleIndex) = res[0];
trainLabels.at<int>(sampleIndex) = static_cast<int>(res[1]);
}
}
void EM::mStep()
{
// Update means_k, covs_k and weights_k from probs_ik
int dim = trainSamples.cols;
// Update weights
// not normalized first
reduce(trainProbs, weights, 0, CV_REDUCE_SUM); //计算每列的概率和
/*
cvReduce( const CvArr* src, CvArr* dst, int dim, int op=CV_REDUCE_SUM);
src
输入矩阵
dst
输出的通过处理输入矩阵的所有行/列而得到的单行/列向量
dim
矩阵被简化后的维数索引.0意味着矩阵被处理成一行,1意味着矩阵被处理成为一列,-1时维数将根据输出向量的大小自动选择.
op
简化操作的方式,可以有以下几种取值:
CV_REDUCE_SUM-输出是矩阵的所有行/列的和.
CV_REDUCE_AVG-输出是矩阵的所有行/列的平均向量.
CV_REDUCE_MAX-输出是矩阵的所有行/列的最大值.
CV_REDUCE_MIN-输出是矩阵的所有行/列的最小值.
*/
// Update means
means.create(nclusters, dim, CV_64FC1);
means = Scalar(0);
const double minPosWeight = trainSamples.rows * DBL_EPSILON; //小概率
double minWeight = DBL_MAX;
int minWeightClusterIndex = -1;
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(weights.at<double>(clusterIndex) <= minPosWeight)
//概率过小跳过
continue;
if(weights.at<double>(clusterIndex) < minWeight)//求最小概率值
{
minWeight = weights.at<double>(clusterIndex);
minWeightClusterIndex = clusterIndex;//得到聚类索引值
}
Mat clusterMean = means.row(clusterIndex);
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
clusterMean += trainProbs.at<double>(sampleIndex, clusterIndex) * trainSamples.row(sampleIndex);
clusterMean /= weights.at<double>(clusterIndex);
}
// Update covsEigenValues and invCovsEigenValues
covs.resize(nclusters);
covsEigenValues.resize(nclusters);
if(covMatType == EM::COV_MAT_GENERIC)
covsRotateMats.resize(nclusters);
invCovsEigenValues.resize(nclusters);
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(weights.at<double>(clusterIndex) <= minPosWeight)
//概率过小跳过
continue;
if(covMatType != EM::COV_MAT_SPHERICAL)
covsEigenValues[clusterIndex].create(1, dim, CV_64FC1);
else
covsEigenValues[clusterIndex].create(1, 1, CV_64FC1);
if(covMatType == EM::COV_MAT_GENERIC)
covs[clusterIndex].create(dim, dim, CV_64FC1);
Mat clusterCov = covMatType != EM::COV_MAT_GENERIC ?
covsEigenValues[clusterIndex] : covs[clusterIndex];
clusterCov = Scalar(0);
Mat centeredSample;
for(int sampleIndex = 0; sampleIndex < trainSamples.rows; sampleIndex++)
{
centeredSample = trainSamples.row(sampleIndex) - means.row(clusterIndex);
//centeredSample = 样本值-平均值的差值 //向量
获取带概率的协方差矩阵--begin
if(covMatType == EM::COV_MAT_GENERIC)
clusterCov += trainProbs.at<double>(sampleIndex, clusterIndex) * centeredSample.t() * centeredSample;
else
{
double p = trainProbs.at<double>(sampleIndex, clusterIndex);
for(int di = 0; di < dim; di++ )
{
double val = centeredSample.at<double>(di);
clusterCov.at<double>(covMatType != EM::COV_MAT_SPHERICAL ? di : 0) += p*val*val;
}
}
/获取带概率的协方差矩阵--end///
}
if(covMatType == EM::COV_MAT_SPHERICAL)
clusterCov /= dim;
clusterCov /= weights.at<double>(clusterIndex);
// Update covsRotateMats for EM::COV_MAT_GENERIC only
if(covMatType == EM::COV_MAT_GENERIC)
{
SVD svd(covs[clusterIndex], SVD::MODIFY_A + SVD::FULL_UV);
covsEigenValues[clusterIndex] = svd.w; //特征值矩阵
covsRotateMats[clusterIndex] = svd.u; //旋转矩阵
}
max(covsEigenValues[clusterIndex], minEigenValue, covsEigenValues[clusterIndex]);
//minEigenValue 正数最小值 略大于0
// update invCovsEigenValues
invCovsEigenValues[clusterIndex] = 1./covsEigenValues[clusterIndex];
//取矩阵的倒数
}
for(int clusterIndex = 0; clusterIndex < nclusters; clusterIndex++)
{
if(weights.at<double>(clusterIndex) <= minPosWeight)
{
Mat clusterMean = means.row(clusterIndex);
means.row(minWeightClusterIndex).copyTo(clusterMean);//更新最小权值的均值赋与均值
covs[minWeightClusterIndex].copyTo(covs[clusterIndex]);//更新最小权值的Covs
covsEigenValues[minWeightClusterIndex].copyTo(covsEigenValues[clusterIndex]);//更新最小权值的CovsEigenValues
if(covMatType == EM::COV_MAT_GENERIC)//只有 COV_MAT_GENERIC 更新旋转矩阵
covsRotateMats[minWeightClusterIndex].copyTo(covsRotateMats[clusterIndex]);
invCovsEigenValues[minWeightClusterIndex].copyTo(invCovsEigenValues[clusterIndex]);//更新最小权值的invCovsEigenValues
}
}
// Normalize weights
weights /= trainSamples.rows;
}
void EM::read(const FileNode& fn)
{
Algorithm::read(fn);
decomposeCovs();
computeLogWeightDivDet();
}
} // namespace cv
/* End of file. */