最近较忙,代码先码一下,后续再更新仔细讲解
Eigen是个第三方包,自己找个教程安装一下,里面的计算是根据高斯混合模型的公式推导结果写的,终止条件那里可以自己改,目前只是用次数控制的。
1.gmm.h
#include <Eigen/Dense>
using namespace Eigen;
void GMM_EM(MatrixXd Y, int K, int times, double thre);
2.gmm.cpp
#include "gmm.h"
#include <opencv2/opencv.hpp>
#define _USE_MATH_DEFINES
#include <math.h>
#include <vector>
#include <Eigen/Dense>
#include <random>
using namespace cv;
using namespace std;
using namespace Eigen;
class GmmParam {
MatrixXd mu;
Matrix<MatrixXd, Dynamic, Dynamic> cov;
VectorXd alpha;
//函数 给属性赋值
public:
void init(MatrixXd mu1, Matrix<MatrixXd, Dynamic, Dynamic> cov1, VectorXd alpha1) {
mu = mu1;
cov = cov1;
alpha = alpha1;
}
MatrixXd getMu() {
return mu;
}
Matrix<MatrixXd, Dynamic, Dynamic> getCov() {
return cov;
}
VectorXd getAlpha() {
return alpha;
}
};
/**
* pdf标量公式:f(X)=1/(δ(2Π)^1/2)exp(-0.5((x-mu)^2/δ^2))
* pdf多维向量公式:f(X)=1/(((2Π)^k*|cov|)^1/2)exp(-0.5((x-mu)'*cov*(x-mu)))
* mu_k是D*1(因为pdf的推导公式是列向量,所以这里得传列向量)
* cov_k是D*D
* Y_j是D*1(为了和mu_k对应)
**/
double phi(MatrixXd mu_k, MatrixXd cov_k, MatrixXd Y_j) {
int D = mu_k.rows();//数据维度
auto dev = Y_j - mu_k;
/*cout << "dev:\n" << dev << endl << endl;
cout << "dev:\n" << cov_k << endl << endl;
cout << "dev:\n" << cov_k.inverse() << endl << endl;*/
auto maha = dev.transpose()*cov_k.inverse()*dev;
/*cout << "maha:\n" << maha << endl << endl;
cout << "maha00:\n" << maha(0, 0) << endl << endl;*/
double a = exp(-0.5*maha(0, 0));
double b = cov_k.determinant();
double result = 1 / (sqrt(pow(2 * M_PI, D)*b))*a;
return result;
}
//mu是K*D,cov是K*D*D(外层是一行K列,每1行1列是个D*D的矩阵),Y是N*D
MatrixXd E_step(MatrixXd Y, MatrixXd mu, Matrix<MatrixXd, Dynamic, Dynamic> cov, VectorXd alpha) {
int N = Y.rows();//样本数
int D = Y.cols();//数据维度
int K = alpha.size();//类别
MatrixXd gamma(N, K);
vector<double> gammaSum(N, 0);
for (int j = 0; j < N; j++) {
for (int k = 0; k < K; k++)
{
gamma(j, k) = alpha[k] * phi(mu.row(k).transpose(), cov(0, k), Y.row(j).transpose());
gammaSum[j] = gammaSum[j] + gamma(j, k);
}
}
for (int j = 0; j < N; j++) {
gamma.row(j) = gamma.row(j) / gammaSum[j];
}
cout << "gamma:\n" << gamma << endl << endl;
return gamma;
}
//mu是K*D,cov是K*D*D(外层是一行K列,每1行1列是个D*D的矩阵),Y是N*D,gamma是N*K
GmmParam M_step(MatrixXd Y, MatrixXd gamma, MatrixXd mu, Matrix<MatrixXd, Dynamic, Dynamic> cov, VectorXd alpha) {
int K = gamma.cols();//类别
int N = Y.rows();//样本数
int D = Y.cols();//维度
VectorXd Nk = gamma.colwise().sum();//记录属于这个component的样本的响应度和
MatrixXd Y_gamma(Y.rows(), Y.cols());//记录每个component对每个值的贡献程度
for (int k = 0; k < K; k++) {//gamma作为系数和Y相乘,而不是点乘
for (int j = 0; j < N; j++) {
Y_gamma.row(j) = gamma(j, k)*Y.row(j);//通过Y和响应度系数相乘计算预测出来的贡献程度
}
alpha[k] = Nk[k] / N;
mu.row(k) = Y_gamma.colwise().sum() / Nk[k];
}
MatrixXd cov_gamma(N, D);//记录预测出来的观测值
MatrixXd dev(N, D);
for (int k = 0; k < K; k++) {//gamma作为系数和Y相乘,而不是点乘
for (int j = 0; j < N; j++) {
dev.row(j) = Y.row(j) - mu.row(k);
cov_gamma.row(j) = gamma(j, k)*dev.row(j);//通过Y和响应度系数相乘计算预测出来的观测值
}
cov(0, k) = dev.transpose()*cov_gamma / Nk[k];//D*D
}
GmmParam gmmParam;
gmmParam.init(mu, cov, alpha);
return gmmParam;
}
void GMM_EM(MatrixXd Y, int K, int times, double thre) {
int N = Y.rows();//样本数
int D = Y.cols();//数据维度
//初始化均值、协方差、权重系数
MatrixXd mu = MatrixXd::Random(K, D);
/*MatrixXd mu(K, D);
mu << 0.04937, 0.73136,
0.69989, 0.05192;*/
cout << "mu:\n" << mu << endl << endl;
Matrix<MatrixXd, Dynamic, Dynamic> cov;
cov.resize(1, K);
for (int i = 0; i < K; i++) {
MatrixXd cov_k = MatrixXd::Identity(D, D);
cov(0, i) = cov_k;
}
VectorXd alpha = VectorXd::Ones(K) * (1.0 / K);
cout << "alpha:\n" << alpha << endl << endl;
for (int i = 0; i < times; i++) {
MatrixXd gamma = E_step(Y, mu, cov, alpha);//用这个的delta*delta'来控制终止条件(或者mu/cov/alpha)
GmmParam gmmParam = M_step(Y, gamma, mu, cov, alpha);
mu = gmmParam.getMu();
alpha = gmmParam.getAlpha();
cov = gmmParam.getCov();
cout << "cov:\n" << cov(0, 0) << endl << endl;
}
cout << "mu:\n" << mu << endl << endl;
cout << "alpha:\n" << alpha << endl << endl;
}
3.main.cpp---------这里的测试数据不想折腾了,直接用python生成的随机数copy过来的,K是类别数(GMM组件数),40是迭代次数,thre是控制终止条件的阈值(目前没有用,可自行修改)。
#include <opencv2/opencv.hpp>
#include "gmm.h"
using namespace Eigen;
using namespace cv;
using namespace std;
int main() {
MatrixXd Y(40,2);
Y << 0.90399, 1.75717,
1.90245, 0.90256,
1.05317, 1.89239,
2.20109, 1.95815,
-0.70211, 1.47465,
0.83586, 1.20565,
1.36202, 1.48171,
-0.38448, 0.37853,
-1.15287, 2.19155,
-0.94483, 1.07071,
-0.35403, 0.72768,
0.38726, 1.16318,
-1.09941, 0.97615,
-0.52283, 0.68882,
-0.39215, 0.35754,
0.29295, 0.39324,
0.02062, 0.95334,
0.49576, 1.58207,
2.10293, 0.95540,
-0.54234, 1.96534,
3.19150, 4.56735,
1.00127, 3.94349,
1.33137, 4.29915,
3.22761, 3.65609,
1.32311, 4.71277,
3.07591, 4.57711,
2.09273, 3.07348,
1.44289, 4.22786,
2.71425, 4.85522,
2.25975, 7.12721,
2.36885, 5.64459,
1.87930, 4.80476,
1.81077, 7.25826,
2.93794, 4.60977,
1.72519, 3.75386,
1.84943, 5.28985,
2.65972, 4.54997,
2.81061, 5.15333,
0.91457, 6.11026,
2.23573, 3.90546;
double thre = 0.1;
int K = 2;
GMM_EM(Y,K,40, thre);
}