GMM_2;GMM_test_3;
可视化:
- opencv
- c++
- 其他
出现nan问题
sumE,分母为0
偏离高斯中心太远,概率密度太小???(不是主要原因,但有关系)
- 把初值设置更接近原始样本
- 样本的方差改小,增加高斯覆盖范围,使其不易为0
- 增加补偿常数,使其不为0(增加了算法使用范围,厉害)
万万没想到
初始化问题,自身累加,使用了没有初始化的数组,里面有乱码!!
- 初始化数组
二、改进
- 模型初始化?
- 高纬拓展
- 分类
数组同值初始化:fill(begin(), end(), value)
补偿常数不能设太大,不然会衍生靠近平均值的虚拟模型!!
得到的模型,进行筛选
- mean nan x
- var 太大 x
- mean 相似 合并
我是小匹,万万没想到,我在做,高斯混合模型。
欢迎学术交流。
转载注明:
作者:匹诺曹患者
#include <iostream>
#include <cmath>
#include <limits>
#include <cstdlib>
#include <ctime>
#include<iomanip>
#include <opencv.hpp>
#define PI 3.1415
using namespace std;
using namespace cv;
// 计算数组和
long double arrSum(long double *arr, int arrLength);
// 计算数组均值
long double Mean(long double *arr, int length);
// 计算数组方差
long double Var(long double *arr, long double mean, int length);
// 正态分布概率密度计算
long double NormalDistribution(long double x, long double mean, long double sigma2);
// 产生正态分布随机数
long double generateGaussianNoise(long double mu, long double sigma);
int main()
{
static int a = 1;
// 随机数函数生成2个由一维高斯分布组成的高斯混合模型,X ~ 0.6125*N(120, 2) + 0.3875*N(300, 5)
srand(time(NULL));//随机发生器
long double sample[4000];
const int sampLen = sizeof(sample) / sizeof(*sample);//计算分配内存的大小
long double originalArrHead[2450];
long double originalArrTail[sampLen - 2450];
for (int i = 0; i < 4000; i++) {
if (i < 2450) {
sample[i] = generateGaussianNoise(120, 2);
originalArrHead[i] = sample[i];
}
else {
sample[i] = generateGaussianNoise(300, 5);
originalArrTail[i - 2450] = sample[i];
}
}
//可视化
/*Mat image(500, 500, CV_32FC1);
for (int i = 0; i < 4000; i++) {
int aa; aa = round(sample[i]);
Point point;
if(i<2450)
point = {100,rows-aa};
else
point = { 200,rows-aa };
circle(image, point, 5, Scalar(255), 4);
}
imshow("1", image); waitKey(30); waitKey(0);*/
//可视化2
/*long double primerMu2[] = { 120, 120 };
long double primerSigmaSquare2[] = { 10, 20 };
int cols = 500, rows = 400;
Mat image2(rows, cols, CV_8UC3, Scalar(127, 127, 127));
for (int i = 0; i < 500; i++) {
int aa; aa = round(1000 * NormalDistribution(i, primerMu2[0], primerSigmaSquare2[0]));
Point point;
point = { i,rows - aa };
circle(image2, point, 1, Scalar(255, 0, 0), 4);
}
for (int i = 0; i < 500; i++) {
int aa; aa = round(1000 * NormalDistribution(i, primerMu2[1], primerSigmaSquare2[1]));
Point point;
point = { i,rows - aa };
circle(image2, point, 1, Scalar(0, 255, 0), 4);
}
imshow("2", image2); waitKey(30); waitKey(0);*/
cout << "True distribution" << endl;
cout << "X ~ " << 2450.0 / 4000 << "N(" << Mean(originalArrHead, 2450) << ", " << Var(originalArrHead, Mean(originalArrHead, 2450), 2450) << ")";
cout << " + " << 1 - 2450.0 / 4000 << "N(" << Mean(originalArrTail, sampLen - 2450) << ", " << Var(originalArrTail, Mean(originalArrTail, sampLen - 2450), sampLen - 2450);
cout << ")" << endl << endl;
// 混合系数, 均值, 方差的初始化
cout << "Begin EM Algorithm" << endl;
cout << "Initial mu, sigimaSquare, alpha values" << endl;
long double primerMu[] = { 110, 320 };
long double primerSigmaSquare[] = { 5, 5 };
long double primerAlpha[] = { 0.5, 0.5 };
//if (a == 1)for (int j = 0; j < 2; j++)cout << primerMu[j] << " "; cout << endl;
int count = 0;
// 输出初始化参数
cout << "X ~ " << primerAlpha[0] << "N(" << primerMu[0] << ", " << primerSigmaSquare[0] << ")";
cout << " + " << primerAlpha[1] << "N(" << primerMu[1] << ", " << primerSigmaSquare[1] << ")" << endl << endl;
while (count < 20) {
//E
//gamma
long double E_j[2][sampLen] = {0};
for (int i = 0; i < 2; i++) {
for (int j = 0; j < sampLen; j++) {
E_j[i][j] = primerAlpha[i] * NormalDistribution(sample[j], primerMu[i], primerSigmaSquare[i]);
}
}//E_j[2][j]异常,很多0,样本方差太大
//cout << setiosflags(ios::fixed) << setprecision(10) << endl;
//if (a == 1)for (int j = 0; j < sampLen; j++)cout << E_j[1][j] << " ";
long double E_sum[sampLen] = { 0 };
for (int j = 0; j < sampLen; j++) {
E_sum[j] = E_j[0][j] + E_j[1][j];//有些为0
}
//if (a == 1)for (int j = 0; j < sampLen; j++)cout << E_sum[j] << " ";
long double E_gamma[2][sampLen] = { 0 };
for (int i = 0; i < 2; i++) {
for (int j = 0; j < sampLen; j++) {
if(E_sum[j] == 1000000)//?行不行
E_gamma[i][j] = 0;
else E_gamma[i][j] = E_j[i][j] / E_sum[j];
}
}
//if (a == 1)for (int j = 0; j < sampLen; j++)cout << E_gamma[0][j] << " ";
//M
//mu
long double sum_gamma[2] = { 0 };
for (int i = 0; i < 2; i++) {
for (int j = 0; j < sampLen; j++) {
sum_gamma[i] += E_gamma[i][j];
}
}
//if (a == 1)for (int j = 0; j <2; j++)cout << sum_gamma[j] << " ";
long double sum_gamma_j[2] = { 0 };
for (int i = 0; i < 2; i++) {
for (int j = 0; j < sampLen; j++) {
sum_gamma_j[i] += E_gamma[i][j]* sample[j];
}
}
//if (a == 1)for (int j = 0; j < 2; j++)cout << sum_gamma_j[j] << " ";
for (int i = 0; i < 2; i++) {
primerMu[i] = sum_gamma_j[i] / sum_gamma[i];
}
//if (a == 1)for (int j = 0; j < 2; j++)cout << primerMu[j] << " ";
//sigma
long double sum_sigma_j[2] = { 0 };
for (int i = 0; i < 2; i++) {
for (int j = 0; j < sampLen; j++) {
sum_sigma_j[i] += E_gamma[i][j] * (sample[j]- primerMu[i])* (sample[j] - primerMu[i]);
}
}
for (int i = 0; i < 2; i++) {
primerSigmaSquare[i] = sum_sigma_j[i] / sum_gamma[i];
}
//alpha
for (int i = 0; i < 2; i++) {
primerAlpha[i] = sum_gamma[i]/ sampLen;
}
cout << count + 1 << "th " << "iterations" << endl;
cout << "X ~ " << primerAlpha[0] << "N(" << primerMu[0] << ", " << primerSigmaSquare[0] << ")";
cout << " + " << primerAlpha[1] << "N(" << primerMu[1] << ", " << primerSigmaSquare[1] << ")" << endl;
cout << endl;
count++; a++;
}
return 0;
}
long double arrSum(long double *arr, int arrLength) {
long double sum = 0;
for (int i = 0; i < arrLength; i++)
sum += arr[i];
return sum;
}
long double Mean(long double *arr, int arrLength) {
long double sum = 0;
for (int i = 0; i < arrLength; i++)
sum += arr[i];
return sum / arrLength;
}
long double Var(long double *arr, long double mean, int arrLength) {
long double Dx = 0;
for (int i = 0; i < arrLength; i++) {
Dx += (arr[i] - mean) * (arr[i] - mean);
}
Dx = (long double)1 / (arrLength - 1) * Dx;
return Dx;
}
long double NormalDistribution(long double x, long double mu, long double sigma2) {
return ((exp(-(x - mu)*(x - mu) / (2 * sigma2))) / sqrt(2 * PI*sigma2));
}
long double generateGaussianNoise(long double mu, long double sigma2) {
long double sigma = sqrt(sigma2);//平方根 [ˌskwer ˈruːt]
const long double epsilon = std::numeric_limits<long double>::min();
const long double two_pi = 2.0*3.14159265358979323846;
static long double z0, z1;
static bool generate;
generate = !generate;
if (!generate)
return z1 * sigma + mu;
long double u1, u2;
do
{
u1 = rand() * (1.0 / RAND_MAX);//随机数
u2 = rand() * (1.0 / RAND_MAX);
} while (u1 <= epsilon);
z0 = sqrt(-2.0 * log(u1)) * cos(two_pi * u2);
z1 = sqrt(-2.0 * log(u1)) * sin(two_pi * u2);
return z0 * sigma + mu;
}