在模式识别课程中,学习了GMM算法。并实现1维GMM算法。
代码如下:
/*
Author : Li Pan
Time : 2012/9/20
Version : 1.0
Using this file to test GMM algorithm.
Input : input the gaussian distribution number, the real u1, u2, ...,
the real sigma(1), sigma(2), sigma(3), and input the expected gaussian
distribution number...
*/
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SAMPLE_NUMBER 2000
#define MAX_K 10
#define EPS 0.000001
#define PI 3.141593
/* One cell of the E-step responsibility table.
   result = numerator / denominator is the posterior probability that a
   given sample was generated by a given mixture component. */
typedef struct Element {
float numerator;   /* p(k) * N(x | mean_k, var_k) for this sample/component */
float denominator; /* mixture density at x: sum of numerators over all components */
float result;      /* responsibility: numerator / denominator */
}Element;
/* Responsibility table: one row per sample, one column per component. */
Element elements[SAMPLE_NUMBER][MAX_K];
int gaussian_distribution_number; /* number of TRUE components used to generate data */
float *means;         /* per-component means (heap-allocated, reallocated by EM) */
float *variances;     /* per-component variances (heap-allocated) */
float *probabilities; /* per-component mixing weights (heap-allocated) */
float samples[SAMPLE_NUMBER]; /* the generated 1-D data set */
/* Forward declarations.
   FIX: _allocate_and_initialize_mvp is called in calculate_arguments but
   had no prototype (implicit declaration — invalid since C99). */
void generate_samples();
void free_mvp();
void calculate_arguments(int K);
void _allocate_and_initialize_mvp(int K);
float _fill_in_elements_and_return_q(int K);
float _sum(int total_sample, int col_number);
float _sum_with_vector(int total_sample, int col_number, float *multiply_vector);
float _gaussian_distribution(float x, float mean, float variance);
void print_mvp(int expected_gaussian_distribution_number);
int main() {
int i;
int expected_gaussian_distribution_number;
fprintf(stdout, "Please enter the gaussian distribution number : \n");
fscanf(stdin, "%d", &gaussian_distribution_number);
means = (float *)malloc(gaussian_distribution_number * sizeof(float));
assert(NULL != means);
variances = (float *)malloc(gaussian_distribution_number * sizeof(float));
assert(NULL != variances);
probabilities = (float *)malloc(gaussian_distribution_number * sizeof(float));
assert(NULL != probabilities);
fprintf(stdout, "Please enter the means,variances and probability for each gaussian distribution with format [mean variance probability]\n");
for (i = 0; i < gaussian_distribution_number; ++i) {
fscanf(stdin, "%f %f %f", &means[i], &variances[i], &probabilities[i]);
}
fprintf(stdout, "Enter the expected gaussian distribution number :\n");
fscanf(stdin, "%d", &expected_gaussian_distribution_number);
generate_samples();
calculate_arguments(expected_gaussian_distribution_number); /*Fill in means, variances and probabilities*/
print_mvp(expected_gaussian_distribution_number);
free_mvp();
return 0;
}
/*
 * Draws SAMPLE_NUMBER points from the true mixture described by the
 * global means/variances/probabilities arrays into samples[].
 * Side effects: probabilities[] is converted in place into a cumulative
 * distribution, and all three parameter arrays are freed at the end
 * (EM re-allocates its own estimate arrays afterwards).
 */
void generate_samples() {
    int i;
    int j;
    int selected_gaussian_distribution;
    int n = 20; /* number of uniform terms for the central-limit normal generator */
    float x;
    float pro;
    /* Turn the mixing weights into a cumulative distribution. */
    for (i = 1; i < gaussian_distribution_number; ++i) {
        probabilities[i] += probabilities[i - 1];
    }
    srand(time(0));
    for (i = 0; i < SAMPLE_NUMBER; ++i) {
        /* FIX: default to the last component. With rounded weights the
           cumulative sum can fall slightly below 1.0, in which case the
           search loop matched nothing and the original read an
           uninitialized index (undefined behavior). */
        selected_gaussian_distribution = gaussian_distribution_number - 1;
        pro = (float)(rand() % 1001) * 0.001f;
        for (j = 0; j < gaussian_distribution_number; ++j) {
            if (pro <= probabilities[j]) {
                selected_gaussian_distribution = j;
                break;
            }
        }
        /* Central limit theorem: a standardized sum of n uniforms
           approximates a standard normal deviate. */
        x = 0.0f;
        for (j = 0; j < n; ++j) {
            x += (float)((rand() % 1001) * 0.001f);
        }
        x = (x - (float)n / 2) / sqrt((float)n / 12);
        samples[i] = sqrt(variances[selected_gaussian_distribution]) * x + means[selected_gaussian_distribution];
    }
    free_mvp(); /* true parameters are no longer needed */
}
/*
 * Releases the three parameter arrays. The pointers are nulled after
 * freeing because this function runs twice per program (once for the
 * true parameters inside generate_samples, once for the EM estimate in
 * main) — nulling makes an accidental extra call a harmless no-op
 * instead of a double-free.
 */
void free_mvp() {
    free(means);
    means = NULL;
    free(variances);
    variances = NULL;
    free(probabilities);
    probabilities = NULL;
}
/*It is the core part of this program!*/
/*
 * EM iteration — the core of the program. Alternates the M-step updates
 * of probabilities/means/variances with the E-step performed inside
 * _fill_in_elements_and_return_q, until the log-likelihood Q changes by
 * less than EPS between iterations.
 * K: number of components to fit (caller must keep K <= MAX_K).
 * FIX: removed the unused local `f` from the original.
 */
void calculate_arguments(int K) {
    int i;
    int t;
    float temp[SAMPLE_NUMBER]; /* squared deviations for the variance update */
    float previous_q;
    float current_q;
    _allocate_and_initialize_mvp(K);
    previous_q = _fill_in_elements_and_return_q(K);
    for (; ;) {
        /* M-step: mixing weight = average responsibility of the component. */
        for (i = 0; i < K; ++i) {
            probabilities[i] = 1.0f * _sum(SAMPLE_NUMBER, i) / SAMPLE_NUMBER;
        }
        /* M-step: mean = responsibility-weighted sample average. */
        for (i = 0; i < K; ++i) {
            means[i] = _sum_with_vector(SAMPLE_NUMBER, i, samples) / _sum(SAMPLE_NUMBER, i);
        }
        /* M-step: variance = responsibility-weighted squared deviation. */
        for (i = 0; i < K; ++i) {
            for (t = 0; t < SAMPLE_NUMBER; ++t) {
                temp[t] = (samples[t] - means[i]) * (samples[t] - means[i]);
            }
            variances[i] = _sum_with_vector(SAMPLE_NUMBER, i, temp) / _sum(SAMPLE_NUMBER, i);
        }
        /* E-step plus convergence test on the log-likelihood. */
        current_q = _fill_in_elements_and_return_q(K);
        if (fabs(current_q - previous_q) < EPS) {
            fprintf(stdout, "previous_q : %f\n", previous_q);
            fprintf(stdout, "current_q : %f\n", current_q);
            break;
        }
        previous_q = current_q;
    }
}
/* Sums column col_number of the responsibility table over the first
   total_sample rows — the "effective sample count" of that component. */
float _sum(int total_sample, int col_number) {
    float accumulator = 0.0f;
    int row;
    for (row = 0; row < total_sample; ++row) {
        accumulator += elements[row][col_number].result;
    }
    return accumulator;
}
/* Weighted sum: responsibilities in column col_number multiplied
   element-wise by multiply_vector, over the first total_sample rows. */
float _sum_with_vector(int total_sample, int col_number, float *multiply_vector) {
    float accumulator = 0.0f;
    int row;
    for (row = 0; row < total_sample; ++row) {
        accumulator += multiply_vector[row] * elements[row][col_number].result;
    }
    return accumulator;
}
/*
 * Allocates the estimated-parameter arrays for K components and seeds
 * them: means and variances are spread on alternating sides of the
 * global sample mean/variance, mixing weights start uniform at 1/K.
 */
void _allocate_and_initialize_mvp(int K) {
    int i;
    float total = 0.0f;
    float mean;
    float variance;
    float step1;
    float step2;
    means = malloc(K * sizeof *means);
    variances = malloc(K * sizeof *variances);
    probabilities = malloc(K * sizeof *probabilities);
    /* FIX: the original never checked these allocations. */
    assert(means != NULL && variances != NULL && probabilities != NULL);
    for (i = 0; i < SAMPLE_NUMBER; ++i) {
        total += samples[i];
    }
    mean = total / SAMPLE_NUMBER;
    step1 = mean / K;
    /* BUG FIX: reset the accumulator. The original kept the sum of all
       samples in `total`, so the seed "variance" was inflated by that sum. */
    total = 0.0f;
    for (i = 0; i < SAMPLE_NUMBER; ++i) {
        total += pow(samples[i] - mean, 2);
    }
    variance = total / SAMPLE_NUMBER;
    step2 = variance / K;
    for (i = 0; i < K; ++i) {
        /* pow(-1, i) alternates the seeds around the global statistics;
           since i < K, variance - step2*i stays positive. */
        means[i] = mean + pow(-1, i) * step1 * i;
        fprintf(stdout, "%d : %f\n", i, means[i]);
        variances[i] = variance + pow(-1, i) * step2 * i;
        probabilities[i] = 1.0f / K;
    }
}
/*
 * E-step: fills the responsibility table for all samples over K
 * components and returns the log-likelihood
 *   Q = sum over samples of log(mixture density at that sample).
 */
float _fill_in_elements_and_return_q(int K) {
    int i;
    int t;
    float f;
    float current_q = 0.0f;
    for (t = 0; t < SAMPLE_NUMBER; ++t) {
        f = 0.0f;
        for (i = 0; i < K; ++i) {
            f += probabilities[i] * _gaussian_distribution(samples[t], means[i], variances[i]);
        }
        /* BUG FIX: the original comment warned "f may be 0!" but never
           guarded it — for outlying samples the density underflows,
           giving 0/0 responsibilities and log(0) = -inf below.
           Clamp to a tiny positive floor. */
        if (f < 1e-30f) {
            f = 1e-30f;
        }
        for (i = 0; i < K; ++i) {
            elements[t][i].numerator = probabilities[i] * _gaussian_distribution(samples[t], means[i], variances[i]);
            elements[t][i].denominator = f;
            elements[t][i].result = elements[t][i].numerator / f;
        }
        /* Accumulate the log-likelihood in the same pass. */
        current_q += log(f);
    }
    return current_q;
}
float _gaussian_distribution(float x, float mean, float variance) {
float result = 0.0f;
result = 1.0f / (sqrt(2 * PI) * sqrt(variance)) * exp((x - mean) * (x - mean) / (-2 * variance));
return result;
}
/*
 * Prints the estimated weight, mean and variance of each fitted
 * component as a tab-separated table.
 * FIX: the original format strings had lost all field separators
 * ("idprobabilitymeanvariance", "%d%f%f%f"), producing unreadable
 * run-together output; tabs restored.
 */
void print_mvp(int expected_gaussian_distribution_number) {
    int i;
    fprintf(stdout, "\nid\tprobability\tmean\tvariance");
    for (i = 0; i < expected_gaussian_distribution_number; ++i) {
        fprintf(stdout, "\n%d\t%f\t%f\t%f\n", (i + 1), probabilities[i], means[i], variances[i]);
    }
}
/*
Finished time: 2012/9/21 12:33
*/
该算法对某些样本可以很好地估计。