PCA在视觉算法中有着重要的应用,是常见的降维方法,它属于无监督学习的范畴。关于PCA的理论在其它博客中也非常常见,本文利用C++实现了PCA的功能,基于该程序做改动,可以将PCA运用于自己的项目需求中。
#ifndef _PCA_H_
#define _PCA_H_
#include<iostream>
#include<cstdlib>
#include<malloc.h>
#include<string.h>
#include<cmath>
using namespace std;
// Key/value pair tying an integer key to a caller-owned value buffer.
// NOTE(review): declared but not referenced anywhere in this file.
template<typename T>
struct key_val
{
int key;   // integer key
T* val;    // non-owning pointer to the associated value(s)
};
// A plain data record: an nDim-element vector stored in nElement.
// The buffer is owned/allocated by the caller, not by DSet itself.
template<typename T>
struct DSet
{
    int nDim;      // number of valid entries in nElement
    T* nElement;   // caller-managed array of nDim values

    // Default-construct as an empty vector.
    DSet() : nDim(0), nElement(NULL) {}
};
/*template<typename T>
int compare(const T* a,const T* b)
{
return ((*(T*)a-*(T*)b)>0)?1:0;
}*/
// PCA driver: load samples, diagonalise the scatter matrix with the Jacobi
// method, and expose the leading eigenvectors as a projection matrix.
// T is the scalar type used for all internal buffers.
template<class T>
class PCA
{
public:
PCA(int nSample,int nDim);
~PCA();
bool Load_Pca_Data(DSet<double> m_Set[]);   // center data, build scatter matrix
bool Compute_Eigen_Matrix();                // Jacobi eigen-decomposition + sort
void Print_Feature_Value();                 // dump eigenvalues to stdout
T* get_Projection_matrix();                 // malloc'd nR x dim matrix; caller frees
DSet<double> get_mean_vector();             // shallow copy of the mean vector
int get_nr();                               // number of retained components
//T get_thresh();
private:
DSet<double> mean_Set;   // per-dimension mean of the training data
T* Set;                  // raw samples, stored column-major (dim x sample)
T* PSet;                 // projected samples (unused; reserved for get_thresh)
T* Mean_Matrix;          // mean-centered samples, row-major (sample x dim)
T* Conv_Matrix;          // scatter matrix (unnormalized covariance), dim x dim
T* Eigen_Vector;         // eigenvector matrix, dim x dim
T* Eigen_Val;            // eigenvalues, length dim
//T Thresh;
int dim;                 // dimensionality of each sample
int sample;              // number of samples
int nR;                  // retained component count (set by Compute_Eigen_Matrix)
float ratio;             // variance-mass threshold for keeping components (0.85)
bool isLoad;             // Load_Pca_Data completed successfully
bool isCompute;          // Compute_Eigen_Matrix completed successfully
bool isProjection;       // get_Projection_matrix has been called
};
// Constructs a PCA instance for nSample observations of dimension nDim.
// Allocates all working buffers and initialises the eigenvector matrix to
// the identity; the data itself is supplied later via Load_Pca_Data().
template<class T>
PCA<T>::PCA(int nSample,int nDim)
{
    sample = nSample;
    dim = nDim;
    nR = 0;
    ratio = 0.85;        // keep components covering 85% of total eigenvalue mass
    isProjection = false;
    Mean_Matrix = (T*)malloc(nSample*nDim*sizeof(T));   // mean-centered data
    Conv_Matrix = (T*)malloc(nDim*nDim*sizeof(T));      // scatter matrix
    Eigen_Vector = (T*)malloc(nDim*nDim*sizeof(T));
    // BUGFIX: was malloc(nDim*sizeof(sizeof(T))), i.e. nDim*sizeof(size_t)
    // bytes regardless of T — a buffer overflow whenever sizeof(T) exceeds
    // sizeof(size_t).
    Eigen_Val = (T*)malloc(nDim*sizeof(T));
    Set = (T*)malloc(nSample*nDim*sizeof(T));           // raw data, column-major
    mean_Set.nDim = dim;
    mean_Set.nElement = (T*)malloc(dim*sizeof(T));
    memset((void*)mean_Set.nElement, 0, dim*sizeof(T));
    PSet = NULL;
    memset((void*)Conv_Matrix, 0, nDim*nDim*sizeof(T));
    memset((void*)Eigen_Vector, 0, nDim*nDim*sizeof(T));
    memset((void*)Eigen_Val, 0, nDim*sizeof(T));
    // Eigen_Vector starts as the identity; Jacobi rotations accumulate into it.
    for(int i = 0; i < nDim; i++) Eigen_Vector[i*nDim+i] = 1;
    isLoad = false;
    isCompute = false;
}
// Releases every buffer owned by this instance.
// NOTE: DSet copies handed out by get_mean_vector() share mean_Set's buffer
// and must not be used after this destructor runs.
template<class T>
PCA<T>::~PCA()
{
    if(Mean_Matrix) free(Mean_Matrix);
    if(Conv_Matrix) free(Conv_Matrix);
    if(Eigen_Vector) free(Eigen_Vector);
    if(Eigen_Val) free(Eigen_Val);
    if(Set) free(Set);
    if(PSet) free(PSet);
    // BUGFIX: mean_Set.nElement is allocated in the constructor but was
    // never freed — memory leak.
    if(mean_Set.nElement) free(mean_Set.nElement);
}
// Loads the training set: copies samples into Set (column-major), accumulates
// the per-dimension mean, builds the scatter matrix (unnormalized covariance)
// in Conv_Matrix and the mean-centered data in Mean_Matrix.
// m_Set must contain `sample` entries; returns false (and prints an error)
// if any entry's dimensionality differs from `dim`.
template<class T>
bool PCA<T>::Load_Pca_Data(DSet<double> m_Set[])
{
//int nLen=sizeof(m_Set)/sizeof(m_Set[0]);
//if(nLen!=sample) {printf("error,the length of set must be %d\n",nLen); return false;}
printf("print input data\n");
for(int i=0;i<sample;i++)
{
if(m_Set[i].nDim!=dim) {printf("error,the dim of set must be %d\n",dim); return false;}
for(int j=0;j<m_Set[i].nDim;j++)
{
// Column-major store: element j of sample i lands at [j*sample+i].
Set[j*sample+i]=m_Set[i].nElement[j];
// Incremental mean: each term is pre-divided by `sample`.
mean_Set.nElement[j]+=m_Set[i].nElement[j]/sample;
if((j+1)%dim==0) printf("%f\n",Set[j*sample+i]);
else printf("%f ",Set[j*sample+i]);
}
}
printf("Conv_Matrix\n");
// Scatter matrix: sum over samples of centered outer products. It is NOT
// divided by (sample-1); the scaling does not affect the eigenvectors.
for(int i=0;i<dim;i++)
for(int j=0;j<dim;j++)
{
for(int k=0;k<sample;k++)
Conv_Matrix[i*dim+j]+=(m_Set[k].nElement[i]-mean_Set.nElement[i])*(m_Set[k].nElement[j]-mean_Set.nElement[j]);
if((j+1)%dim==0) printf("%f\n",Conv_Matrix[i*dim+j]);
else printf("%f ",Conv_Matrix[i*dim+j]);
}
printf("mean vector\n");
for(int k=0;k<dim;k++) printf("%f ",mean_Set.nElement[k]);
printf("\n");
// Mean-centered data, row-major (sample x dim).
for(int i=0;i<sample;i++)
for(int j=0;j<dim;j++)
{
Mean_Matrix[i*dim+j]=m_Set[i].nElement[j]-mean_Set.nElement[j];
}
isLoad=true;
return true;
}
// Computes eigenvalues/eigenvectors of the scatter matrix with the classical
// Jacobi rotation method, sorts them in descending eigenvalue order, and
// sets nR to the smallest number of leading components whose accumulated
// eigenvalue mass exceeds `ratio` of the total.
// Requires Load_Pca_Data() to have run; returns false otherwise.
template<class T>
bool PCA<T>::Compute_Eigen_Matrix()
{
    if(!isLoad) {printf("Please Load set first!\n"); return false;}
    int iter = 0, MaxIter = 400;
    T rr, rc, cc;
    T eps = 0.01;   // convergence threshold on the largest off-diagonal entry
    for(;;)
    {
        // Locate the off-diagonal element with the largest magnitude.
        T max = fabs(Conv_Matrix[1]);
        int row = 0;
        int col = 1;
        for(int i = 0; i < dim; i++)
            for(int j = 0; j < dim; j++)
            {
                if(i != j && fabs(Conv_Matrix[i*dim+j]) > max)
                {
                    row = i;
                    col = j;
                    max = fabs(Conv_Matrix[i*dim+j]);
                }
            }
        if(max < eps) break;   // matrix is numerically diagonal
        rr = Conv_Matrix[row*dim+row];
        rc = Conv_Matrix[row*dim+col];
        cc = Conv_Matrix[col*dim+col];
        // Rotation angle chosen to annihilate Conv_Matrix[row][col].
        T theta = 0.5*atan2(-2*rc, cc-rr);
        T stheta = sin(theta);
        T ctheta = cos(theta);
        T s2theta = sin(2*theta);
        T c2theta = cos(2*theta);
        Conv_Matrix[row*dim+row] = rr*ctheta*ctheta + cc*stheta*stheta + 2*rc*ctheta*stheta;
        Conv_Matrix[col*dim+col] = rr*stheta*stheta + cc*ctheta*ctheta - 2*rc*ctheta*stheta;
        Conv_Matrix[row*dim+col] = 0.5*(cc-rr)*s2theta + rc*c2theta;
        Conv_Matrix[col*dim+row] = Conv_Matrix[row*dim+col];
        // Rotate the remaining entries of the affected rows/columns.
        // BUGFIX: these temporaries were declared `int`, truncating every
        // rotated value to an integer.
        for(int i = 0; i < dim; i++)
        {
            if((i != col) && (i != row))
            {
                T s = Conv_Matrix[i*dim+row];
                Conv_Matrix[i*dim+row] = Conv_Matrix[i*dim+col]*stheta + s*ctheta;
                Conv_Matrix[i*dim+col] = Conv_Matrix[i*dim+col]*ctheta - s*stheta;
            }
        }
        for(int i = 0; i < dim; i++)
        {
            if((i != col) && (i != row))
            {
                T s = Conv_Matrix[row*dim+i];
                Conv_Matrix[row*dim+i] = Conv_Matrix[col*dim+i]*stheta + s*ctheta;
                Conv_Matrix[col*dim+i] = Conv_Matrix[col*dim+i]*ctheta - s*stheta;
            }
        }
        // Accumulate the rotation into Eigen_Vector (right-multiplication),
        // so its COLUMNS converge to the eigenvectors of the original matrix.
        // BUGFIX: the temporary here was also an `int`.
        for(int i = 0; i < dim; i++)
        {
            T s = Eigen_Vector[i*dim+row];
            Eigen_Vector[i*dim+row] = Eigen_Vector[i*dim+col]*stheta + s*ctheta;
            Eigen_Vector[i*dim+col] = Eigen_Vector[i*dim+col]*ctheta - s*stheta;
        }
        iter = iter + 1;
        if(iter > MaxIter) break;
    }
    // Build the descending-eigenvalue permutation: order[i] is the original
    // index of the i-th largest diagonal entry.
    // BUGFIX: the previous selection left its index at 0 whenever the current
    // diagonal entry was itself the largest remaining one, producing duplicate
    // and wrong positions in the permutation.
    int* order = (int*)malloc(dim*sizeof(int));
    int* taken = (int*)malloc(dim*sizeof(int));
    memset(taken, 0, dim*sizeof(int));
    for(int i = 0; i < dim; i++)
    {
        int best = -1;
        for(int j = 0; j < dim; j++)
        {
            if(!taken[j] && (best == -1 || Conv_Matrix[j*dim+j] > Conv_Matrix[best*dim+best]))
                best = j;
        }
        taken[best] = 1;
        order[i] = best;
    }
    T* temp = (T*)malloc(dim*dim*sizeof(T));
    printf("Eigen vector\n");
    for(int i = 0; i < dim; i++)
    {
        Eigen_Val[i] = Conv_Matrix[order[i]*dim+order[i]];
        for(int j = 0; j < dim; j++)
        {
            // BUGFIX: eigenvectors are the COLUMNS of Eigen_Vector, so row i
            // of the sorted matrix is column order[i] (the old code copied
            // rows, and printed the unsorted matrix).
            temp[i*dim+j] = Eigen_Vector[j*dim+order[i]];
            printf("%f ", temp[i*dim+j]);
        }
        printf("\n");
    }
    memcpy(Eigen_Vector, temp, dim*dim*sizeof(T));
    // Keep the smallest leading prefix whose eigenvalue mass exceeds `ratio`
    // of the total. (If sum is 0 the NaN comparison never triggers and all
    // dim components are kept.)
    T sum = 0;
    int count = 0;
    for(int i = 0; i < dim; i++)
    {
        sum += fabs(Eigen_Val[i]);
    }
    T sum1 = 0;
    for(int i = 0; i < dim; i++)
    {
        sum1 += fabs(Eigen_Val[i]);
        count = count + 1;
        if(sum1/sum > ratio) break;
    }
    nR = count;
    if(temp) free(temp);
    if(order) free(order);
    if(taken) free(taken);
    isCompute = true;
    return true;
}
// Prints all dim eigenvalues (space-separated, newline-terminated) to stdout.
template<class T>
void PCA<T>::Print_Feature_Value()
{
    printf("The main feature value is:\n");
    int idx = 0;
    while(idx < dim)
    {
        printf("%f ", Eigen_Val[idx]);
        ++idx;
    }
    printf("\n");
}
// Returns a freshly malloc'd copy of the top nR rows of Eigen_Vector
// (an nR x dim projection matrix). The caller owns the buffer and must
// free() it. Returns NULL if Compute_Eigen_Matrix() has not run.
template<class T>
T* PCA<T>::get_Projection_matrix()
{
    if(!isCompute)
        return 0;
    const int total = nR*dim;
    T* proj = (T*)malloc(total*sizeof(T));
    for(int k = 0; k < total; ++k)
        proj[k] = Eigen_Vector[k];
    isProjection = true;
    return proj;
}
// Returns a SHALLOW copy of the training-data mean vector: the returned
// DSet shares its nElement buffer with this PCA instance, so it must not
// be used after the PCA object is destroyed. Prints a warning (but still
// returns the zero-initialised vector) if no data has been loaded.
template<class T>
DSet<double> PCA<T>::get_mean_vector()
{
if(!isLoad) printf("Please Load training data first\n");
return mean_Set;
}
// Returns the number of principal components retained by
// Compute_Eigen_Matrix() (0 until it has been run).
template<class T>
int PCA<T>::get_nr()
{
return nR;
}
/*template<class T>
T PCA<T>::get_thresh()
{
if(!isProjection) return 0;
T argmax=0;
PSet=(T*)malloc(nSample*nr*sizeof(T));
memset(PSet,0,nSample*nr*sizeof(T));
for(int i=0;i<nSample;i++)
{
for(int j=0;j<nr;j++)
{
PSet[j*nr+i]+=
}
}
}*/
#endif // PCA_H_INCLUDED
//#include <iostream>
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/*int main(int argc, char** argv) {
return 0;
}*/
#include"pca.h"
#include<ctime>
// Projects input_vector (length nDim) onto the top-nr principal components:
//   res = pmatrix * (input_vector - mean)
// where pmatrix is nr x nDim (rows are eigenvectors) and m_vector holds the
// training mean. Returns a malloc'd array of length nr; the caller must
// free() it. Also prints each projection-matrix entry as it is consumed.
double* PCA_Test(double* pmatrix,double* input_vector,int nDim,int nr,DSet<double> &m_vector)
{
    double* res = (double*)malloc(nr*sizeof(double));
    // BUGFIX: the memset previously cleared nDim*nr*sizeof(double) bytes,
    // overflowing the nr-element allocation above.
    memset((void*)res, 0, nr*sizeof(double));
    for(int i = 0; i < nr; i++)
    {
        for(int j = 0; j < nDim; j++)
        {
            res[i] += pmatrix[i*nDim+j]*(input_vector[j]-m_vector.nElement[j]);
            printf("%f\n", pmatrix[i*nDim+j]);
        }
    }
    return res;
}
// Smoke test: build 7 random 2-D samples, run the PCA pipeline, and project
// one hand-picked vector onto the retained components.
int main(int argc,char* argv[])
{
    const int nSample = 7;
    const int nDim = 2;
    DSet<double> m_set[nSample];
    // BUGFIX: seed the RNG once. Reseeding with time(0) inside the inner
    // loop made every draw within the same second start from the same seed,
    // producing identical/correlated "random" values.
    srand((unsigned)time(0));
    for(int i = 0; i < nSample; i++)
    {
        m_set[i].nElement = (double*)malloc(nDim*sizeof(double));
        m_set[i].nDim = nDim;
        for(int j = 0; j < nDim; j++)
        {
            m_set[i].nElement[j] = j + i + rand()%(i+(i+1)*10);
        }
    }
    PCA<double> m_Pca(nSample,nDim); // 7 training samples of dimension 2
    m_Pca.Load_Pca_Data(m_set);
    m_Pca.Compute_Eigen_Matrix();
    m_Pca.Print_Feature_Value();
    DSet<double> m_vector = m_Pca.get_mean_vector();
    double* pmatrix = m_Pca.get_Projection_matrix();
    int nr = m_Pca.get_nr();
    printf("nr=%d\n",nr);
    double test_vector[2] = {-1.1,3.2};
    double* fvector = PCA_Test(pmatrix,test_vector,nDim,nr,m_vector);
    for(int i = 0; i < nr; i++)
    {
        printf("fvector[%d]=%f\n",i,fvector[i]);
    }
    if(fvector) free(fvector);
    // BUGFIX: release the projection matrix and the per-sample buffers,
    // both of which were previously leaked.
    if(pmatrix) free(pmatrix);
    for(int i = 0; i < nSample; i++)
        if(m_set[i].nElement) free(m_set[i].nElement);
    printf("finish pca test\n");
    return 0;
}