<span style="font-size:18px;">#include "stdafx.h"
#include <stdlib.h>
#include <Windows.h>
#include <io.h>
#include <cv.h>
#include <ml.h>
#include <highgui.h>
#include <cxcore.h>
#include "sparse_coding.h"
#include "retr_data_base.h"
using namespace cv;
// 归一化的操作。
void normalizing(float* f, int nFeature)
{
for(int i=0; i<nFeature; i++)
f[i] = f[i] / 255.0f;
}
void extrsct_image_feature(float* f, IplImage* src_image, int width, int height)
{
// 调整image的大小为15*15。
IplImage* resizedImage = cvCreateImage(cvSize(width,height), IPL_DEPTH_8U,1);
cvResize(src_image,resizedImage);
for(int i=0; i<height; i++)
for(int j=0; j<width; j++)
// CV_IMAGE_ELEM用于读取image中的像素值。
f[i*width+j] = (float)CV_IMAGE_ELEM(resizedImage,uchar,i,j);
// 将image归一化。
normalizing(f,width*height);
cvReleaseImage(&resizedImage);
}
void load_basis(float** basis, int* label, int width, int height, char*root_path, vector<string>* label_name)
{
// 获取得到文件数,子文件名,每个子文件中image的名字。
ClassInfo* info = TraverClassFolder(root_path);
char src_full_path[300];
int sampleCount = 0;
// 对于每一个类别,也即是label。
for(int i=0; i<info->classCount; i++)
{
(*label_name).push_back(info->className[i].c_str());
// 对于每个类别中的每一个image。info->fileCounts[i]表示每个类别中的文件数。
for(int j=0; j<info->fileCounts[i]; j++)
{
sprintf(src_full_path,"%s\\%s\\%s.jpg", root_path,info->className[i].c_str(),info->fileName[i][j].c_str());
IplImage* src_image = cvLoadImage(src_full_path,0);
extrsct_image_feature(basis[sampleCount],src_image,width,height);
label[sampleCount] = i;
sampleCount++;
cvReleaseImage(&src_image);
}
}
}
float cal_recovery_error(float** trainSample, int* trainLabel, int nClass, int nSample,int nFeature,float* testSample, float* x, int class_label)
{
float* temp = new float[nFeature];
float error = 0;
// 给一个临时变量。
for(int i=0; i<nFeature; i++)
{
temp[i] = testSample[i];
}
for(int i=0; i<nClass*nSample; i++)
{
if(x[i] != 0 && trainLabel[i] == class_label)
{
for(int j=0; j<nFeature; j++)
{
// 相当于乘以一个x后,所有的行数相减。
temp[j] -= x[i]*trainSample[i][j];
}
}
}
for(int j=0; j<nFeature; j++)
error += temp[j]*temp[j];
delete temp;
return sqrt(error);
}
int sparse_coding_classifier(float** trainSample, int* trainLabel, int nClass, int nSample,int nFeature,float* testSample, float* x)
{
float minError = 1e10;
float minIndex = 0;
float error;
for(int i=0; i<nClass; i++)
{
error = cal_recovery_error(trainSample,trainLabel,nClass,nSample,nFeature,testSample,x,i);
if(error<minError)
{
minError = error;
minIndex = i;
}
}
return minIndex;
}
int _tmain(int argc, _TCHAR* argv[])
{
// 一共含有 38个人。
int nClass = 38;
// 每个人含有 30 张照片。
int nSample = 30;
// 照片的大小调节为 15 x 15。
int width = 15;
int height = 15;
// 定义basis,label。用于存放样本和标签。
// basis是一个二维的数组,用于存放所有的图片。每一行对于一张图片。
// 申请空间。首先对于每一行 new,也就是含有的样本数。
float ** basis = new float*[nClass * nSample];
// 其次,对于每一列 new, 也就是图片的大小。
for(int i=0; i<nClass*nSample; i++)
basis[i] = new float[width*height];
// 定义数组,来存放相对应的每行image的label。 大小为总的样本数。
int *label = new int[nClass*nSample];
// 存放每个人所在的文件名。
vector<string> label_name;
// A,b; 存放矩阵相乘后的矩阵。
float** A = new float*[nClass*nSample];
for(int i=0; i<nClass*nSample; i++)
A[i] = new float[nClass*nSample];
float* b = new float[nClass*nSample];
float* x = new float[nClass*nSample];
// 训练样本和测试样本存放的文件夹。
char train_root_path[300] = "Train";
char test_root_path[300] = "Test";
// 得到basis,label,label_name。 &label_name表示地址。
load_basis(basis,label,width,height,train_root_path,&label_name);
// Test
float lambda = 0.1;
ClassInfo* info = TraverClassFolder(test_root_path);
// 用于存放extrsct_image_feature后的image。
float* testSample = new float[width*height];
char src_full_path[300];
int predict_label;
// cal the matrix A = trainSample'*trainSample size(trainSample) = [nTrainSample, nFeature]
for(int i=0; i<nClass*nSample; i++)
{
for(int j=0; j<nClass*nSample; j++)
{
A[i][j] = 0;
for(int k=0; k<width*height; j++)
A[i][j] += basis[i][k]*basis[j][k];
}
}
// A[i][i] 对角线上的元素。
for(int i=0; i<nClass*nSample; i++)
A[i][i] += 1e-4;
int posCount = 0;
int totalCount = 0;
// 对于每一个类别。
for(int i=0; i<info->classCount; i++)
{
// 对于对于每一个类别中的image的数量。
for(int j=0; j<info->fileCounts[i]; j++)
{
sprintf(src_full_path,"%s\\%s\\%s.jpg",test_root_path,info->className[i].c_str(),info->fileName[i][j].c_str());
IplImage* src_image = cvLoadImage(src_full_path,0);
extrsct_image_feature(testSample, src_image,width,height);
// 对于test image 每个像与train image相乘后累加,
for(int j=0; j<nClass*nSample; j++)
{
b[j] = 0;
for(int k=0; k<width*height; k++)
{
b[j] += -basis[j][k] * testSample[k];
}
}
sparse_coding(A,b,x,lambda,nClass*nSample);
predict_label = sparse_coding_classifier(basis,label,nClass,nSample,width*height,testSample,x);
printf("%s %s",label_name[predict_label].c_str(), info->className[i].c_str());
//判断预测是否正确。
if(label_name[predict_label].compare(info->className[i].c_str())==0)
{
posCount++;
}
totalCount++;
printf(" %d/%d -- %f\n",posCount,totalCount,(float)posCount/totalCount);
cvReleaseImage(&src_image);
}
}
system("pause");
return 0;
}</span>
<span style="font-size:18px;">#ifndef RETR_DATA_BASE
#define RETR_DATA_BASE
#include "windows.h"
#include "stdio.h"
#include <vector>
#include <cstring>
using namespace std;
#define PATH_LENGTH 300
struct ClassInfo
{
// 类别数。
int classCount;
// 记录每个类别的文件数。
vector<int> fileCounts;
// 子文件名。 也是类别名字。
vector<string> className;
// 每个子文件中的image的文件名。
vector<vector<string>> fileName;
};
ClassInfo* TraverClassFolder(const char* path);
#endif</span>
retr_data_base.cpp
<span style="font-size:18px;">#include "retr_data_base.h"
ClassInfo* TraverClassFolder(const char* path)
{
ClassInfo* classInfo = new ClassInfo();
vector<string> pathVec;
vector<string> fileVec;
string rootPath = path;
rootPath.append("\\");
pathVec.push_back(rootPath);
while(!pathVec.empty())
{
WIN32_FIND_DATAA findFileData;
HANDLE hFind = INVALID_HANDLE_VALUE;
string searchPath = pathVec.front();
searchPath.append("*");
hFind = FindFirstFileA(searchPath.c_str(),&findFileData);
if(hFind == INVALID_HANDLE_VALUE)
{
return classInfo;
}
fileVec.clear();
int folderCount = 0;
int fileCount = 0;
while(FindNextFileA(hFind,&findFileData))
{
if(strcmp(findFileData.cFileName,"..")!=0 )
{
if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_HIDDEN || findFileData.dwFileAttributes == 38 || findFileData.dwFileAttributes == FILE_ATTRIBUTE_SYSTEM)
continue;
if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY)
{
string dir = findFileData.cFileName;
classInfo->className.push_back(dir);
string rootTemp = rootPath;
dir = rootPath.append(dir);
rootPath = rootTemp;
dir.append("\\");
pathVec.push_back(dir);
folderCount ++;
}
else if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_NORMAL || findFileData.dwFileAttributes == FILE_ATTRIBUTE_ARCHIVE)
{
string file = findFileData.cFileName;
char extension[200];
_splitpath(file.c_str(),NULL,NULL,NULL,extension);
string withoutExtension = file;
withoutExtension = withoutExtension.substr(0,withoutExtension.length()-strlen(extension));
fileVec.push_back(withoutExtension);
fileCount ++;
}
}
}
if(folderCount!=0)
classInfo->classCount = folderCount;
if(fileCount != 0)
{
classInfo->fileCounts.push_back(fileCount);
classInfo->fileName.push_back(fileVec);
}
pathVec.erase(pathVec.begin());
}
return classInfo;
}</span>
sparse_coding.h
<span style="font-size:18px;">#include <cv.h>
//Reference:
//Efficient sparse coding algorithms (Honglak Lee 2007)
//min 1/2 x'*A*x + b'*x + lambda*|x|
void sparse_coding(float** A,float* b, float* x_out,float lambda, int n);</span>
sparse_coding.cpp
<span style="font-size:18px;">#include "sparse_coding.h"
//The structure defined for efficiency. In most cases, x is very sparse,
//the computation cost will significantly reduced if the calculation is only implemented on nonzero variables
struct SolutionX
{
int n;// dimension of x
float* x;
int nPos;//the number of nonzero variables
int* index;//the index of the nonzero variables
int* sign;//the signs of x
};
void create_solutionx(SolutionX *s, int n)
{
s->n = n;
s->nPos = 0;
s->index = new int[n];
s->sign = new int[n];
s->x = new float[n];
memset(s->index,0,n*sizeof(int));
memset(s->sign,0,n*sizeof(int));
memset(s->x,0,n*sizeof(float));
}
void release_solutionx(SolutionX *s)
{
delete[] s->index;
delete[] s->sign;
delete[] s->x;
}
inline void cal_gradient(float** A, float* b, SolutionX* x,int n, float* gradient)
{
memset(gradient,0,n*sizeof(float));
for (int i=0;i<n;i++)
{
for (int j=0;j<x->nPos;j++)
gradient[i] += A[i][x->index[j]]*x->x[x->index[j]];
}
for(int i=0;i<n;i++)
gradient[i] += b[i];
}
void reset_solutionx(SolutionX* x)
{
for (int i=0;i<x->nPos;i++)
{
x->x[x->index[i]] = 0;
x->sign[x->index[i]] = 0;
}
x->nPos = 0;
}
void copy_solutionx(SolutionX *src,SolutionX* dst)
{
if(dst->n != src->n)
printf("error in copy_solutionx -- dst->n != src->n");
reset_solutionx(dst);
dst->nPos = src->nPos;
for (int i=0;i<src->nPos;i++)
{
dst->index[i] = src->index[i];
dst->sign[src->index[i]] = src->sign[src->index[i]];
dst->x[src->index[i]] = src->x[src->index[i]];
}
}
// Asub*x + bsub + lambda*sign(xsub) = 0
// cvSolve is employed to solve A*x = b type problem, CHOLESKY method is used(A is a positive definite matrix)
void solve_reduced_problem(float** A, float* b, SolutionX *x, float lambda)
{
int nPos = x->nPos;
CvMat* Asub = cvCreateMat(nPos,nPos,CV_32FC1);
CvMat* bsub = cvCreateMat(nPos,1,CV_32FC1);
CvMat* xsub = cvCreateMat(nPos,1,CV_32FC1);
for (int i=0;i<nPos;i++)
{
CV_MAT_ELEM(*bsub,float,i,0) = -(b[x->index[i]] + lambda*x->sign[x->index[i]]);
for (int j=0;j<nPos;j++)
{
CV_MAT_ELEM(*Asub,float,i,j) = A[x->index[i]][x->index[j]];
}
}
int flag = 0;
int count = 0;
while (true)
{
flag = cvSolve(Asub,bsub,xsub,CV_CHOLESKY);//cvSolve return 0, if A is Singular
if(flag!=0)
break;
for (int i=0;i<nPos;i++)
CV_MAT_ELEM(*Asub,float,i,i) = A[x->index[i]][x->index[i]] + pow(10.0f,count-6);
count++;
}
for (int i=0;i<nPos;i++)
{
x->x[x->index[i]] = CV_MAT_ELEM(*xsub,float,i,0);
}
cvReleaseMat(&Asub);
cvReleaseMat(&bsub);
cvReleaseMat(&xsub);
}
// fval = 1/2 x'*A*x + b'*x + lambda*|x|
double cal_function_val(float** A,float* b, SolutionX* x_in,float lambda)
{
int n = x_in->nPos;
int* index = x_in->index;
float* x = x_in->x;
double fval = 0;
for (int i=0;i<n;i++)
{
for (int j=0;j<n;j++)
{
fval += 0.5*A[index[i]][index[j]]*x[index[i]]*x[index[j]];
}
}
for (int i=0;i<n;i++)
{
fval += b[index[i]]*x[index[i]]+ lambda*abs(x[index[i]]);
}
return fval;
}
int mySign(float x)
{
if(x>0) return 1;
if(x<0) return -1;
return 0;
}
//min 1/2 x'*A*x + b'*x + lambda*|x|
void sparse_coding(float** A,float* b, float* x_out,float lambda, int n)
{
float* gradient = new float[n];
int maxIndex = 0;
float maxValue = 0;
float fval;
float eps = 1e-9;
SolutionX x;
SolutionX x_new;
SolutionX xs;
SolutionX xmin;
//Create solutions to store the current X and the temporary X values
create_solutionx(&x,n);
create_solutionx(&x_new,n);
create_solutionx(&xs,n);
create_solutionx(&xmin,n);
//Check the gradient for every dimension, and find the dimension with the largest gradient
cal_gradient(A,b,&x,n,gradient);
maxValue = -1e10;
for(int i=0;i<n;i++)
{
if(abs(x.sign[i])!=0) continue;
if(abs(gradient[i])>maxValue)
{
maxIndex = i;
maxValue = abs(gradient[i]);
}
}
while(true)
{
//bring in a variable according the gradient
copy_solutionx(&x,&x_new);
//If the absolute value of the largest gradient is less than lambda, then the function reaches convergence;
//else set the dimension with the largest gradient active
if(gradient[maxIndex]>lambda+eps)
{
x.x[maxIndex] = (lambda - gradient[maxIndex]) / A[maxIndex][maxIndex];//The initialization is important for the variable kick out section
x.sign[maxIndex] = -1;
x.index[x.nPos] = maxIndex;
x.nPos++;
x_new.sign[maxIndex] = -1;
x_new.index[x_new.nPos] = maxIndex;
x_new.nPos++;
}
else if(gradient[maxIndex]<-lambda-eps)
{
x.x[maxIndex] = (-lambda - gradient[maxIndex]) / A[maxIndex][maxIndex];
x.sign[maxIndex] = 1;
x.index[x.nPos] = maxIndex;
x.nPos++;
x_new.sign[maxIndex] = 1;
x_new.index[x_new.nPos] = maxIndex;
x_new.nPos++;
}
else
{
break;
}
//Solve the reduced problem, and get the function value with the new value of X
solve_reduced_problem(A,b,&x_new,lambda);
fval = cal_function_val(A,b,&x_new,lambda);
copy_solutionx(&x_new,&xmin);
//kick out variables
//if the old signs are not consistent with the new solution, try to kick the inconsistent solution out if the funVal reduces
//Note: the new variable which was just brought in this round is an exception
//Doing discrete line search to find the X with the minimum function value
for (int i=0;i<(x_new.nPos-1)/*the new variable is an exception*/;i++)
{
if (mySign(x_new.x[x_new.index[i]]) != x_new.sign[x_new.index[i]])
{
copy_solutionx(&x_new,&xs);
float wxnew,wx;
float xx_new, xx;
xx_new = x_new.x[x_new.index[i]];
xx = x.x[x.index[i]];
wx = xx_new/(xx_new-xx+eps);
wxnew = (1-wx);
for(int j=0;j<xs.nPos;j++)
{
xs.x[xs.index[j]] = wxnew*x_new.x[xs.index[j]] + wx*x.x[xs.index[j]];
}
xs.x[x_new.index[i]] = 0;
float temp_fval = cal_function_val(A,b,&xs,lambda);
if (temp_fval<fval)
{
fval = temp_fval;
copy_solutionx(&xs,&xmin);
}
}
}
//Keep the signs are consistent with the value
int posCount = 0;
for (int i=0;i<n;i++)
{
xmin.sign[i] = mySign(xmin.x[i]);
if (xmin.sign[i]!=0)
{
//printf("%d %f\n",i,xmin.x[i]);
xmin.index[posCount] = i;
posCount++;
}
}
xmin.nPos = posCount;
//Update X with the value with minimum function value
copy_solutionx(&xmin,&x);
//Calculate the gradient for the current X
cal_gradient(A,b,&x,n,gradient);
maxValue = -1e10;
for(int i=0;i<n;i++)
{
if(x.x[i]!=0) continue;
if(abs(gradient[i])>maxValue)
{
maxIndex = i;
maxValue = abs(gradient[i]);
}
}
}
//Output the final value of X
memset(x_out,0,n*sizeof(float));
for (int i=0;i<x.nPos;i++)
{
x_out[x.index[i]] = x.x[x.index[i]];
}
//Release
delete[] gradient;
release_solutionx(&x);
release_solutionx(&x_new);
release_solutionx(&xs);
release_solutionx(&xmin);
}</span>