sparse Coding的C++代码——详细注释版

最新推荐文章于 2021-01-30 07:22:16 发布

MachineLP

最新推荐文章于 2021-01-30 07:22:16 发布

阅读量2.1k

点赞数 1

分类专栏：机器学习文章标签： sparse Coding

本文链接：https://blog.csdn.net/u014365862/article/details/51307471

版权

机器学习专栏收录该内容

292 篇文章 18 订阅

订阅专栏

<span style="font-size:18px;">#include "stdafx.h"
#include <stdlib.h>
#include <Windows.h>
#include <io.h>
#include <cv.h>
#include <ml.h>
#include <highgui.h>
#include <cxcore.h>

#include "sparse_coding.h"
#include "retr_data_base.h"

using namespace cv;

// 归一化的操作。
void normalizing(float* f, int nFeature)
{
	for(int i=0; i<nFeature; i++)
		f[i] = f[i] / 255.0f;
}

void extrsct_image_feature(float* f, IplImage* src_image, int width, int height)
{
	// 调整image的大小为15*15。
	IplImage* resizedImage = cvCreateImage(cvSize(width,height), IPL_DEPTH_8U,1);
	cvResize(src_image,resizedImage);
	for(int i=0; i<height; i++)
		for(int j=0; j<width; j++)
			// CV_IMAGE_ELEM用于读取image中的像素值。
			f[i*width+j] = (float)CV_IMAGE_ELEM(resizedImage,uchar,i,j);
	// 将image归一化。
	normalizing(f,width*height);
	cvReleaseImage(&resizedImage);
}

void load_basis(float** basis, int* label, int width, int height, char*root_path, vector<string>* label_name)
{
	// 获取得到文件数，子文件名，每个子文件中image的名字。
	ClassInfo* info = TraverClassFolder(root_path);
	char src_full_path[300];

	int sampleCount = 0;
	// 对于每一个类别，也即是label。
	for(int i=0; i<info->classCount; i++)
	{
		(*label_name).push_back(info->className[i].c_str());
		// 对于每个类别中的每一个image。info->fileCounts[i]表示每个类别中的文件数。
		for(int j=0; j<info->fileCounts[i]; j++)
		{
			sprintf(src_full_path,"%s\\%s\\%s.jpg", root_path,info->className[i].c_str(),info->fileName[i][j].c_str());
			IplImage* src_image = cvLoadImage(src_full_path,0);
			extrsct_image_feature(basis[sampleCount],src_image,width,height);
			label[sampleCount] = i;
			sampleCount++;

			cvReleaseImage(&src_image);
		}
	}

}

float cal_recovery_error(float** trainSample, int* trainLabel, int nClass, int nSample,int nFeature,float* testSample, float* x, int class_label)
{
	float* temp = new float[nFeature];
	float error = 0;
	// 给一个临时变量。
	for(int i=0; i<nFeature; i++)
	{
		temp[i] = testSample[i];
	}

	for(int i=0; i<nClass*nSample; i++)
	{
		if(x[i] != 0 && trainLabel[i] == class_label)
		{
			for(int j=0; j<nFeature; j++)
			{
				// 相当于乘以一个x后，所有的行数相减。
				temp[j] -= x[i]*trainSample[i][j];
			}
		}
	}
	for(int j=0; j<nFeature; j++)
		error += temp[j]*temp[j];

	delete temp;
	return sqrt(error);
}

int sparse_coding_classifier(float** trainSample, int* trainLabel, int nClass, int nSample,int nFeature,float* testSample, float* x)
{
	float minError = 1e10;
	float minIndex = 0;
	float error;
	for(int i=0; i<nClass; i++)
	{
		error = cal_recovery_error(trainSample,trainLabel,nClass,nSample,nFeature,testSample,x,i);
		if(error<minError)
		{
			minError = error;
			minIndex = i;
		}
	}
	return minIndex;
}


int _tmain(int argc, _TCHAR* argv[])
{
	// 一共含有 38个人。
	int nClass = 38;
	// 每个人含有 30 张照片。
	int nSample = 30;
	// 照片的大小调节为 15 x 15。
	int width = 15;
	int height = 15;

	// 定义basis，label。用于存放样本和标签。
	// basis是一个二维的数组，用于存放所有的图片。每一行对于一张图片。
	// 申请空间。首先对于每一行 new，也就是含有的样本数。
	float ** basis = new float*[nClass * nSample];
	// 其次，对于每一列 new， 也就是图片的大小。
	for(int i=0; i<nClass*nSample; i++)
		basis[i] = new float[width*height];
	// 定义数组，来存放相对应的每行image的label。 大小为总的样本数。
	int *label = new int[nClass*nSample];
	// 存放每个人所在的文件名。
	vector<string> label_name;
	// A,b; 存放矩阵相乘后的矩阵。
	float** A = new float*[nClass*nSample];
	for(int i=0; i<nClass*nSample; i++)
		A[i] = new float[nClass*nSample];
	float* b = new float[nClass*nSample];
	float* x = new float[nClass*nSample]; 

	// 训练样本和测试样本存放的文件夹。
	char train_root_path[300] = "Train";
	char test_root_path[300] = "Test";

	// 得到basis,label,label_name。 &label_name表示地址。
	load_basis(basis,label,width,height,train_root_path,&label_name);
	
	// Test
	float lambda = 0.1;
	ClassInfo* info = TraverClassFolder(test_root_path);
	// 用于存放extrsct_image_feature后的image。
	float* testSample = new float[width*height];
	char src_full_path[300];
	int predict_label;
	// cal the matrix A = trainSample'*trainSample size(trainSample) = [nTrainSample, nFeature]
	for(int i=0; i<nClass*nSample; i++)
	{
		for(int j=0; j<nClass*nSample; j++)
		{
			A[i][j] = 0;
			for(int k=0; k<width*height; j++)
				A[i][j] += basis[i][k]*basis[j][k];
		}
	}

	// A[i][i] 对角线上的元素。
	for(int i=0; i<nClass*nSample; i++)
		A[i][i] += 1e-4;

	int posCount = 0;
	int totalCount = 0;
	// 对于每一个类别。
	for(int i=0; i<info->classCount; i++)
	{
		// 对于对于每一个类别中的image的数量。
		for(int j=0; j<info->fileCounts[i]; j++)
		{
			sprintf(src_full_path,"%s\\%s\\%s.jpg",test_root_path,info->className[i].c_str(),info->fileName[i][j].c_str());
			IplImage* src_image = cvLoadImage(src_full_path,0);
			extrsct_image_feature(testSample, src_image,width,height);
			// 对于test image 每个像与train image相乘后累加，
			for(int j=0; j<nClass*nSample; j++)
			{
				b[j] = 0;
				for(int k=0; k<width*height; k++)
				{
					b[j] += -basis[j][k] * testSample[k];
				}
			}

			sparse_coding(A,b,x,lambda,nClass*nSample);
			predict_label = sparse_coding_classifier(basis,label,nClass,nSample,width*height,testSample,x);
			printf("%s  %s",label_name[predict_label].c_str(), info->className[i].c_str());
			
			//判断预测是否正确。
			if(label_name[predict_label].compare(info->className[i].c_str())==0)
			{
				posCount++;
			}
			totalCount++;

			printf("  %d/%d -- %f\n",posCount,totalCount,(float)posCount/totalCount);
			cvReleaseImage(&src_image);
		}
	}

	system("pause");

	return 0;
}</span>

retr_data_base.h

<span style="font-size:18px;">#ifndef RETR_DATA_BASE
#define RETR_DATA_BASE
#include "windows.h"
#include "stdio.h"
#include <vector>
#include <cstring>

using namespace std;

#define PATH_LENGTH 300

struct ClassInfo
{
	// 类别数。
	int classCount;
	// 记录每个类别的文件数。
	vector<int> fileCounts;
	// 子文件名。 也是类别名字。
	vector<string> className;
	// 每个子文件中的image的文件名。
	vector<vector<string>> fileName;
};

ClassInfo* TraverClassFolder(const char* path);

#endif</span>

retr_data_base.cpp

<span style="font-size:18px;">#include "retr_data_base.h"

ClassInfo* TraverClassFolder(const char* path)
{
	ClassInfo* classInfo = new ClassInfo();
	vector<string> pathVec;
	vector<string> fileVec;
	string rootPath = path;
	rootPath.append("\\");

	pathVec.push_back(rootPath);
	while(!pathVec.empty())
	{
		WIN32_FIND_DATAA findFileData;
		HANDLE hFind = INVALID_HANDLE_VALUE;
		string searchPath = pathVec.front();
		searchPath.append("*");
		hFind = FindFirstFileA(searchPath.c_str(),&findFileData);
		if(hFind == INVALID_HANDLE_VALUE)
		{
			return classInfo;
		}

		fileVec.clear();
		int folderCount = 0;
		int fileCount = 0;
		while(FindNextFileA(hFind,&findFileData))
		{
			if(strcmp(findFileData.cFileName,"..")!=0 )
			{
				if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_HIDDEN || findFileData.dwFileAttributes == 38 || findFileData.dwFileAttributes == FILE_ATTRIBUTE_SYSTEM)
					continue;
				if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY)
				{
					string dir = findFileData.cFileName;
					classInfo->className.push_back(dir);
					string rootTemp = rootPath;
					dir = rootPath.append(dir);
					rootPath = rootTemp;
					dir.append("\\");
					pathVec.push_back(dir);
					folderCount ++;
				}
				else if(findFileData.dwFileAttributes == FILE_ATTRIBUTE_NORMAL || findFileData.dwFileAttributes == FILE_ATTRIBUTE_ARCHIVE)
				{
					string file = findFileData.cFileName;
					char extension[200];
					_splitpath(file.c_str(),NULL,NULL,NULL,extension);
					string withoutExtension = file;
					withoutExtension = withoutExtension.substr(0,withoutExtension.length()-strlen(extension));
					fileVec.push_back(withoutExtension);
					fileCount ++;
				}
			}
		}
		if(folderCount!=0)
			classInfo->classCount = folderCount;
		if(fileCount != 0)
		{
			classInfo->fileCounts.push_back(fileCount);
			classInfo->fileName.push_back(fileVec);
		}
		pathVec.erase(pathVec.begin());
	}
	return classInfo;
}</span>

sparse_coding.h

<span style="font-size:18px;">#include <cv.h>

//Reference:
//Efficient sparse coding algorithms (Honglak Lee 2007)

//min 1/2 x'*A*x + b'*x + lambda*|x|
void sparse_coding(float** A,float* b, float* x_out,float lambda, int n);</span>

sparse_coding.cpp

<span style="font-size:18px;">#include "sparse_coding.h"

//The structure defined for efficiency. In most cases, x is very sparse, 
//the computation cost will significantly reduced if the calculation is only implemented on nonzero variables
struct SolutionX 
{
	int n;// dimension of x
	float* x;
	int nPos;//the number of nonzero variables
	int* index;//the index of the nonzero variables
	int* sign;//the signs of x
};


void create_solutionx(SolutionX *s, int n)
{
	s->n = n;
	s->nPos = 0;
	s->index = new int[n];
	s->sign = new int[n];
	s->x = new float[n];
	memset(s->index,0,n*sizeof(int));
	memset(s->sign,0,n*sizeof(int));
	memset(s->x,0,n*sizeof(float));
}

void release_solutionx(SolutionX *s)
{
	delete[] s->index;
	delete[] s->sign;
	delete[] s->x;
}

inline void cal_gradient(float** A, float* b, SolutionX* x,int n, float* gradient)
{
	memset(gradient,0,n*sizeof(float));
	for (int i=0;i<n;i++)
	{
		for (int j=0;j<x->nPos;j++)
			gradient[i] += A[i][x->index[j]]*x->x[x->index[j]];
	}

	for(int i=0;i<n;i++)
		gradient[i] += b[i];
}

void reset_solutionx(SolutionX* x)
{
	for (int i=0;i<x->nPos;i++)
	{
		x->x[x->index[i]] = 0;
		x->sign[x->index[i]] = 0;
	}
	x->nPos = 0;
}

void copy_solutionx(SolutionX *src,SolutionX* dst)
{
	if(dst->n != src->n)
		printf("error in copy_solutionx -- dst->n != src->n");

	reset_solutionx(dst);

	dst->nPos = src->nPos;
	for (int i=0;i<src->nPos;i++)
	{
		dst->index[i] = src->index[i];
		dst->sign[src->index[i]] = src->sign[src->index[i]];
		dst->x[src->index[i]] = src->x[src->index[i]];
	}	
}



// Asub*x + bsub + lambda*sign(xsub) = 0
// cvSolve is employed to solve A*x = b type problem, CHOLESKY method is used(A is a positive definite matrix)
void solve_reduced_problem(float** A, float* b, SolutionX *x, float lambda)
{
	int nPos = x->nPos;
	CvMat* Asub = cvCreateMat(nPos,nPos,CV_32FC1);
	CvMat* bsub = cvCreateMat(nPos,1,CV_32FC1);
	CvMat* xsub = cvCreateMat(nPos,1,CV_32FC1);

	for (int i=0;i<nPos;i++)
	{
		CV_MAT_ELEM(*bsub,float,i,0) = -(b[x->index[i]] + lambda*x->sign[x->index[i]]);
		for (int j=0;j<nPos;j++)
		{
			CV_MAT_ELEM(*Asub,float,i,j) = A[x->index[i]][x->index[j]];
		}
	}


	int flag = 0;
	int count = 0;
	while (true)
	{
		flag = cvSolve(Asub,bsub,xsub,CV_CHOLESKY);//cvSolve return 0, if A is Singular
		if(flag!=0)
			break;
		for (int i=0;i<nPos;i++)
			CV_MAT_ELEM(*Asub,float,i,i) = A[x->index[i]][x->index[i]] + pow(10.0f,count-6);
		count++;
	}

	for (int i=0;i<nPos;i++)
	{
		x->x[x->index[i]] = CV_MAT_ELEM(*xsub,float,i,0);
	}

	cvReleaseMat(&Asub);
	cvReleaseMat(&bsub);
	cvReleaseMat(&xsub);
}

// fval = 1/2 x'*A*x + b'*x + lambda*|x|
double cal_function_val(float** A,float* b, SolutionX* x_in,float lambda)
{
	int n = x_in->nPos;
	int* index = x_in->index;
	float* x = x_in->x;
	double fval = 0;
	for (int i=0;i<n;i++)
	{
		for (int j=0;j<n;j++)
		{
			fval += 0.5*A[index[i]][index[j]]*x[index[i]]*x[index[j]];
		}
	}

	for (int i=0;i<n;i++)
	{
		fval += b[index[i]]*x[index[i]]+ lambda*abs(x[index[i]]);
	}

	return fval;
}

int mySign(float x)
{
	if(x>0) return 1;
	if(x<0) return -1;
	return 0;
}

//min 1/2 x'*A*x + b'*x + lambda*|x|
void sparse_coding(float** A,float* b, float* x_out,float lambda, int n)
{
	float* gradient = new float[n];
	int maxIndex = 0;
	float maxValue = 0;
	float fval;
	float eps = 1e-9;
	SolutionX x;
	SolutionX x_new;
	SolutionX xs;
	SolutionX xmin;

	//Create solutions to store the current X and the temporary X values
	create_solutionx(&x,n);
	create_solutionx(&x_new,n);
	create_solutionx(&xs,n);
	create_solutionx(&xmin,n);

	//Check the gradient for every dimension, and find the dimension with the largest gradient
	cal_gradient(A,b,&x,n,gradient);
	maxValue = -1e10;
	for(int i=0;i<n;i++)
	{
		if(abs(x.sign[i])!=0) continue;
		if(abs(gradient[i])>maxValue)
		{
			maxIndex = i;
			maxValue = abs(gradient[i]);
		}
	}

	while(true)
	{
		//bring in a variable according the gradient
		copy_solutionx(&x,&x_new);

		//If the absolute value of the largest gradient is less than lambda, then the function reaches convergence;
		//else set the dimension with the largest gradient active
		if(gradient[maxIndex]>lambda+eps)
		{
			x.x[maxIndex] = (lambda - gradient[maxIndex]) / A[maxIndex][maxIndex];//The initialization is important for the variable kick out section
			x.sign[maxIndex] = -1;
			x.index[x.nPos] = maxIndex;
			x.nPos++;

			x_new.sign[maxIndex] = -1;
			x_new.index[x_new.nPos] = maxIndex;
			x_new.nPos++;
		}
		else if(gradient[maxIndex]<-lambda-eps)
		{
			x.x[maxIndex] = (-lambda - gradient[maxIndex]) / A[maxIndex][maxIndex];
			x.sign[maxIndex] = 1;
			x.index[x.nPos] = maxIndex;
			x.nPos++;

			x_new.sign[maxIndex] = 1;
			x_new.index[x_new.nPos] = maxIndex;
			x_new.nPos++;
		}
		else
		{
			break;
		}

		//Solve the reduced problem, and get the function value with the new value of X
		solve_reduced_problem(A,b,&x_new,lambda);
		fval = cal_function_val(A,b,&x_new,lambda);
		copy_solutionx(&x_new,&xmin);

		//kick out variables
		//if the old signs are not consistent with the new solution, try to kick the inconsistent solution out if the funVal reduces
		//Note: the new variable which was just brought in this round is an exception

		//Doing discrete line search to find the X with the minimum function value
		for (int i=0;i<(x_new.nPos-1)/*the new variable is an exception*/;i++)
		{
			if (mySign(x_new.x[x_new.index[i]]) != x_new.sign[x_new.index[i]])
			{

				copy_solutionx(&x_new,&xs);

				float wxnew,wx;
				float xx_new, xx;
				xx_new = x_new.x[x_new.index[i]];
				xx = x.x[x.index[i]];
				wx = xx_new/(xx_new-xx+eps);
				wxnew = (1-wx);
				for(int j=0;j<xs.nPos;j++)
				{
					xs.x[xs.index[j]] = wxnew*x_new.x[xs.index[j]] + wx*x.x[xs.index[j]];
				}
				xs.x[x_new.index[i]] = 0;

				float temp_fval = cal_function_val(A,b,&xs,lambda);

				if (temp_fval<fval)
				{
					fval = temp_fval;
					copy_solutionx(&xs,&xmin);
				}
			}		
		}

		//Keep the signs are consistent with the value
		int posCount = 0;
		for (int i=0;i<n;i++)
		{
			xmin.sign[i] = mySign(xmin.x[i]);
			if (xmin.sign[i]!=0)
			{
				//printf("%d %f\n",i,xmin.x[i]);
				xmin.index[posCount] = i;
				posCount++;
			}
		}
		xmin.nPos = posCount;

		//Update X with the value with minimum function value
		copy_solutionx(&xmin,&x);

		//Calculate the gradient for the current X
		cal_gradient(A,b,&x,n,gradient);
		maxValue = -1e10;
		for(int i=0;i<n;i++)
		{
			if(x.x[i]!=0) continue;
			if(abs(gradient[i])>maxValue)
			{
				maxIndex = i;
				maxValue = abs(gradient[i]);
			}
		}
	}

	//Output the final value of X
	memset(x_out,0,n*sizeof(float));
	for (int i=0;i<x.nPos;i++)
	{
		x_out[x.index[i]] = x.x[x.index[i]];
	}

	//Release
	delete[] gradient;
	release_solutionx(&x);
	release_solutionx(&x_new);
	release_solutionx(&xs);
	release_solutionx(&xmin);
}</span>