Mex文件编写实例: entropy used in mutual information

Jianxin Wu在他的文章《Compact Representation for Image Classification: To Choose or to Compress?》中提出了一种用于特征筛选的方法,即互信息(Mutual Information)法。


其Publication主页  http://cs.nju.edu.cn/wujx/publication.htm 上给出了具体的Matlab实现代码和论文PDF。


博主由于最近需要利用MI方法来筛选特征,而Matlab版本的代码运行速度太慢,故特意编写了C++的mex代码,并给出了Matlab中对应位置的注释。经验证无错误。


Jianxin Wu编写的Matlab代码如下:

(本博客系原创,转载请注明出处:http://blog.csdn.net/xuexiyanjiusheng/article/details/46882447)


function [entX,entXY] = entropy(dinsts, labels, bins)
% Written by Jianxin Wu (Jianxin Wu, wujx2001@gmail.com) 

% % Given the data in 'dinsts', where each row is one example 
% and the 'labels' (integers 1, 2, ..., C, where C is the number of categories)
% One parameter 'bins' is needed: 
% case '-2': quantize dinst into 2-bits, then compute MI 
% case '-1': quantize dinst into 1-bit, then compute MI 
% else: be a positive integer, uniform quantization 

% Output: 
% entX: the entropy of each feature dimension 
% entXY: the mutual information between each dimension and the labels 

% % You may want to change the thresholds for bins = -1 (1-bit) or -2 (2-bits) 
% e.g., if your data is non-negative, current threshold values in X will not work 
% they are designed for FV / VLAD features (which are [-1 +1]) 
% % Even if your data are [-1 +1], you may want to change the threshold +/- 0.0125 to 
% a value that fits your dataset, e.g., 0.1- and 0.9-quantile分位数  

[~, dim] = size(dinsts);%number of columns 
nr_class = max(labels); % we assume labels are 1, 2, 3, ... 

if bins == -2 % this is the case for quantized 2-bit version 
    X = [-1 -0.0125 0 0.0125 1]; 
elseif bins == -1 % 1-bit version 
    X = [-1 0 1]; 
else
    vmin = min(min(dinsts));
    vmax = max(max(dinsts)); 
    X = vmin:((vmax-vmin)/bins):vmax+1e-6; 
end

bins = length(X)-1; 
probX = zeros(bins,dim); 
probXY = zeros(bins*nr_class,dim); 

for i = 1:dim 
    if mod(i,1000)==0 
        fprintf('.'); 
    end
    if mod(i,100000)==0 
        fprintf('\n'); 
    end
    
    temp = dinsts(:,i);%i-th column
    frequency = histc(temp, X); 
    frequency = frequency / sum(frequency); 
    probX(:,i) = frequency(1:end-1); 
    this = zeros(1,nr_class*bins); 
    
    for j = 1:nr_class 
        frequency = histc(temp(labels==j), X); 
        this((j-1)*bins+1:j*bins) = frequency(1:end-1); 
    end
    this = this / sum(this); 
    probXY(:,i) = this; 
end

%fprintf('\n');
entX = -sum(probX.*log2(probX+1.0e-6)); 
entXY = -sum(probXY.*log2(probXY+1.0e-6));


博主编写的对应C++代码如下:


//written by Panfeng Li(Panfeng Li,panfengli@hust.edu.en)
//This code is just used for study
#include <math.h>
#include <string.h>
#include "mex.h"

void histc_c(double *input, double *edges, int len_input, int bins, double *outfreq, int *count)//just deal with one dimension
{
	int i, j;
	for (i = 0; i<len_input; i++)//i-th input
	{
		for (j = 0; j< bins; j++)
		{
			if (input[i] >= edges[j] && input[i]<edges[j + 1])
			{
				outfreq[j]++;
				(*count)++;
			}
		}
		if (input[i] == edges[bins])
		{
			outfreq[bins]++;
			(*count)++;
		}
	}
}

static void ent(double *dinsts, double *labels, double *X, int M, int N, int bins, int nr_class, double *entX, double *entXY)
{
	//dinsts([M N])
	//probX = zeros(bins,dim); 
	//probXY = zeros(bins*nr_class,dim);

	double	*temp, *temp_;
	double *frequency;
	double *this_;
	int i, j, k;
	int count, count_;
	double tem, tem_;
	double *probX, *probXY;

	temp = new double[M];//memory one column of dinsts
	temp_ = new double[M];
	frequency = new double[bins + 1];//bins = length(X)-1
	this_ = new double[nr_class*bins];//nr_class = max(labels)
	//this = zeros(1,nr_class*bins);
	probX = new double[bins*N];
	probXY = new double[bins*nr_class * N];

	memset(probX, 0, (bins*N)*sizeof(double));
	memset(probXY, 0, (bins*nr_class * N)*sizeof(double));


	//calculate probX,probXY
	for (i = 0; i < N; i++)//i-th column of dinsts
	{
		//calculate probX
		memset(frequency, 0, (bins + 1)*sizeof(double));//each column gets its frequency

		for (j = 0; j < M; j++)//j-th row of dinsts
		{
			temp[j] = dinsts[j + i * M];//temp = dinsts(:,i)
		}

		count = 0;
		histc_c(temp, X, M, bins, frequency, &count);//frequency = histc(temp, X); 
		for (j = 0; j < bins + 1; j++)
		{
			//frequency[j] = frequency[j] / (count + 1.0e-6);//frequency = frequency / sum(frequency); 
			frequency[j] = frequency[j] / (count );
		}

		//probX = zeros(bins,dim); 
		for (j = 0; j < bins; j++)
		{
			probX[j + i * bins] = frequency[j];//probX(:,i) = frequency(1:end-1);
		}



		//calculate probXY
		memset(this_, 0, (nr_class*bins)*sizeof(double));//this = zeros(1,nr_class*bins); 

		tem = 0;
		tem_ = 0;
		for (j = 1; j <= nr_class; j++)//for j = 1:nr_class
		{
			count_ = 0;//for temp_
			memset(frequency, 0, (bins + 1)*sizeof(double));
			for (k = 0; k < M; k++)
			{
				if (labels[k] == j)//labels = 1 or 2
				{
					temp_[count_] = temp[k];//temp(labels==j)
					count_++;
				}
			}

			count = 0;//for frequency
			histc_c(temp_, X, count_, bins, frequency, &count);//frequency = histc(temp(labels==j), X);

			for (k = (j - 1)*bins; k < j*bins; k++)
			{
				this_[k] = frequency[k - (j - 1)*bins];//this((j-1)*bins+1:j*bins) = frequency(1:end-1); 
				//!!!note that 2+1:4 = 3:4;
				tem += this_[k];
				tem_++;
			}
		}

		for (j = 0; j < tem_; j++)
		{
			//this_[j] = this_[j] / (tem + 1.0e-6);//this = this / sum(this); 
			this_[j] = this_[j] / (tem);
		}

		//probXY = zeros(bins*nr_class,dim)
		for (j = 0; j < bins*nr_class; j++)
		{
			probXY[j + i * (bins*nr_class)] = this_[j];//probXY(:,i) = this; 
		}

	}


	//calculate entX,entXY
	for (i = 0; i < N; i++)
	{
		entX[i] = - probX[i * 2] * (log(probX[i * 2] + 1.0e-6) / log(2.0)) - probX[i * 2 + 1] * (log(probX[i * 2 + 1] + 1.0e-6) / log(2.0));//entX = -sum(probX.*log2(probX + 1.0e-6))
		entXY[i] = - probXY[i * 4] * (log(probXY[i * 4] + 1.0e-6) / log(2.0)) - probXY[i * 4 + 1] * (log(probXY[i * 4 + 1] + 1.0e-6) / log(2.0)) - probXY[i * 4 + 2] * (log(probXY[i * 4 + 2] + 1.0e-6) / log(2.0)) - probXY[i * 4 + 3] * (log(probXY[i * 4 + 3] + 1.0e-6) / log(2.0));//entXY = -sum(probXY.*log2(probXY + 1.0e-6))
	}
	

	delete[] temp;
	delete[] temp_;
	delete[] frequency;
	delete[] this_;
	delete[] probX;
	delete[] probXY;

}

//dinsts, labels, X, row, dim, bins, nr_class          
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[])

{
	double *dinsts;
	double *labels;
	double *X;
	int M, N, bins, nr_class;

	double *entX, *entXY;

	dinsts = mxGetPr(prhs[0]);
	labels = mxGetPr(prhs[1]);
	X = mxGetPr(prhs[2]);

	M = (int)mxGetScalar(prhs[3]);
	N = (int)mxGetScalar(prhs[4]);
	bins = (int)mxGetScalar(prhs[5]);
	nr_class = (int)mxGetScalar(prhs[6]);

	//entX = [1,dim]; 
	//entXY = [1,dim];
	plhs[0] = mxCreateDoubleMatrix(1,(mwSize)N, mxREAL);
	plhs[1] = mxCreateDoubleMatrix(1,(mwSize)N, mxREAL);

	entX = mxGetPr(plhs[0]);
	entXY = mxGetPr(plhs[1]);

	ent(dinsts, labels, X, M, N, bins, nr_class, entX, entXY);

}
 

直接mex ent.cpp就可以使用了。只需要注意输进来的参数要设置好。


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值