%%% The last column of dataset holds the class label; k is the number of features to select.
function result=MutualInformation(dataset,k)
% MUTUALINFORMATION Rank discrete features by mutual information with the class.
%   result = MutualInformation(dataset, k)
%
%   dataset : N-by-(F+1) numeric matrix. Columns 1..F are discrete feature
%             values; the last column is the class label.
%   k       : number of features to select. Values larger than F are
%             clamped to F; values below 0 are treated as 0.
%
%   result  : min(k,F)-by-2 matrix. Column 1 is the feature (column) index,
%             column 2 is I(f;Y) = H(Y) - H(Y|f) in nats. Rows are sorted by
%             mutual information in descending order, so the most
%             informative features come first.

character_count=size(dataset,2)-1;
N=size(dataset,1);

% Nothing to rank: no samples or no feature columns.
if N==0||character_count<1
    result=zeros(0,2);
    return;
end

labels=dataset(:,character_count+1);
classes=unique(labels);

% H(Y) = -sum_c p_c*log(p_c) = log(N) - sum_c n_c*log(n_c)/N
h_y=0;
for i=1:length(classes)
    class_count=sum(labels==classes(i));
    h_y=h_y+class_count*log(class_count)/N;
end
H_Y=log(N)-h_y;

% One row per feature: [feature index, mutual information].
character_order=zeros(character_count,2);
for i=1:character_count
    character=dataset(:,i);
    character_value=unique(character);
    % H(Y|f) = sum_j sum_c (o_jc/N) * log(o_j/o_jc)
    H_Y_f=0;
    for j=1:length(character_value)
        value_mask=(character==character_value(j));
        o_j=sum(value_mask);
        value_labels=labels(value_mask);
        for m=1:length(classes)
            o_jk=sum(value_labels==classes(m));
            if o_jk==0
                % Empty cell contributes 0 (limit of x*log(1/x) as x->0);
                % evaluating it directly would give 0*Inf = NaN.
                continue;
            end
            H_Y_f=H_Y_f+o_jk*log(o_j/o_jk)/N;
        end
    end
    character_order(i,:)=[i,H_Y-H_Y_f];
end

% Highest mutual information first, then keep the top k.
character_order=sortrows(character_order,-2);
k=max(0,min(k,character_count));
result=character_order(1:k,:);
end