我现在有个朴素贝叶斯分类器代码,代码如下:
function out=my_bayes(X,Y)
%X为原数据集,Y是要预测的数据,out是返回预测的结果
%%%%%%%%%%%%%%%%%%%%%%打开test.txt文件
clc;
file = textread('train1.txt','%s','delimiter','\n','whitespace','');
[m,n]=size(file);
for i=1:m
words=strread(file{i},'%s','delimiter',' ');
words=words';
X{i}=words;
end
X=X';%转置
%%%%%%%%%%%%%%%%%%%%%打开predict.txt文件
file = textread('predict1.txt','%s','delimiter','\n','whitespace','');
[m,n]=size(file);
for i=1:m
words=strread(file{i},'%s','delimiter',' ');
words=words';
Y{i}=words;
end
Y=Y';%转置
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%训练部分
[M,N]=size(X);
[m,n]=size(X{1});
decision=attribute(X,n); %提取决策属性
Pro=probality(decision);%计算决策属性个分量概率
for i=1:n-1
[post_pro{i},post_name{i}]=post_prob(attribute(X,i),decision); %求各条件属性后验概率
end
%%%%%%%%%%%%%%%%%%%%%%%%预测部分
uniq_decis=unique(decision); %求决策属性的类别
P_X=ones(size(uniq_decis,1),1); %初始化决策属性后验概率
[M,N]=size(Y);
k=1;
for i=1:M
for j=1:n-1
[temp,loc]=ismember(attribute({Y{i}},j),unique(attribute(X,j)));%决策属性计算后验概率
P_X = post_pro{j}(:,loc).*P_X; %各条件属性后验概率之积(贝叶斯公式)
end
[MAX,I]=max(P_X); %寻找最大值
out{k}=uniq_decis{I}; %哪一类决策属性后验概率最大,则次样本属于那一类
k=k+1;
P_X=ones(size(uniq_decis,1),1);%再次初始化决策属性后验概率P_X,以便为下一样本计算作准备
end
out=out'; %输出结果(转置形式)
%%%%%%%%%%%%%%%%%%%%%各子程序
function y=attribute(X,n) %功能为提取出原数据集X中的第n个属性所对应的一列值
[M,N]=size(X);
for i=1:M
temp{i}=X{i}{n}; %将指定列值以temp暂量保存
end
y=temp';%转置
%%%%%%%%%%%%%%%%%%%%%
function [post_pro,post_name]=post_prob(E,D)
%E为目标属性,D为决策属性,post_pro计算目标属性对应于决策属性的后验概率
%post_name为所求的后验概率变量名称
[M,N]=size(D);
decision=unique(D);%决策属性种类
attri=unique(E); %条件属性种类
[m1,n1]=size(decision);
[m2,n2]=size(attri);
temp=cat(2,E,D); %连接条件属性和决策属性
post_pro=zeros(m1,m2); %后验概率初始化
for i=1:M
for j=1:m2
for k=1:m1
post_name{k,j}=cat(2,{attri{j}},{decision{k}})
if (isequal(temp(i,:),post_name{k,j}))
post_pro(k,j)=post_pro(k,j)+1; %条件属性后验概率(频数)
end
end
end
end
for i=1:m1
post_pro(i,:)=post_pro(i,:)/sum(post_pro(i,:));%求得条件属性后验概率
end
%%%%%%%%%%%%%%%%%%%
function y=probality(E) %计算该属性类的概率
[M,N]=size(E);
class=unique(E); %求该决策属性的类别
[m,n]=size(class);
p=zeros(m,1);%先验概率p初始化
for i=1:M
for j=1:m
if(isequal(E{i},class{j}))
p(j)=p(j)+1; %求各个样本的先验概率(频数)
end
end
end
y=p/M;%得各样本概率
问题是,我运行的时候,提示出现错误
??? Subscript indices must either be real positive integers or logicals.
Error in ==> my_nbc at 41
P_X = post_pro{j}(:,loc).*P_X; %各条件属性后验概率之积(贝叶斯公式)
那位大师给看下,什么问题,该怎么调整啊?