% 个人学习记录:只进行了书上例题的结果验证,尚未改写成函数。
%%%%%% Naive Bayes classifier (textbook worked example) %%%%%%%%%%%%%%%%%%%
% Inputs  (defined inline below):
%   X : N-by-2 cell array of training features
%       (column 1 numeric, column 2 character label)
%   Y : N-by-1 vector of training labels, +1 or -1
%   x : 1-by-2 cell array, the instance to classify
% Output:
%   y : predicted label of x (+1 or -1), displayed at the end
clc
clear
X = {1,'S';1,'M';1,'M';1,'S';1,'S';2,'S';2,'M';2,'M';2,'L';2,'L';3,'L';3,'M';3,'M';3,'L';3,'L'};
Y = [-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1]';
x = {2,'S'};

Num = length(Y); % total number of training samples

% Prior probabilities of the positive and negative classes
PositiveIndex = find(Y == 1);
NegativeIndex = find(Y == -1);
PositiveNum = length(PositiveIndex); % number of positive training samples
NegativeNum = length(NegativeIndex); % number of negative training samples
format rat                           % display results as rational numbers
PositiveP = PositiveNum/Num; % prior probability of the positive class
NegativeP = NegativeNum/Num; % prior probability of the negative class

% Conditional probability table: P(feature value | class), estimated as
% relative frequencies inside each class.
PositiveX1 = X(PositiveIndex,1); % feature 1 of the positive samples
PositiveX2 = X(PositiveIndex,2); % feature 2 of the positive samples
NegativeX1 = X(NegativeIndex,1); % feature 1 of the negative samples
NegativeX2 = X(NegativeIndex,2); % feature 2 of the negative samples
P_PositiveX11 = nnz(cell2mat(PositiveX1)==1)/PositiveNum;
P_PositiveX12 = nnz(cell2mat(PositiveX1)==2)/PositiveNum;
P_PositiveX13 = nnz(cell2mat(PositiveX1)==3)/PositiveNum;
% strcmp compares whole labels, so it also works if a label is longer than
% one character (cell2mat(...)=='S' would fail in that case).
P_PositiveX21 = nnz(strcmp(PositiveX2,'S'))/PositiveNum;
P_PositiveX22 = nnz(strcmp(PositiveX2,'M'))/PositiveNum;
P_PositiveX23 = nnz(strcmp(PositiveX2,'L'))/PositiveNum;
P_NegativeX11 = nnz(cell2mat(NegativeX1)==1)/NegativeNum;
P_NegativeX12 = nnz(cell2mat(NegativeX1)==2)/NegativeNum;
P_NegativeX13 = nnz(cell2mat(NegativeX1)==3)/NegativeNum;
P_NegativeX21 = nnz(strcmp(NegativeX2,'S'))/NegativeNum;
P_NegativeX22 = nnz(strcmp(NegativeX2,'M'))/NegativeNum;
P_NegativeX23 = nnz(strcmp(NegativeX2,'L'))/NegativeNum;

% Likelihood factors for the instance x. They are looked up from x itself,
% so changing x above changes the prediction accordingly (the factors are
% no longer hard-coded for x = {2,'S'}).
NumFeatures = size(X,2);
P_x_Positive = zeros(1,NumFeatures); % P(x_j | Y = +1) for each feature j
P_x_Negative = zeros(1,NumFeatures); % P(x_j | Y = -1) for each feature j
for j = 1:NumFeatures
    matches = cellfun(@(v) isequal(v, x{j}), X(:,j)); % samples whose feature j equals x{j}
    P_x_Positive(j) = nnz(matches(PositiveIndex))/PositiveNum;
    P_x_Negative(j) = nnz(matches(NegativeIndex))/NegativeNum;
end

% Unnormalised posterior score of x for each class: prior * product of
% the per-feature conditional probabilities. Column 2 carries the label.
P_Positive_x = [PositiveP*prod(P_x_Positive),1];
P_Negative_x = [NegativeP*prod(P_x_Negative),-1];

% Choose the class with the largest score as the class of x. Using the index
% returned by max guarantees a single label; on an exact tie the first row
% (the positive class) is chosen.
P_x = [P_Positive_x;P_Negative_x];
[~, BestRow] = max(P_x(:,1));
y = P_x(BestRow,2)