线性判别分析（LDA）——以 COVID-19 数据集为例，对投影结果进行可视化并计算分类精度
1.可视化及精度结果
2.代码实现
myLDA.m
function [w] = myLDA(X0,X1)
%MYLDA Two-class Fisher linear discriminant direction.
%   w = myLDA(X0,X1) returns the generalized eigenvector of the pencil
%   (Sb,Sw) that belongs to the largest eigenvalue, where Sb is the
%   between-class scatter and Sw the within-class scatter.
%
%   Inputs:
%     X0 - n0-by-p matrix, one row per sample of the first class.
%     X1 - n1-by-p matrix, one row per sample of the second class.
%   Output:
%     w  - p-by-1 projection vector. Its sign and scale are whatever EIG
%          returns; callers should only rely on its direction.
%
%   Errors:
%     myLDA:emptyClass       - X0 or X1 has no rows.
%     myLDA:featureMismatch  - X0 and X1 have different column counts.
n0 = size(X0,1);
n1 = size(X1,1);
if n0 == 0 || n1 == 0
    error('myLDA:emptyClass', ...
        'Both classes must contain at least one sample.');
end
if size(X0,2) ~= size(X1,2)
    error('myLDA:featureMismatch', ...
        'X0 and X1 must have the same number of columns.');
end

% Class means as 1-by-p row vectors.
mu0 = mean(X0,1);
mu1 = mean(X1,1);

% Between-class scatter: outer product of the mean difference.
d  = (mu0 - mu1)';
Sb = d*d';

% Within-class scatter. Subtracting the class mean is algebraically the
% same as X'*(I - ones(n)/n)*X but avoids building n-by-n matrices.
Xc0 = bsxfun(@minus, X0, mu0);
Xc1 = bsxfun(@minus, X1, mu1);
Sw  = Xc0'*Xc0 + Xc1'*Xc1;
Sw  = (Sw + Sw')/2;   % remove any round-off asymmetry before EIG

[u,v] = eig(Sb,Sw);

% The eigenvalues sit on the diagonal of v; compare their real parts so
% that imaginary round-off cannot influence the choice.
[~,b] = max(real(diag(v)));
w = u(:,b);
end
myBayes.m
function [Pred_lab]=myBayes(newTrain,Train_lab,newTest)
%MYBAYES Two-class Gaussian naive Bayes classifier.
%   Pred_lab = myBayes(newTrain,Train_lab,newTest) fits one independent
%   Gaussian per feature and per class on the training data and assigns
%   each test row to the class with the larger posterior.
%
%   Inputs:
%     newTrain  - nTrain-by-nFea training features (one row per sample).
%     Train_lab - nTrain labels; 0 = ordinary pneumonia, 1 = COVID19.
%     newTest   - nTest-by-nFea test features.
%   Output:
%     Pred_lab  - nTest-by-1 vector of predicted labels (0 or 1). Ties
%                 and NaN scores resolve to 0.
nTest = size(newTest,1);
Train_lab = Train_lab(:);
in0 = (Train_lab==0);   % ordinary pneumonia
in1 = (Train_lab==1);   % COVID19

% With one class absent there is nothing to compare: predict the class
% that is present (all zeros when the training set has neither).
if ~any(in1)
    Pred_lab = zeros(nTest,1);
    return;
end
if ~any(in0)
    Pred_lab = ones(nTest,1);
    return;
end

% Class priors; any sample not labelled 0 counts toward p1.
p0 = sum(in0)/numel(Train_lab);
p1 = 1-p0;

% Per-feature mean and standard deviation as 1-by-nFea row vectors. The
% explicit dimension keeps a single-sample class from being reduced along
% the feature axis; the eps floor avoids division by zero.
u0 = mean(newTrain(in0,:),1);
u1 = mean(newTrain(in1,:),1);
s0 = max(std(newTrain(in0,:),0,1), eps);
s1 = max(std(newTrain(in1,:),0,1), eps);

% Log posterior up to a shared constant:
%   log(prior) - sum(log(sigma)) - 0.5*sum(((x-mu)/sigma).^2)
% Working in the log domain avoids the 0/0 that a ratio of underflowed
% densities would produce.
z0 = bsxfun(@rdivide, bsxfun(@minus, newTest, u0), s0);
z1 = bsxfun(@rdivide, bsxfun(@minus, newTest, u1), s1);
score0 = log(p0) - sum(log(s0)) - 0.5*sum(z0.^2, 2);
score1 = log(p1) - sum(log(s1)) - 0.5*sum(z1.^2, 2);

Pred_lab = double(score1 > score0);
end
LDA.m
% LDA.m - k-fold cross-validated two-class LDA on the data in COVID19.mat,
% followed by a plot of the projected training samples of the last fold.
% The script reads X (one sample per row) and Y (labels 0/1) from the
% loaded file and calls myLDA and myBayes.
clc;clear;
load ('COVID19.mat')
nFold = 5;
n = size(X,1); % n is the number of samples
labels = Y(:); % force a column so label comparisons stay element-wise
c_out = cvpartition(n,'KFold',nFold);
Acc=zeros(nFold,1);
for k = 1:nFold
    trainMask = training(c_out,k);
    testMask  = test(c_out,k);
    Train_dat = X(trainMask,:);
    Train_lab = labels(trainMask);
    Test_dat  = X(testMask,:);
    Test_lab  = labels(testMask);
    idx0 = (Train_lab == 0);
    idx1 = (Train_lab == 1);
    X0 = Train_dat(idx0,:); % non-COVID samples in the training set
    X1 = Train_dat(idx1,:); % COVID samples in the training set
    w = myLDA(X0,X1);
    % Accuracy: project both splits onto w, classify, compare.
    newTrain = Train_dat*w;
    newTest  = Test_dat*w;
    Pred_lab = myBayes(newTrain,Train_lab,newTest);
    Acc(k) = mean(Test_lab == Pred_lab);
end
mean(Acc)
% Visualisation. w, newTrain, idx0 and idx1 still hold the values of the
% last fold. Each projected sample t is drawn at (t, t*h), i.e. on a line
% through the origin with slope w(2)/w(1).
X00 = newTrain(idx0,:);X11 = newTrain(idx1,:);
h = w(2)/w(1);
plot(X00,X00*h,'b.'); % ordinary pneumonia
hold on
plot(X11,X11*h,'r.'); % COVID-19 pneumonia
legend('蓝色表示普通肺炎','红色表示新冠肺炎')