function accuracy = myAdaboostMulticlass( Dtrain,Dtest,baseLearner,T,tag)
%myAdaboostMulticlass  Multiclass AdaBoost (AdaBoost.M1-style) with resampling.
%   Input:
%     Dtrain      - training set; the LAST column holds the class labels
%                   (assumed positive integers 1..K -- they are used directly
%                   as indices into the vote vector)
%     Dtest       - test set WITH labels in its last column
%     baseLearner - 0: decision stump (computeDeision), 1: Naive Bayes (computeNB)
%     T           - number of boosting rounds
%     tag         - attribute type: 0 discrete, 1 continuous
%   Output:
%     accuracy    - classification accuracy on Dtest
%
%   Deliberate deviation from textbook AdaBoost.M1: a round whose weighted
%   error exceeds 0.5 does NOT terminate training; its learner weight is set
%   to 0 and the sample weights are re-initialized to uniform.

nTrain = size(Dtrain,1);
nTest  = size(Dtest,1);
pT = ones(nTest,T);         %column t = predictions of the t-th base learner on Dtest
p  = ones(nTest,1);         %final ensemble prediction for every test sample
alphaT = zeros(T,1);        %weight of each base learner (0 marks a skipped round)
weight_sample = ones(nTrain,1)/nTrain; %initial sampling weights: uniform

for t=1:T
    %resample the training set WITH replacement according to current weights
    sampleIndex = randsample(nTrain,nTrain,'true',weight_sample);
    DtrainSample = Dtrain(sampleIndex,:); %this round's training sample
    %train the base learner; predict on both the test set and the full training set
    train_predict = zeros(nTrain,1); %fallback if baseLearner is invalid: the
                                     %round then has errorate 1 and is skipped
    if baseLearner==0     %base learner is a decision stump
        pT(:,t)       = computeDeision(DtrainSample,Dtest(:,1:(end-1)),tag); %strip label column
        train_predict = computeDeision(DtrainSample,Dtrain(:,1:(end-1)),tag);
    elseif baseLearner==1 %base learner is Naive Bayes
        pT(:,t)       = computeNB(DtrainSample,Dtest(:,1:(end-1)),tag);
        train_predict = computeNB(DtrainSample,Dtrain(:,1:(end-1)),tag);
    end
    train_predict = train_predict(:); %force column vector
    %weighted training error of this round's learner
    errorate = sum( weight_sample.* (Dtrain(:,end) ~=train_predict ) );
    if errorate > 0.5
        %learner is worse than chance: discard it, restart from uniform weights
        alphaT(t) = 0;
        weight_sample = ones(nTrain,1)/nTrain;
        continue;
    end
    %clamp a perfect round so alphaT stays finite (log((1-0)/0) would be Inf
    %and the subsequent weight update would produce NaNs after normalization)
    errorate = max(errorate, eps);
    alphaT(t) = 0.5*log((1-errorate)/errorate );
    %reweight: correct samples get factor exp(-alpha), wrong ones exp(+alpha)
    weight_sample = weight_sample.* exp(alphaT(t)*( (-2)*( Dtrain(:,end)==train_predict)+1));
    weight_sample = weight_sample/sum(weight_sample); %renormalize to a distribution
end

%weighted vote: accumulate each learner's alpha on the label it predicted.
%NOTE: the original looped "for i=1:size(Dtest)", which hands the full size
%vector to the colon operator and silently uses only its first element.
for i=1:nTest
    lable_vote = zeros(max(Dtrain(:,end)),1);
    for t=1:T
        lable_vote( pT(i,t) ) = lable_vote(pT(i,t)) + alphaT(t);
    end
    [~,p(i)] = max(lable_vote); %label with the largest accumulated vote
end
accuracy = sum((p==Dtest(:,end)))/nTest;
end
% 文章标题 (page-scrape artifact: article title placeholder)
% 最新推荐文章于 2022-05-12 11:32:07 发布 (page-scrape artifact: "latest recommended article published 2022-05-12 11:32:07")