本算法是根据《机器学习实战》改编而来,原书是用Python写的,后觉得Python用着不习惯,所以改写为MATLAB。具体含义参见该书即可,书中讲解很详细。
下面是主程序adaBoost.m:
% adaBoost.m -- AdaBoost training with single-layer decision stumps
% (adapted from "Machine Learning in Action", ch. 7).
clear;
dataArr=[1,2.1;2,1.1;1.3,1;1,1;2,1];
classLabels=[1.0,1.0,-1.0,-1.0,1.0]';
numIt=9;                      % maximum number of boosting rounds
weakClassArr={};              % trained stumps, one cell row per round
m=size(dataArr,1);
D=ones(m,1)/m;                % sample weights, initially uniform
aggClassEst=zeros(m,1);       % running weighted vote of all stumps
for i=1:numIt
    % r = {bestStump, weighted error, class estimates}
    r=buildStump(dataArr,classLabels,D);
    % Classifier weight. Clamp the error away from 0 so a perfect
    % stump does not produce alpha=Inf (and NaN weights afterwards).
    err=max(r{1,2},1e-16);
    alpha=0.5*log((1-err)/err);
    r{1,size(r,2)+1}=alpha;
    weakClassArr{size(weakClassArr,1)+1,1}=r;
    % Re-weight samples: misclassified ones gain weight, then renormalize.
    expon=-1*alpha*classLabels.*r{1,3};
    D=D.*exp(expon);
    D=D/sum(D);
    % Aggregate prediction and training error of the ensemble so far.
    aggClassEst=aggClassEst+alpha*r{1,3};
    errorRate=sum(sign(aggClassEst)~=classLabels)/m;
    if errorRate==0
        break;                % perfect fit on the training set
    end
end
函数文件如下buildStump.m:
function result=buildStump(dataArr,classLabels,D)
%BUILDSTUMP Find the best single-layer decision stump under weights D.
%   result = {bestStump, minErr, bestClasEst} where bestStump is
%   [feature index, threshold, inequality flag (0 = "lt", 1 = "gt")],
%   minErr is the minimum weighted classification error found, and
%   bestClasEst holds the predictions of that best stump.
[numSamples,numFeatures]=size(dataArr);
numsteps=10;
bestStump=zeros(1,3);
bestClasEst=zeros(numSamples,1);
minErr=inf;
for feat=1:numFeatures
    colMin=min(dataArr(:,feat));
    colMax=max(dataArr(:,feat));
    stepSize=(colMax-colMin)/numsteps;
    % Sweep candidate thresholds slightly beyond the feature range.
    for step=-1:numsteps+1
        thresh=colMin+step*stepSize;
        for ineq=0:1    % 0 means "lt", 1 means "gt"
            predicted=stumpclassify(dataArr,feat,thresh,ineq);
            % Weighted error: sum of D over misclassified samples.
            misclassified=double(predicted~=classLabels);
            weightedErr=D'*misclassified;
            if weightedErr<minErr
                minErr=weightedErr;
                bestClasEst=predicted;
                bestStump=[feat,thresh,ineq];
            end
        end
    end
end
result={bestStump,minErr,bestClasEst};
end
%STUMPCLASSIFY Classify all samples with one decision stump.
%   threshIneq==0 ("lt"): samples with data(:,dimen) <= threshval get -1;
%   threshIneq==1 ("gt"): samples with data(:,dimen) >  threshval get -1;
%   every other sample is labelled +1.
function classify_result=stumpclassify(data,dimen,threshval,threshIneq)
classify_result=ones(size(data,1),1);
if threshIneq==0
    classify_result(data(:,dimen)<=threshval)=-1.0;
else
    classify_result(data(:,dimen)>threshval)=-1.0;
end
end