用 AdaBoost 方法对所给数据点进行分类。
MATLAB 代码:
% Load the training set and show it as a scatter plot.
% Rows of data_train are samples; columns 1 and 2 are used as the plotted
% coordinates and column 3 as the +1 / -1 label.
data_train = load('hw2_adaboost_train.dat');
train_is_pos = (data_train(:,3) == 1);
train_is_neg = (data_train(:,3) == -1);
figure(1);
plot(data_train(train_is_pos,1), data_train(train_is_pos,2), 'r*');   % label +1: red stars
hold on
plot(data_train(train_is_neg,1), data_train(train_is_neg,2), 'bo');   % label -1: blue circles

T = 300;                           % number of boosting iterations
[N, n_col] = size(data_train);     % N = number of training samples
feature = n_col - 1;               % column count minus one; used as the number of feature columns
% Pseudo-code sketch of the training procedure:
% for iter = 1:T
%     for each feature i
%         for j = 1:N
%             sweep the candidate thresholds of feature i
%         end
%         keep the best (threshold, s) for feature i
%     end
%     keep the best (threshold, s, i) over all features
%     update u
% end

% Pre-sort the samples once per feature so the training loop does not have
% to sort again on every iteration. The result is kept in a 3-D array:
% sdata_f(:,:,i) holds the rows of data_train ordered by ascending value of
% column i, so sdata_f(:,i,i) is the sorted list of feature-i values.
%
% sortrows is used on purpose: in sort(A,dim) the second argument is the
% DIMENSION to sort along, not a column index, so sort(data_train,2) would
% sort the entries inside each row and destroy the feature-2 column.
sdata_f = zeros(size(data_train,1), size(data_train,2), feature);
for i = 1:feature
    sdata_f(:,:,i) = sortrows(data_train, i);
end
% Boosting state and per-iteration records.
q16 = [];       % best weighted error of every iteration, appended in the loop (question 16)
% Parameters selected at each iteration (threshold, direction s, feature
% index, and vote weight alpha); each one starts as a T-by-1 vector of ones.
[theta_iter, s_iter, i_iter, alpha_iter] = deal(ones(T,1));
% Sample weights u, uniform at the start. They follow the original
% (unsorted) row order of data_train.
u = repmat(1/N, N, 1);
% Running alpha-weighted sum of the stump predictions on the training set.
in_pre = zeros(N,1);
% AdaBoost training with decision stumps  g(x) = s * sign(x(i) - theta).
%
% Reads : data_train, sdata_f, feature, N, T, u, in_pre, q16 and the
%         *_iter record vectors prepared earlier in the script.
% Writes: theta_iter / s_iter / i_iter / alpha_iter (the stump chosen at each
%         iteration and its vote weight), q16 (best weighted error per
%         iteration), u_sum_iter (sum of the weights after each update),
%         in_pre (alpha-weighted vote on the training set), u (final weights).
% Also draws every chosen threshold as a light-grey line in figure 2.
label_train = data_train(:,end);   % labels live in the column after the features
u_sum_iter = zeros(T,1);           % sum(u) after each update (question 15)
figure(2)                          % selected once; nothing in the loop changes the current figure
hold on
for iter = 1:T
    u_total = sum(u);              % u is constant during the stump search, so sum it once
    err_iter = 1;
    for i = 1:feature
        sdata = sdata_f(:,:,i);
        x_i = data_train(:,i);
        % Baseline candidate: theta = -inf, i.e. every sample is predicted +1.
        theta = -inf;
        pre = sign(x_i - theta);   % assume s = +1 first, corrected just below
        errj = sum(u(pre ~= label_train)) / u_total;
        sj = 1;
        if errj > 0.5
            % Flipping the direction turns an error e into 1 - e.
            sj = -1;
            errj = 1 - errj;
        end
        for j = 1:N-1
            % A midpoint between two equal values coincides with data points;
            % sign() returns 0 there, which is wrong for both directions and
            % breaks the 1 - e shortcut. Such a threshold adds nothing new.
            if sdata(j,i) == sdata(j+1,i)
                continue
            end
            s = 1;
            theta_temp = (sdata(j,i) + sdata(j+1,i)) / 2;
            pre = sign(x_i - theta_temp);
            err_temp = sum(u(pre ~= label_train)) / u_total;
            if err_temp > 0.5
                s = -1;
                err_temp = 1 - err_temp;
            end
            if err_temp < errj
                errj = err_temp;       % best error so far for feature i
                theta = theta_temp;    % remember its threshold and direction
                sj = s;
            end
        end
        if errj < err_iter
            err_iter = errj;
            theta_iter(iter) = theta;  % record the best stump of this iteration
            s_iter(iter) = sj;
            i_iter(iter) = i;
        end
    end
    q16 = [q16 err_iter];              % question 16
    % A perfect stump (error 0) would give an infinite scaling factor and
    % alpha, zeroing every weight and producing NaN on the next iteration.
    % Clamp the error used in the formula; q16 still records the raw value.
    err_safe = max(err_iter, eps);
    diamonds_t = sqrt((1 - err_safe) / err_safe);
    alpha_iter(iter) = log(diamonds_t);
    pre = s_iter(iter) * sign(data_train(:,i_iter(iter)) - theta_iter(iter));
    in_pre = alpha_iter(iter) .* pre + in_pre;
    % Re-weight: misclassified samples are scaled up, correct ones down.
    is_wrong = (pre ~= label_train);
    u(is_wrong) = u(is_wrong) * diamonds_t;
    u(~is_wrong) = u(~is_wrong) / diamonds_t;
    u_sum_iter(iter) = sum(u);         % question 15
    % Feature 1 thresholds are drawn as vertical lines, any other feature as
    % horizontal lines, both over the hard-coded range [0,1].
    if i_iter(iter) == 1
        plot([theta_iter(iter), theta_iter(iter)], [0,1], 'color', [0.8,0.8,0.8]);
    else
        plot([0,1], [theta_iter(iter), theta_iter(iter)], 'color', [0.8,0.8,0.8]);
    end
end
% Optional: show the ensemble's predictions on the training set.
% in_pre = sign(in_pre);
% plot(data_train(in_pre==1,1),data_train(in_pre==1,2),'r*'); % plot
% hold on
% plot(data_train(in_pre==-1,1),data_train(in_pre==-1,2),'bo');

% Test: evaluate the boosted classifier on the held-out set.
data_test = load('hw2_adaboost_test.dat');
label_test = data_test(:,end);
% No figure() call here, so the true test labels are drawn into whichever
% figure is current at this point.
plot(data_test(label_test==1,1), data_test(label_test==1,2), 'r*'); hold on   % true +1
plot(data_test(label_test==-1,1), data_test(label_test==-1,2), 'bo');          % true -1

% Final hypothesis G(x) = sign( sum_t alpha_t * g_t(x) ). Each stump's vote
% must be scaled by its alpha_iter; without that factor the result is a
% plain majority vote and not the classifier that was trained.
out_pre = zeros(size(data_test,1),1);
for i = 1:T
    out_pre = out_pre + alpha_iter(i) .* s_iter(i) .* sign(data_test(:,i_iter(i)) - theta_iter(i));
end
out_pre = sign(out_pre);

figure(3)
plot(data_test(out_pre==1,1), data_test(out_pre==1,2), 'r*'); hold on   % predicted +1
plot(data_test(out_pre==-1,1), data_test(out_pre==-1,2), 'bo');         % predicted -1

% Test error rate; left without a semicolon so the value is printed.
sum(out_pre ~= label_test) / size(data_test,1)
代码有点乱,不过逻辑大概如此。