%% % svm 简单算法设计 --启发式选择 %% clc clear close all % step=0.05;error=1.2; % [data, label]=generate_sample(step,error); category=load('category.mat'); label=category.label; feature=load('feature.mat'); data=feature.data; [num_data,d] = size(data); % 样本数量,维度,维度在下面好像没有用到 %% 定义向量机参数 alphas = ones(num_data,1)-0.999999; b = 0; error = zeros(num_data,2); tol = 0.001; C = 600000; iter = 0; max_iter = 30; alpha_change = 0; entireSet = 1;%作为一个标记看是选择全遍历还是部分遍历 %第一个变量先遍历间隔边界(0<alpha<C)上的支持向量点(此时松弛变量等于0),检验其是否满足KKT条件,若全部满足再遍历整个样本 %第一个变量选取违反KKT条件最严重的样本点所对应的变量,意思是首先更新最糟糕的点 %选择第二个变量要使得|E1-E2|最大,即使得乘子的变化最大,要用启发式标准 %第二个变量的选择好像是先看有没有违反KKT条件的点,若有则选择,若没有则按照|E1-E2|来选择 while (iter < max_iter) && ((alpha_change > 0) || entireSet) alpha_change = 0; % -----------全遍历样本------------------------- if entireSet for i = 1:num_data Ei = calEk(data,alphas,label,b,i);%计算误差 %此处的条件既是选取第一个变量的标准,首先考虑的是间隔边界(0<alpha<C)上的支持向量点中不满足KKT条件的点所对应的变量 %该条件困扰了我两天,实际上原来的写法过于虚伪,让人看不透摸不清,实际上写清楚了让人一看就明了。 if (label(i)*Ei<-0.001 && alphas(i)<C)||(label(i)*Ei>0.001 && alphas(i)>0) %if (0<alphas(i) && alphas(i)<C && label(i)*Ei~=0)%写成这个形式要让alphas的初值大于零否则进不来循环体。 %选择下一个alphas [j,Ej] = select(i,data,num_data,alphas,label,b,C,Ei,entireSet); alpha_I_old = alphas(i); alpha_J_old = alphas(j); if label(i) ~= label(j) L = max(0,alphas(j) - alphas(i)); H = min(C,C + alphas(j) - alphas(i)); else L = max(0,alphas(j) + alphas(i) -C); H = min(C,alphas(j) + alphas(i)); end if L==H continue;end eta = 2*data(i,:)*data(j,:)'- data(i,:)*... data(i,:)' - data(j,:)*data(j,:)'; if eta >= 0 continue;end alphas(j) = alphas(j) - label(j)*(Ei-Ej)/eta; %限制范围 if alphas(j) > H alphas(j) = H; elseif alphas(j) < L alphas(j) = L; end if abs(alphas(j) - alpha_J_old) < 1e-4 continue;end alphas(i) = alphas(i) + label(i)*label(j)*(alpha_J_old-alphas(j)); b1 = b - Ei - label(i)*(alphas(i)-alpha_I_old)*data(i,:)*data(i,:)'- label(j)*(alphas(j)-alpha_J_old)*data(i,:)*data(j,:)'; b2 = b - Ej - label(i)*(alphas(i)-alpha_I_old)*data(i,:)*data(j,:)'- label(j)*(alphas(j)-alpha_J_old)*data(j,:)*data(j,:)'; if (alphas(i) > 0) && (alphas(i) < C) b = b1; elseif (alphas(j) > 0) && (alphas(j) < C) b = b2; else b = (b1+b2)/2; end alpha_change = alpha_change + 1; end end iter = iter + 1; % --------------部分遍历(alphas=0~C)的样本-------------------------- else index = find(alphas>0 & alphas < C); for ii = 1:length(index) i = index(ii); Ei = calEk(data,alphas,label,b,i);%计算误差 if (label(i)*Ei<-0.001 && alphas(i)<C)||... (label(i)*Ei>0.001 && alphas(i)>0) %选择下一个样本 [j,Ej] = select(i,data,num_data,alphas,label,b,C,Ei,entireSet); alpha_I_old = alphas(i); alpha_J_old = alphas(j); if label(i) ~= label(j) L = max(0,alphas(j) - alphas(i)); H = min(C,C + alphas(j) - alphas(i)); else L = max(0,alphas(j) + alphas(i) -C); H = min(C,alphas(j) + alphas(i)); end if L==H continue;end eta = 2*data(i,:)*data(j,:)'- data(i,:)*... data(i,:)' - data(j,:)*data(j,:)'; if eta >= 0 continue;end alphas(j) = alphas(j) - label(j)*(Ei-Ej)/eta; %限制范围 if alphas(j) > H alphas(j) = H; elseif alphas(j) < L alphas(j) = L; end if abs(alphas(j) - alpha_J_old) < 1e-4 continue;end alphas(i) = alphas(i) + label(i)*... label(j)*(alpha_J_old-alphas(j)); b1 = b - Ei - label(i)*(alphas(i)-alpha_I_old)*... data(i,:)*data(i,:)'- label(j)*... (alphas(j)-alpha_J_old)*data(i,:)*data(j,:)'; b2 = b - Ej - label(i)*(alphas(i)-alpha_I_old)*... data(i,:)*data(j,:)'- label(j)*... (alphas(j)-alpha_J_old)*data(j,:)*data(j,:)'; if (alphas(i) > 0) && (alphas(i) < C) b = b1; elseif (alphas(j) > 0) && (alphas(j) < C) b = b2; else b = (b1+b2)/2; end alpha_change = alpha_change + 1; end end iter = iter + 1; end % -------------------------------- if entireSet %第一次全遍历了,下一次就变成部分遍历 entireSet = 0; elseif alpha_change == 0 %如果部分遍历所有都没有找到需要交换的alpha,再改为全遍历 entireSet = 1; end disp(['iter ================== ',num2str(iter)]); end % 计算权值W W = (alphas.*label)'*data; %记录支持向量位置 index_sup = find(alphas ~= 0); %计算预测结果 predict = (alphas.*label)'*(data*data') + b; predict = sign(predict); % 显示结果 figure; index1 = find(predict==-1); data1 = (data(index1,:))'; plot(data1(1,:),data1(2,:),'+r'); hold on index2 = find(predict==1); data2 = (data(index2,:))'; plot(data2(1,:),data2(2,:),'*'); hold on dataw = (data(index_sup,:))'; plot(dataw(1,:),dataw(2,:),'og','LineWidth',2); % 画出分界面,以及b上下正负1的分界面 hold on k = -W(1)/W(2); x = -1.2:0.1:1.2; y = k*x + b; plot(x,y,x,y-1,'r--',x,y+1,'r--'); title(['松弛变量范围C = ',num2str(C)]);
function Ek = calEk(data,alphas,label,b,k) pre_Li = (alphas.*label)'*(data*data(k,:)') + b; Ek = pre_Li - label(k);
function [J,Ej] = select(i,data,num_data,alphas,label,b,C,Ei,choose) maxDeltaE = 0;maxJ = -1; if choose == 1 %全遍历---随机选择alphas j = randi(num_data ,1); if j == i temp = 1; while temp j = randi(num_data,1); if j ~= i temp = 0; end end end J = j; Ej = calEk(data,alphas,label,b,J); else %部分遍历--启发式的选择alphas index = find(alphas>0 & alphas < C); for k = 1:length(index) if i == index(k) continue; end temp_e = calEk(data,alphas,label,b,k); deltaE = abs(Ei - temp_e); %选择与Ei误差最大的alphas if deltaE > maxDeltaE maxJ = k; maxDeltaE = deltaE; Ej = temp_e; end end J = maxJ; end