共轭梯度法(Conjugate Gradient Methods-CG)在回归分析的应用和比较

  前4节讲了Trust-Region+DogLeg、最速下降法(SD)、Barzilar, Borwein(BB)法。

  信赖域法:Trust-Region+DogLeg  

  梯度法:最速下降法(SD)、Barzilar, Borwein(BB)

    这节将会讲一种共轭梯度法(CG),将和前面几种方法进行比较,比较其收敛性和最小迭代次数。

  共轭梯度法(CG)的算法框架:

 

                       


实验结果如下:


β的所有表达方式,都重合,说明这几种β的表达方式产生一样的结果,为上图的加粗的线。

main.m

<pre name="code" class="plain">x = load('ex3x.dat');      
y = load('ex3y.dat');      
      
trustRegionBound = 1000;      
x = [ones(size(x,1),1) x];      
meanx = mean(x);%求均值      
sigmax = std(x);%求标准偏差      
x(:,2) = (x(:,2)-meanx(2))./sigmax(2);      
x(:,3) = (x(:,3)-meanx(3))./sigmax(3);      
itera_num = 1000; %尝试的迭代次数      
sample_num = size(x,1); %训练样本的次数      
jj=0.00001;  
figure      
alpha = [0.03, 0.1, 1];%因为差不多是选取每个3倍的学习率来测试,所以直接枚举出来      
plotstyle = {'b', 'r', 'g'};       
theta_grad_descent = zeros(size(x(1,:)));
plotstyle1 = {'y-', 'k-', 'c-','b--','r-'}; 

%% CG方法
for ii=1:5
theta = zeros(size(x,2),1); %theta的初始值赋值为0      
Jtheta = zeros(itera_num, 1);  
Jtheta(1) = (1/(2*sample_num)).*(x*theta-y)'*(x*theta-y);  
grad1=(1/sample_num).*x'*(x*theta-y);  
Q=x'*x;
d1=-grad1;
a1=-(grad1'*d1)/(d1'*Q*d1);
theta=theta+a1*d1;
d_old=d1;
for i = 2:itera_num %计算出某个学习速率alpha下迭代itera_num次数后的参数             
        Jtheta(i) = (1/(2*sample_num)).*(x*theta-y)'*(x*theta-y);%Jtheta是个行向量  
        g=(1/sample_num).*x'*(x*theta-y);
        d=-g;
        switch ii
        case 1
        beta=(g'*Q*d_old)/(d_old'*Q*d_old);
            case 2
                beta=(g'*g)/((-d_old)'*(-d_old));
            case 3
                beta=((g+d_old)'*g)/((-d_old)'*(-d_old));
            case 4
                beta=((g+d_old)'*g)/(d_old'*(g+d_old));
            case 5
                beta=(g'*g)/(d_old'*(g+d_old));
        end
        d_new=-g+beta*d_old;
        a=-(g'*d)/(d'*Q*d);
        theta=theta+a*d;
        d_old=d_new;   
end  
plot(0:50, Jtheta(1:51),char(plotstyle1(ii)),'LineWidth', ii);%此处一定要通过char函数来转换   
hold on  
end

%% BB(1)+(2)法   
% BB(1)  
theta_old = zeros(size(x,2),1); %theta的初始值赋值为0      
Jtheta = zeros(itera_num, 1);  
%求解a1,d1  
Jtheta(1) = (1/(2*sample_num)).*(x*theta_old-y)'*(x*theta_old-y);  
grad1=(1/sample_num).*x'*(x*theta_old-y);  
Q=x'*x;  
d1=-grad1;  
a1=(grad1'*grad1)/(grad1'*Q*grad1);  
theta_new=theta_old+a1*d1;  
for i = 2:itera_num %计算出某个学习速率alpha下迭代itera_num次数后的参数             
        Jtheta(i) = (1/(2*sample_num)).*(x*theta_new-y)'*(x*theta_new-y);%Jtheta是个行向量  
        grad_old=(1/sample_num).*x'*(x*theta_old-y);  
        grad_new = (1/sample_num).*x'*(x*theta_new-y);  
        if abs(grad_new)<jj  
            M=i;  
            break;  
        end  
        d=-grad_new;    
        s=theta_new-theta_old;  
        g=grad_new-grad_old;  
        a=(s'*g)/(g'*g);    
        theta_old=theta_new;  
        theta_new = theta_old + a*d;    
end  
plot(0:M-1, Jtheta(1:M),'k--','LineWidth', 4)%此处一定要通过char函数来转换   
hold on   
%BB(2)  
theta_old = zeros(size(x,2),1); %theta的初始值赋值为0      
Jtheta = zeros(itera_num, 1);  
%求解a1,d1  
Jtheta(1) = (1/(2*sample_num)).*(x*theta_old-y)'*(x*theta_old-y);  
grad1=(1/sample_num).*x'*(x*theta_old-y);  
Q=x'*x;  
d1=-grad1;  
a1=(grad1'*grad1)/(grad1'*Q*grad1);  
theta_new=theta_old+a1*d1;  
for i = 2:itera_num %计算出某个学习速率alpha下迭代itera_num次数后的参数             
        Jtheta(i) = (1/(2*sample_num)).*(x*theta_new-y)'*(x*theta_new-y);%Jtheta是个行向量  
        grad_old=(1/sample_num).*x'*(x*theta_old-y);  
        grad_new = (1/sample_num).*x'*(x*theta_new-y);  
        if abs(grad_new)<jj  
            M1=i;  
            break;  
        end  
        d=-grad_new;    
        s=theta_new-theta_old;  
        g=grad_new-grad_old;  
        a=(s'*s)/(s'*g);    
        theta_old=theta_new;  
        theta_new = theta_old + a*d;    
end  
plot(0:M1-1, Jtheta(1:M1),'r--','LineWidth', 3)%此处一定要通过char函数来转换   
hold on   
  
    %% 信赖域+狗腿法    
theta = zeros(size(x,2),1); %theta的初始值赋值为0      
Jtheta = zeros(itera_num, 1);      
 for i = 1:itera_num %计算出某个学习速率alpha下迭代itera_num次数后的参数             
        Jtheta(i) = (1/(2*sample_num)).*(x*theta-y)'*(x*theta-y);%Jtheta是个行向量      
        grad = (1/sample_num).*x'*(x*theta-y);      
        B=x'*x;      
        du = -grad' * grad * grad / (grad' * B * grad);      
        dB = -B^-1 * grad;      
        a = 2;      
        if du'*du > trustRegionBound*trustRegionBound;      
        a = trustRegionBound / sqrt((du'*du));      
        else if dB'*dB > trustRegionBound*trustRegionBound      
        a = sqrt((trustRegionBound*trustRegionBound - du'*du) / ((dB-du)'*(dB-du))) + 1;      
            end          
        end      
        if a < 1      
        d = a * du;      
        else      
        d = du + (a - 1) * (dB - du);      
        end      
        Jtheta1(i)=(1/(2*sample_num)).*(x*(theta+d)-y)'*(x*(theta+d)-y);      
        p = (Jtheta(i)-Jtheta1(i))/(-grad'*d-1/2*d'*B*d);      
            if p > 0.75 && sqrt(abs(d'*d) - trustRegionBound) < 0.001      
        trustRegionBound = min(2 * trustRegionBound, 10000);      
           else if p < 0.25      
            trustRegionBound = sqrt(abs(d'*d)) * 0.25;      
               end      
            end      
         if p > 0%q(zeros(2,1),x) > q(d, x)      
        theta = theta + d;      
         end      
 end      
K(1)=Jtheta(500);      
    plot(0:50, Jtheta(1:51),'k--','LineWidth', 2)%此处一定要通过char函数来转换      
    hold on      
%% 固定学习率法    
      
theta_grad_descent = zeros(size(x(1,:)));      
for alpha_i = 1:length(alpha) %尝试看哪个学习速率最好      
    theta = zeros(size(x,2),1); %theta的初始值赋值为0      
    Jtheta = zeros(itera_num, 1);      
    for i = 1:itera_num %计算出某个学习速率alpha下迭代itera_num次数后的参数             
        Jtheta(i) = (1/(2*sample_num)).*(x*theta-y)'*(x*theta-y);%Jtheta是个行向量      
        grad = (1/sample_num).*x'*(x*theta-y);      
        theta = theta - alpha(alpha_i).*grad;      
    end      
    K(alpha_i+1)=Jtheta(500);      
    plot(0:50, Jtheta(1:51),char(plotstyle(alpha_i)),'LineWidth', 2)%此处一定要通过char函数来转换      
    hold on      
end  
  
%% SD算法  
theta = zeros(size(x,2),1); %theta的初始值赋值为0      
Jtheta = zeros(itera_num, 1);      
 for i = 1:itera_num %计算出某个学习速率alpha下迭代itera_num次数后的参数             
        Jtheta(i) = (1/(2*sample_num)).*(x*theta-y)'*(x*theta-y);%Jtheta是个行向量      
        grad = (1/sample_num).*x'*(x*theta-y);      
        Q=x'*x;    
        d=-grad;    
        a=(grad'*grad)/(grad'*Q*grad);    
        theta = theta + a*d;       
 end      
K(1)=Jtheta(500)      
    plot(0:50, Jtheta(1:51),'b--','LineWidth', 2);    
    hold on      
%%  
legend('CG','CG-FR','CG-PRP','CG-HS','CG-DY','BB(1)','BB(2)','Trust Region with DogLeg','0.03','0.1','1','Steepest Descent');      
xlabel('Number of iterations')      
ylabel('Cost function')      


 

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值