现在来讲拟牛顿法,主要讲两种主要的拟牛顿法,第一种拟牛顿法是由Davidon提出来的,后经Fletcher和Powell修改整理的,所以称为DFP方法。第二种著名的拟牛顿法是由Broyden,Fletcher,Goldfarb,Shanno四人独立提出来的,所以常被称为BFGS方法。下面给出拟牛顿法的算法框架。
下面给出实验结果
Quasi_newton.m
% Load the training data (features in ex3x.dat, targets in ex3y.dat)
% and standardize the feature columns so the quadratic cost is well scaled.
x = load('ex3x.dat');
y = load('ex3y.dat');
x = [ones(size(x,1),1) x];   % prepend an intercept column of ones
meanx = mean(x);             % per-column mean
sigmax = std(x);             % per-column standard deviation
% Standardize the two feature columns (column 1 is the intercept, left as-is).
x(:,2) = (x(:,2)-meanx(2))./sigmax(2);
x(:,3) = (x(:,3)-meanx(3))./sigmax(3);
itera_num = 1000;            % number of iterations to run
sample_num = size(x,1);      % number of training samples
figure
% ---- DFP run: initialization and first step ----
% theta starts at zero; take one exact line-search step along the
% inverse-Hessian-preconditioned gradient direction to produce the
% (theta_old, theta_new) pair the quasi-Newton update needs.
theta_old = zeros(size(x,2),1);  % initial parameter vector (all zeros)
Jtheta = zeros(itera_num, 1);    % cost history for plotting
Jtheta(1) = (1/(2*sample_num)).*(x*theta_old-y)'*(x*theta_old-y);
grad1 = (1/sample_num).*x'*(x*theta_old-y);   % gradient at theta_old
Q = x'*x;                        % Hessian (up to scale) of the quadratic cost
a = (grad1'*grad1)/(grad1'*Q*grad1);          % exact step size for a quadratic
% NOTE(review): seeding H with the exact inverse Hessian makes the
% quasi-Newton update nearly trivial on this quadratic; H = eye(...) is
% the more common starting point — confirm intent.
H = inv(Q);                      % initial inverse-Hessian approximation
d1 = -(H*grad1);                 % first search direction
theta_new = theta_old + a*d1;
% DFP quasi-Newton iteration: rank-two update of the inverse-Hessian
% approximation H, then a line-search step along d = -H*grad.
for iter = 2:itera_num
    % Cost at the current iterate (row of the convergence curve).
    Jtheta(iter) = (1/(2*sample_num)).*(x*theta_new-y)'*(x*theta_new-y);
    % Gradients at the previous and current iterates.
    g_prev = (1/sample_num).*x'*(x*theta_old-y);
    g_curr = (1/sample_num).*x'*(x*theta_new-y);
    yk = g_curr - g_prev;        % curvature pair: gradient change ...
    sk = theta_new - theta_old;  % ... and parameter step
    % DFP update: H <- H - (H yk yk' H)/(yk' H yk) + (sk sk')/(sk' yk)
    H = H - (H'*yk*yk'*H)/(yk'*H*yk) + (sk*sk')/(sk'*yk);
    d = -H*g_curr;               % quasi-Newton descent direction
    % Exact minimizing step length for the quadratic cost
    % (NOTE(review): this is the steepest-descent step formula, reused here).
    a = (g_curr'*g_curr)/(g_curr'*Q*g_curr);
    theta_old = theta_new;
    theta_new = theta_new + a*d;
end
K(1)=Jtheta(500) ;  % sample the DFP cost after 500 iterations
% Plot the first 100 cost values of the DFP run (black curve).
plot(0:99, Jtheta(1:100),'k-','LineWidth', 4);
hold on
% ---- BFGS run: re-initialize with the same starting point and first
% step as the DFP run above, so the two curves are directly comparable. ----
theta_old = zeros(size(x,2),1); % initial theta is the zero vector
Jtheta = zeros(itera_num, 1);   % reset the cost history
Jtheta(1) = (1/(2*sample_num)).*(x*theta_old-y)'*(x*theta_old-y);
grad1 = (1/sample_num).*x'*(x*theta_old-y);  % gradient at the start
Q=x'*x;                                      % Hessian (up to scale) of the cost
a=(grad1'*grad1)/(grad1'*Q*grad1);           % exact line-search step size
H=inv(Q);                                    % initial inverse-Hessian approximation
d1=-(H*grad1);                               % first search direction
theta_new=theta_old+a*d1;
% BFGS quasi-Newton iteration: same structure as the DFP loop, but with
% the BFGS inverse-Hessian update formula.
for iter = 2:itera_num
    % Cost at the current iterate (row of the convergence curve).
    Jtheta(iter) = (1/(2*sample_num)).*(x*theta_new-y)'*(x*theta_new-y);
    g_prev = (1/sample_num).*x'*(x*theta_old-y);
    g_curr = (1/sample_num).*x'*(x*theta_new-y);
    yk = g_curr - g_prev;        % gradient change
    sk = theta_new - theta_old;  % parameter step
    % BFGS inverse-Hessian update:
    %   H <- H - (H yk sk' + sk yk' H)/(yk' sk)
    %          + (1 + yk' H yk/(sk' yk)) (sk sk')/(sk' yk)
    H = H - (H*yk*sk'+sk*yk'*H)/(yk'*sk) + (1+(yk'*H*yk)/(sk'*yk))*(sk*sk')/(sk'*yk);
    d = -H*g_curr;               % quasi-Newton descent direction
    % NOTE(review): steepest-descent exact step size reused for direction d.
    a = (g_curr'*g_curr)/(g_curr'*Q*g_curr);
    theta_old = theta_new;
    theta_new = theta_new + a*d;
end
% Bug fix: the original wrote K(1) here, clobbering the DFP value stored
% earlier; the BFGS sample belongs in K(2).
K(2)=Jtheta(500) ;  % sample the BFGS cost after 500 iterations
% Plot the first 100 cost values of the BFGS run (red curve).
plot(0:99, Jtheta(1:100),'r-','LineWidth', 2);
hold on
%%
% Label the comparison plot: black curve = DFP, red curve = BFGS.
legend('quasi-Newton-DFP','quasi-Newton-BFGS');
xlabel('Number of iterations')
ylabel('Cost function')