SRI算法

有限内存SRI

对于有限内存SRI算法,利用的修正公式为:
H k = H 0 + ( S k − H 0 Y k ) ( D k + L k + L k T − Y k T H 0 Y k ) − 1 ( S k − H 0 Y k ) T H_{k} = H_{0} + (S_{k} - H_{0}Y_{k})(D_{k} + L_{k} + L_{k}^{T} - Y_{k}^{T}H_{0}Y_{k})^{-1}(S_{k} - H_{0}Y_{k})^T Hk=H0+(SkH0Yk)(Dk+Lk+LkTYkTH0Yk)1(SkH0Yk)T
这里的 H 0 H_{0} H0其实和上面的BFGS算法一样,也是可以任意取的,这里不同的是:

S k = [ s k − m + 1 , … , s k − 1 ] , Y k = [ y k − m + 1 , … , y k − 1 ] S_{k} = [s_{k-m+1},\ldots,s_{k-1}],Y_{k} = [y_{k-m+1},\ldots,y_{k-1}] Sk=[skm+1,,sk1],Yk=[ykm+1,,yk1]
D k = d i a g ( s k − m + 1 T y k − m + 1 , … , s k − 1 T y k − 1 ) D_{k} = diag(s_{k-m+1}^{T}y_{k - m + 1},\ldots,s_{k-1}^{T}y_{k - 1}) Dk=diag(skm+1Tykm+1,,sk1Tyk1)

( L k ) i , j = { ( s k − m − 1 + i ) T ( y k − m − 1 + j )  if  i > j 0  otherwise  \left(L_{k}\right)_{i, j}=\left\{\begin{array}{ll} \left(s_{k-m-1+i}\right)^{T}\left(y_{k-m-1+j}\right) & \text { if } i>j \\ 0 & \text { otherwise } \end{array}\right. (Lk)i,j={(skm1+i)T(ykm1+j)0 if i>j otherwise 

事实上,通过上面的公式,我们很容易发现,其实 D k + L k + L k T = S k Y k T D_{k} + L_{k} + L_{k}^{T}=S_{k}Y_{k}^{T} Dk+Lk+LkT=SkYkT,计算下降方向的流程如下:

q = g k q = g_k q=gk,\
tmp = H 0 H_{0} H0,\
alpha = ( S k − H 0 Y k ) T q (S_{k} - H_{0}Y_{k})^{T}q (SkH0Yk)Tq, \
D = ( S k − H 0 Y k ) ( D k + L k + L k T − Y k T H 0 Y k ) − 1 (S_{k} - H_{0}Y_{k})(D_{k} + L_{k} + L_{k}^{T} - Y_{k}^{T}H_{0}Y_{k})^{-1} (SkH0Yk)(Dk+Lk+LkTYkTH0Yk)1,\
d = t m p ∗ g k + D ∗ a l p h a tmp*g_k + D*alpha tmpgk+Dalpha.\

这个计算过程中,如果 H 0 H_{0} H0我们取对角矩阵,那么整个计算方向的过程就只需要存储一个 m × m m\times m m×m阶的矩阵,以及计算这个 m × m m\times m m×m阶矩阵的逆,这个运算量不大。下面开始展示数值结果。
在这里插入图片描述
在这里插入图片描述
关于这个Extended Freudenstein and Roth function函数和下面的Extended Rosenbrock function函数,在代码运行过程中,本人发现无法直接将上面两个函数的代码照搬过去,事实上,我通过修改goldstein函数里面超参数的选取,也无法得到正确的结果,在通过debug以后,本人发现针对这两个函数,goldstein求解步长失效,无法得到满意的步长,导致溢出。

Penalty函数

import torch
import numpy as np
import time

def FF(X):
    n = X.shape[1]
    al = 1e-5
    return al*((X - 1)**2).sum() + ((X**2).sum() - 0.25)**2
def GG(X):
    n = X.shape[1]
    al = 1e-5
    return 2*al*(X - 1) + 4*X*((X**2).sum() - 0.25)
def HH(X):
    n = X.shape[1]
    al = 1e-5
    return 8*X.T@X + 4*((X**2).sum() - 0.25)*np.eye(n,n)
def gold(FF,GG,d,X):
    rho = 0.4
    
    am = 0;aM = 1;al = 0.5*(am + aM)
    for i in range(100):
        rig1 = FF(X) + rho*al*GG(X)@d.T
        rig2 = FF(X) + (1 - rho)*al*GG(X)@d.T
        lef = FF(X + al*d)
        if lef <= rig1:
            if lef >= rig2:
                break
            else:
                am = al
                al = am + 0.6*(aM - am)
        else:
            aM = al
            al = am + 0.6*(aM - am)
    al_k = al
    #print(i)
    return al_k,2*(i + 1)
def SRI(FF,GG,HH,X,m,N):
    tic = time.time()
    eps = 1e-5
    n = X.shape[1]
    fun_iter = 0
    X_new = np.zeros([1,n])
    S = np.zeros([m,n])
    Y = np.zeros([m,n])
    alpha = np.zeros(m)
    for i in range(N):
        if i <= m:
            if i == 0:
                al_k,ite = gold(FF,GG,-GG(X),X)
                fun_iter += ite
                X_new = X - GG(X)*al_k
            else:
                s = X_new - X
                y = GG(X_new) - GG(X)
                S[i - 1:i,:] = s
                Y[i - 1:i,:] = y
                #d = np.linalg.solve(HH(X_new),-GG(X_new).T)
                d = -GG(X).T
                al_k,ite = gold(FF,GG,d.T,X_new)
                #print(np.linalg.norm(X))
                X = X_new.copy()
                X_new = X_new - al_k*GG(X_new)
                fun_iter += ite
                
        else:
            s = X_new - X
            y = GG(X_new) - GG(X)
            S[0:m - 1,:] = S[1:m,:]
            Y[0:m - 1,:] = Y[1:m,:]
            S[m - 1:m,:] = s
            Y[m - 1:m,:] = y
            tmp = (s*y).sum()/(y*y).sum()
            q = GG(X_new)
            #H_{k} = H_{0} + (S_{k} - H_{0}Y_{k})@(D_{k} + L_{k} + L_{k}^{T} - Y_{k}^{T}H_{0}Y_{k})^{-1}(S_{k} - H_{0}Y_{k})^T
            alpha = np.dot(q,(S - tmp*Y).T)#这里计算g_{k}(S_{k} - H_{0}Y_{k})^T
            D = np.dot(np.linalg.inv(np.dot(S,Y.T) - tmp*np.dot(Y,Y.T)),(S - tmp*Y))
            #这里计算D = (S_{k} - H_{0}Y_{k})@(D_{k} + L_{k} + L_{k}^{T} - Y_{k}^{T}H_{0}Y_{k})^{-1}
            r = tmp*q + np.dot(alpha,D)
            al_k,ite = gold(FF,GG,-r,X_new)
            fun_iter += ite
            X = X_new.copy()
            X_new = X_new + al_k*(-r)
        left = np.linalg.norm(GG(X))
        rig = max(1,np.linalg.norm(X))*eps
        if left < rig:
            break
    ela = time.time() - tic
    print('iteration:%d,grad_2:%.3e,fun_iter = %d,time = %.2f'%(i + 1,(GG(X)**2).sum(),fun_iter,ela))
    return X_new
n = 1000
m = 15
X = np.zeros([1,n])
for i in range(n):
    X[:,i] = i + 1
X_new = SRI(FF,GG,HH,X,m,400)
#print(X_new)
print(FF(X_new))

Trigonometric function

import torch
import numpy as np
import time

def FF(X):
    n = X.shape[1]
    return ((n - np.cos(X).sum() + np.arange(1,n + 1)*(1 - np.cos(X)) - np.sin(X))**2).sum()
def GG(X):
    n = X.shape[1]
    #vec = np.zeros_like(X)
    tmp = n - np.cos(X).sum() + np.arange(1,n + 1)*(1 - np.cos(X)) - np.sin(X)
    return 2*np.sin(X)*tmp.sum() + 2*tmp*(np.arange(1,n + 1)*np.sin(X) - np.cos(X))
def HH(X):
    n = X.shape[1]
    mat = np.zeros([n,n])
    tmp = n - np.cos(X).sum() + np.arange(1,n + 1)*(1 - np.cos(X)) - np.sin(X)
    dia = 2*np.cos(X)*tmp.sum() + 2*np.sin(X)*((np.arange(1,n + 1) + n)*np.sin(X) - np.cos(X)) +\
    2*((np.arange(1,n + 1) + 1)*np.sin(X) - np.cos(X))*(np.arange(1,n + 1)*np.sin(X) - np.cos(X)) + \
    + 2*tmp*(np.sin(X) + np.arange(1,n + 1)*np.cos(X))
    mat = 2*np.sin(X).T*((np.arange(1,n + 1) + n)*np.sin(X) - np.cos(X)) + \
    (np.arange(1,n + 1)*np.sin(X) - np.cos(X)).T*2*np.sin(X)
    mat.flat[::(n + 1)] = dia
    return mat
def gold(FF,GG,d,X):
    rho = 0.4
    
    am = 0;aM = 1;al = 0.5*(am + aM)
    for i in range(100):
        rig1 = FF(X) + rho*al*GG(X)@d.T
        rig2 = FF(X) + (1 - rho)*al*GG(X)@d.T
        lef = FF(X + al*d)
        if lef <= rig1:
            if lef >= rig2:
                break
            else:
                am = al
                al = am + 0.6*(aM - am)
        else:
            aM = al
            al = am + 0.6*(aM - am)
    al_k = al
    #print(i)
    return al_k,2*(i + 1)
def SRI(FF,GG,HH,X,m,N):
    tic = time.time()
    eps = 1e-5
    n = X.shape[1]
    fun_iter = 0
    X_new = np.zeros([1,n])
    S = np.zeros([m,n])
    Y = np.zeros([m,n])
    alpha = np.zeros(m)
    for i in range(N):
        if i <= m:
            if i == 0:
                al_k,ite = gold(FF,GG,-GG(X),X)
                fun_iter += ite
                X_new = X - GG(X)*al_k
            else:
                s = X_new - X
                y = GG(X_new) - GG(X)
                S[i - 1:i,:] = s
                Y[i - 1:i,:] = y
                #d = np.linalg.solve(HH(X_new),-GG(X_new).T)
                d = -GG(X).T
                al_k,ite = gold(FF,GG,d.T,X_new)
                #print(np.linalg.norm(X))
                X = X_new.copy()
                X_new = X_new - al_k*GG(X_new)
                fun_iter += ite
                
        else:
            s = X_new - X
            y = GG(X_new) - GG(X)
            S[0:m - 1,:] = S[1:m,:]
            Y[0:m - 1,:] = Y[1:m,:]
            S[m - 1:m,:] = s
            Y[m - 1:m,:] = y
            tmp = (s*y).sum()/(y*y).sum()
            q = GG(X_new)
            #H_{k} = H_{0} + (S_{k} - H_{0}Y_{k})@(D_{k} + L_{k} + L_{k}^{T} - Y_{k}^{T}H_{0}Y_{k})^{-1}(S_{k} - H_{0}Y_{k})^T
            alpha = np.dot(q,(S - tmp*Y).T)#这里计算g_{k}(S_{k} - H_{0}Y_{k})^T
            D = np.dot(np.linalg.inv(np.dot(S,Y.T) - tmp*np.dot(Y,Y.T)),(S - tmp*Y))
            #这里计算D = (S_{k} - H_{0}Y_{k})@(D_{k} + L_{k} + L_{k}^{T} - Y_{k}^{T}H_{0}Y_{k})^{-1}
            r = tmp*q + np.dot(alpha,D)
            al_k,ite = gold(FF,GG,-r,X_new)
            fun_iter += ite
            X = X_new.copy()
            X_new = X_new + al_k*(-r)
        left = np.linalg.norm(GG(X))
        rig = max(1,np.linalg.norm(X))*eps
        if left < rig:
            break
    ela = time.time() - tic
    print('iteration:%d,grad_2:%.3e,fun_iter = %d,time = %.2f'%(i + 1,(GG(X)**2).sum(),fun_iter,ela))
    return X_new
n = 1000
m = 5
X = np.zeros([1,n])
for i in range(n):
    X[:,i] = i + 1
X_new = SRI(FF,GG,HH,X,m,400)
#print(X_new)
print(FF(X_new))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Galerkin码农选手

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值