Compute the Cauchy point, i.e., the minimizer of the model along the steepest-descent direction:

$$d^U = -\frac{g^T g}{g^T B g}\, g$$

If this steepest-descent step lands outside the trust region, return its intersection with the trust-region boundary instead.
In that case the returned step is $d^U$ rescaled to the trust-region radius, i.e. $\frac{\Delta}{\|d^U\|}\, d^U$ where $\Delta$ is the trust radius; in the code this is `p_boundary = p_u * (trust_radius / p_u_norm)`.
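A quick numeric illustration of that scaling (my own sketch, with made-up values):

```python
import numpy as np

# Hypothetical steepest-descent step that overshoots the trust region.
p_u = np.array([3.0, 4.0])                      # ||p_u|| = 5
trust_radius = 2.0

p_boundary = p_u * (trust_radius / np.linalg.norm(p_u))
print(p_boundary, np.linalg.norm(p_boundary))   # [1.2 1.6] 2.0
```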
```python
def cauchy_point(self):
    """
    The Cauchy point is minimal along the direction of steepest descent.
    """
    if self._cauchy_point is None:
        g = self.jac
        Bg = self.hessp(g)  # product of the Hessian and the gradient, B @ g
        self._cauchy_point = -(np.dot(g, g) / np.dot(g, Bg)) * g
    return self._cauchy_point
```
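As a standalone sanity check of the formula (my own sketch, not part of the class; `g` and `B` are made-up values):

```python
import numpy as np

# Check d^U = -(g^T g / g^T B g) g on a small 2-D quadratic model.
g = np.array([4.0, -2.0])      # gradient at the current point
B = np.array([[2.0, 0.0],
              [0.0, 1.0]])     # positive-definite model Hessian

Bg = B @ g                     # Hessian-vector product
d_u = -(g @ g) / (g @ Bg) * g
print(d_u)                     # about [-2.22  1.11]
```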
Compute the Newton step, i.e., the global minimizer of the quadratic model $m(d) = f + g^T d + \frac{1}{2} d^T B d$ (ignoring the constraint that $d$ must lie within the trust radius): $p^B = -B^{-1} g$. The code does not form the inverse directly; it solves the linear system via a Cholesky factorization instead.
```python
def newton_point(self):
    """
    The Newton point is a global minimum of the approximate function.
    """
    if self._newton_point is None:
        g = self.jac
        B = self.hess
        cho_info = scipy.linalg.cho_factor(B)
        self._newton_point = -scipy.linalg.cho_solve(cho_info, g)
    return self._newton_point
```
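A minimal check (my own illustration, with a hypothetical `B` and `g`) that the Cholesky route gives the same step as an explicit inverse:

```python
import numpy as np
import scipy.linalg

B = np.array([[4.0, 1.0],
              [1.0, 3.0]])     # positive definite
g = np.array([1.0, 2.0])

cho_info = scipy.linalg.cho_factor(B)            # B = L L^T
p_newton = -scipy.linalg.cho_solve(cho_info, g)

# Same step via the explicit inverse (shown only for comparison;
# factorizing is cheaper and more stable than inverting).
assert np.allclose(p_newton, -np.linalg.inv(B) @ g)
```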
If this global minimizer lies inside the trust region, the search direction is simply the Newton step; otherwise we solve for the scalar $t$ at which the path crosses the boundary:
```python
def get_boundaries_intersections(self, z, d, trust_radius):
    """
    Solve the scalar quadratic equation ||z + t d|| == trust_radius.
    This is like a line-sphere intersection.
    Return the two values of t, sorted from low to high.
    """
    a = np.dot(d, d)
    b = 2 * np.dot(z, d)
    c = np.dot(z, z) - trust_radius**2
    sqrt_discriminant = math.sqrt(b*b - 4*a*c)
    ta = (-b - sqrt_discriminant) / (2*a)
    tb = (-b + sqrt_discriminant) / (2*a)
    return ta, tb
```
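A quick check (my own sketch with made-up `z` and `d`) that both roots really land on the sphere $\|z + t\,d\| = \Delta$:

```python
import math
import numpy as np

z = np.array([0.5, 0.0])       # start of the dogleg segment
d = np.array([1.0, 1.0])       # direction of the segment
trust_radius = 2.0

a = np.dot(d, d)
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_disc = math.sqrt(b*b - 4*a*c)
ta = (-b - sqrt_disc) / (2*a)
tb = (-b + sqrt_disc) / (2*a)

# Both intersection points lie exactly on the trust-region boundary.
assert math.isclose(np.linalg.norm(z + ta*d), trust_radius)
assert math.isclose(np.linalg.norm(z + tb*d), trust_radius)
```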
Requirements this method places on the Hessian matrix:

This algorithm requires function values and first and second derivatives. It also performs a costly Hessian decomposition for most iterations, and the Hessian is required to be positive definite.
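For example (my own illustration), `cho_factor` raises `LinAlgError` on an indefinite matrix; the driver below catches exactly this and reports `warnflag = 3`:

```python
import numpy as np
import scipy.linalg

B_indef = np.array([[1.0, 2.0],
                    [2.0, 1.0]])   # eigenvalues 3 and -1, so not PD

try:
    scipy.linalg.cho_factor(B_indef)
except np.linalg.LinAlgError as e:
    print("Cholesky failed:", e)   # the Hessian must be positive definite
```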
```python
class DoglegSubproblem(BaseQuadraticSubproblem):
    """Quadratic subproblem solved by the dogleg method"""

    def solve(self, trust_radius):
        """
        Minimize a function using the dog-leg trust-region algorithm.

        Parameters
        ----------
        trust_radius : float
            We are allowed to wander only this far away from the origin.

        Returns
        -------
        p : ndarray
            The proposed step.
        hits_boundary : bool
            True if the proposed step is on the boundary of the trust region.

        Notes
        -----
        The Hessian is required to be positive definite.

        References
        ----------
        .. [1] Jorge Nocedal and Stephen Wright,
               Numerical Optimization, second edition,
               Springer-Verlag, 2006, page 73.
        """
        # Compute the Newton point.
        # This is the optimum for the quadratic model function.
        # If it is inside the trust radius then return this point.
        p_best = self.newton_point()
        if scipy.linalg.norm(p_best) < trust_radius:
            hits_boundary = False
            return p_best, hits_boundary

        # Compute the Cauchy point.
        # This is the predicted optimum along the direction of steepest descent.
        p_u = self.cauchy_point()

        # If the Cauchy point is outside the trust region,
        # then return the point where the path intersects the boundary.
        p_u_norm = scipy.linalg.norm(p_u)
        if p_u_norm >= trust_radius:
            p_boundary = p_u * (trust_radius / p_u_norm)
            hits_boundary = True
            return p_boundary, hits_boundary

        # Compute the intersection of the trust region boundary
        # and the line segment connecting the Cauchy and Newton points.
        # This requires solving a quadratic equation.
        # ||p_u + t*(p_best - p_u)||**2 == trust_radius**2
        # Solve this for positive time t using the quadratic formula.
        _, tb = self.get_boundaries_intersections(p_u, p_best - p_u,
                                                  trust_radius)
        p_boundary = p_u + tb * (p_best - p_u)
        hits_boundary = True
        return p_boundary, hits_boundary
```
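In SciPy this subproblem solver is exposed through `scipy.optimize.minimize` with `method='dogleg'`; a minimal usage example (the exact iteration count will depend on the SciPy version):

```python
import numpy as np
from scipy.optimize import minimize, rosen, rosen_der, rosen_hess

x0 = np.array([15.0, 100.0])
res = minimize(rosen, x0, method='dogleg', jac=rosen_der, hess=rosen_hess)
print(res.x)       # expect roughly [1. 1.]
print(res.nit)     # number of iterations
```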
The complete trust-region code framework:
```python
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
                           subproblem=None, initial_trust_radius=1.0,
                           max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
                           maxiter=None, disp=False, return_all=False,
                           callback=None, **unknown_options):
    """
    Minimization of scalar function of one or more variables using a
    trust-region algorithm.

    Options for the trust-region algorithm are:
        initial_trust_radius : float
            Initial trust radius.
        max_trust_radius : float
            Never propose steps that are longer than this value.
        eta : float
            Trust region related acceptance stringency for proposed steps.
        gtol : float
            Gradient norm must be less than `gtol`
            before successful termination.
        maxiter : int
            Maximum number of iterations to perform.
        disp : bool
            If True, print convergence message.

    This function is called by the `minimize` function.
    It is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)

    if jac is None:
        raise ValueError('Jacobian is currently required for trust-region '
                         'methods')
    if hess is None and hessp is None:
        raise ValueError('Either the Hessian or the Hessian-vector product '
                         'is currently required for trust-region methods')
    if subproblem is None:
        raise ValueError('A subproblem solving strategy is required for '
                         'trust-region methods')
    if not (0 <= eta < 0.25):
        raise Exception('invalid acceptance stringency')
    if max_trust_radius <= 0:
        raise Exception('the max trust radius must be positive')
    if initial_trust_radius <= 0:
        raise ValueError('the initial trust radius must be positive')
    if initial_trust_radius >= max_trust_radius:
        raise ValueError('the initial trust radius must be less than the '
                         'max trust radius')

    # force the initial guess into a nice format
    x0 = np.asarray(x0).flatten()

    # Wrap the functions, for a couple reasons.
    # This tracks how many times they have been called
    # and it automatically passes the args.
    nfun, fun = wrap_function(fun, args)
    njac, jac = wrap_function(jac, args)
    nhess, hess = wrap_function(hess, args)
    nhessp, hessp = wrap_function(hessp, args)

    # limit the number of iterations
    if maxiter is None:
        maxiter = len(x0)*200

    # init the search status
    warnflag = 0

    # initialize the search
    trust_radius = initial_trust_radius
    x = x0
    if return_all:
        allvecs = [x]
    m = subproblem(x, fun, jac, hess, hessp)
    k = 0

    # search for the function min
    while True:

        # Solve the sub-problem.
        # This gives us the proposed step relative to the current position
        # and it tells us whether the proposed step
        # has reached the trust region boundary or not.
        try:
            p, hits_boundary = m.solve(trust_radius)
        except np.linalg.LinAlgError:
            warnflag = 3
            break

        # calculate the predicted value at the proposed point
        predicted_value = m(p)

        # define the local approximation at the proposed point
        x_proposed = x + p
        m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)

        # evaluate the ratio defined in equation (4.4)
        actual_reduction = m.fun - m_proposed.fun
        predicted_reduction = m.fun - predicted_value
        if predicted_reduction <= 0:
            warnflag = 2
            break
        rho = actual_reduction / predicted_reduction

        # update the trust radius according to the actual/predicted ratio
        if rho < 0.25:
            trust_radius *= 0.25
        elif rho > 0.75 and hits_boundary:
            trust_radius = min(2*trust_radius, max_trust_radius)

        # if the ratio is high enough then accept the proposed step
        if rho > eta:
            x = x_proposed
            m = m_proposed

        # append the best guess, call back, increment the iteration count
        if return_all:
            allvecs.append(x)
        if callback is not None:
            callback(x)
        k += 1

        # check if the gradient is small enough to stop
        if m.jac_mag < gtol:
            warnflag = 0
            break

        # check if we have looked at enough iterations
        if k >= maxiter:
            warnflag = 1
            break

    # print some stuff if requested
    status_messages = (
            _status_message['success'],
            _status_message['maxiter'],
            'A bad approximation caused failure to predict improvement.',
            'A linalg error occurred, such as a non-psd Hessian.',
    )
    if disp:
        if warnflag == 0:
            print(status_messages[warnflag])
        else:
            print('Warning: ' + status_messages[warnflag])
        print("         Current function value: %f" % m.fun)
        print("         Iterations: %d" % k)
        print("         Function evaluations: %d" % nfun[0])
        print("         Gradient evaluations: %d" % njac[0])
        print("         Hessian evaluations: %d" % nhess[0])

    result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
                            fun=m.fun, jac=m.jac, nfev=nfun[0], njev=njac[0],
                            nhev=nhess[0], nit=k,
                            message=status_messages[warnflag])

    if hess is not None:
        result['hess'] = m.hess

    if return_all:
        result['allvecs'] = allvecs

    return result
```
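For context, `method='dogleg'` reaches this driver through a thin wrapper; the sketch below paraphrases SciPy's `scipy/optimize/_trustregion_dogleg.py` (the exact signature may differ between SciPy versions):

```python
# Paraphrase of scipy/optimize/_trustregion_dogleg.py:
# the dogleg method just plugs DoglegSubproblem into the generic driver.
def _minimize_dogleg(fun, x0, args=(), jac=None, hess=None,
                     **trust_region_options):
    return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
                                  subproblem=DoglegSubproblem,
                                  **trust_region_options)
```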
My rewritten learning version. Running it from `x0 = [15, 100]` prints:
```
Success! information is
 initial point x0: [15, 100]
 initial function value: 1562696.000000
 Iterations: 66
 optimal x:  [ 1.  1.]
 Current function value: 0.000000
 Hessian at x: [[ 802.00000276 -400.00000066]
 [-400.00000066  200.        ]]
```
"""Dog-leg trust-region optimization."""
import math
import numpy as np
import scipy.linalg
from scipy.optimize import minimize, rosen, rosen_der,rosen_hess
def cauchy_point(g,B):
Bg = np.dot(B,g)
cauchy_point = -(np.dot(g, g) / np.dot(g, Bg)) * g
return cauchy_point
# 利用LU分解求解牛顿步
def newton_point(g,B):
cho_info = scipy.linalg.cho_factor(B)
newton_point = -scipy.linalg.cho_solve(cho_info, g)
return newton_point
def solve(trust_radius,g,B):
# print(trust_radius)
p_best = newton_point(g,B)
# 如果牛顿步在信赖域中,则利用牛顿步更新
if scipy.linalg.norm(p_best) < trust_radius:
hits_boundary = False
return p_best, hits_boundary
p_u = cauchy_point(g,B)
p_u_norm = scipy.linalg.norm(p_u)
# 如果最速下降方向(步长求出),在信赖域外,则返回其与信赖域的交点
if p_u_norm >= trust_radius:
p_boundary = p_u * (trust_radius / p_u_norm)
hits_boundary = True
return p_boundary, hits_boundary
# 否则的话,则是二者的线性组合
_, tb = get_boundaries_intersections(p_u, p_best - p_u,
trust_radius)
p_boundary = p_u + tb * (p_best - p_u)
hits_boundary = True
return p_boundary, hits_boundary
def get_boundaries_intersections( z, d, trust_radius):
a = np.dot(d, d)
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
ta = (-b - sqrt_discriminant) / (2*a)
tb = (-b + sqrt_discriminant) / (2*a)
return ta, tb
def minimize_trust_region( x0,eta=0.15, initial_trust_radius = 1.0,max_trust_radius = 1000.0,gtol = 1e-4):
warnflag = 0
trust_radius = initial_trust_radius
x = x0
k = 0
while True:
p, hits_boundary = solve(trust_radius,rosen_der(x),rosen_hess(x))
# hits_boundary 表示has reached the trust region boundary or not.
x_proposed=x+p
# calculate the predicted value at the proposed point
predicted_value =rosen(x)+np.dot(rosen_der(x), p) + 0.5 * np.dot(p, np.dot(rosen_hess(x), p))
actual_reduction = rosen(x) - rosen(x_proposed)
predicted_reduction = rosen(x) - predicted_value
# update the trust radius according to the actual/predicted ratio
rho = actual_reduction / predicted_reduction
if rho < 0.25:
trust_radius *= 0.25
elif rho > 0.75 and hits_boundary:
trust_radius = min(2*trust_radius, max_trust_radius)
# if the ratio is high enough then accept the proposed step
if rho > eta:
x = x_proposed
k += 1
# check if the gradient is small enough to stop
if scipy.linalg.norm(rosen_der(x))< gtol:
warnflag = 0
break
if warnflag == 0:
print("Succeess! information is")
print(" 初始点x0:" ,x0)
print(" 初始函数值: %f" % rosen(x0))
print(" Iterations: %d" % k)
print(" optimal x: ",x)
print(" Current function value: %f" % rosen(x))
print(" Gradient x" , rosen_hess(x))
if __name__ == '__main__':
x0 = [15, 100]
minimize_trust_region(x0,0.15)