Compute the Cauchy point, i.e., the minimizer of the model along the steepest-descent direction:

$$d^U = -\frac{g^T g}{g^T B g}\, g$$

If this steepest-descent step lands outside the trust region, return its intersection with the trust-region boundary instead.
In that case the returned step is $d^U$ rescaled to the trust-region radius, i.e. $\frac{\Delta}{\|d^U\|}\, d^U$ where $\Delta$ is the trust radius; in the code this is `p_boundary = p_u * (trust_radius / p_u_norm)`.
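A quick numeric illustration of that scaling (my own sketch, with made-up values):

```python
import numpy as np

# Hypothetical steepest-descent step that overshoots the trust region.
p_u = np.array([3.0, 4.0])                      # ||p_u|| = 5
trust_radius = 2.0

p_boundary = p_u * (trust_radius / np.linalg.norm(p_u))
print(p_boundary, np.linalg.norm(p_boundary))   # [1.2 1.6] 2.0
```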
```python
def cauchy_point(self):
    """
    The Cauchy point is minimal along the direction of steepest descent.
    """
    if self._cauchy_point is None:
        g = self.jac
        Bg = self.hessp(g)  # product of the Hessian and the gradient, B @ g
        self._cauchy_point = -(np.dot(g, g) / np.dot(g, Bg)) * g
    return self._cauchy_point
```
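As a standalone sanity check of the formula (my own sketch, not part of the class; `g` and `B` are made-up values):

```python
import numpy as np

# Check d^U = -(g^T g / g^T B g) g on a small 2-D quadratic model.
g = np.array([4.0, -2.0])      # gradient at the current point
B = np.array([[2.0, 0.0],
              [0.0, 1.0]])     # positive-definite model Hessian

Bg = B @ g                     # Hessian-vector product
d_u = -(g @ g) / (g @ Bg) * g
print(d_u)                     # about [-2.22  1.11]
```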
Compute the Newton step, i.e., the global minimizer of the quadratic model $m(d) = f + g^T d + \frac{1}{2} d^T B d$ (ignoring the constraint that $d$ must lie within the trust radius): $p^B = -B^{-1} g$. The code does not form the inverse directly; it solves the linear system via a Cholesky factorization instead.
```python
def newton_point(self):
    """
    The Newton point is a global minimum of the approximate function.
    """
    if self._newton_point is None:
        g = self.jac
        B = self.hess
        cho_info = scipy.linalg.cho_factor(B)
        self._newton_point = -scipy.linalg.cho_solve(cho_info, g)
    return self._newton_point
```
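A minimal check (my own illustration, with a hypothetical `B` and `g`) that the Cholesky route gives the same step as an explicit inverse:

```python
import numpy as np
import scipy.linalg

B = np.array([[4.0, 1.0],
              [1.0, 3.0]])     # positive definite
g = np.array([1.0, 2.0])

cho_info = scipy.linalg.cho_factor(B)            # B = L L^T
p_newton = -scipy.linalg.cho_solve(cho_info, g)

# Same step via the explicit inverse (shown only for comparison;
# factorizing is cheaper and more stable than inverting).
assert np.allclose(p_newton, -np.linalg.inv(B) @ g)
```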
If this global minimizer lies inside the trust region, the search direction is simply the Newton step; otherwise we solve for the scalar $t$ at which the path crosses the boundary:
```python
def get_boundaries_intersections(self, z, d, trust_radius):
    """
    Solve the scalar quadratic equation ||z + t d|| == trust_radius.
    This is like a line-sphere intersection.
    Return the two values of t, sorted from low to high.
    """
    a = np.dot(d, d)
    b = 2 * np.dot(z, d)
    c = np.dot(z, z) - trust_radius**2
    sqrt_discriminant = math.sqrt(b*b - 4*a*c)
    ta = (-b - sqrt_discriminant) / (2*a)
    tb = (-b + sqrt_discriminant) / (2*a)
    return ta, tb
```
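A quick check (my own sketch with made-up `z` and `d`) that both roots really land on the sphere $\|z + t\,d\| = \Delta$:

```python
import math
import numpy as np

z = np.array([0.5, 0.0])       # start of the dogleg segment
d = np.array([1.0, 1.0])       # direction of the segment
trust_radius = 2.0

a = np.dot(d, d)
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_disc = math.sqrt(b*b - 4*a*c)
ta = (-b - sqrt_disc) / (2*a)
tb = (-b + sqrt_disc) / (2*a)

# Both intersection points lie exactly on the trust-region boundary.
assert math.isclose(np.linalg.norm(z + ta*d), trust_radius)
assert math.isclose(np.linalg.norm(z + tb*d), trust_radius)
```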
Requirements this method places on the Hessian matrix:

This algorithm requires function values and first and second derivatives. It also performs a costly Hessian decomposition for most iterations, and the Hessian is required to be positive definite.
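For example (my own illustration), `cho_factor` raises `LinAlgError` on an indefinite matrix; the driver below catches exactly this and reports `warnflag = 3`:

```python
import numpy as np
import scipy.linalg

B_indef = np.array([[1.0, 2.0],
                    [2.0, 1.0]])   # eigenvalues 3 and -1, so not PD

try:
    scipy.linalg.cho_factor(B_indef)
except np.linalg.LinAlgError as e:
    print("Cholesky failed:", e)   # the Hessian must be positive definite
```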
```python
class DoglegSubproblem(BaseQuadraticSubproblem):
    """Quadratic subproblem solved by the dogleg method"""

    def solve(self, trust_radius):
        """
        Minimize a function using the dog-leg trust-region algorithm.

        Parameters
        ----------
        trust_radius : float
            We are allowed to wander only this far away from the origin.

        Returns
        -------
        p : ndarray
            The proposed step.
        hits_boundary : bool
            True if the proposed step is on the boundary of the trust region.

        Notes
        -----
        The Hessian is required to be positive definite.

        References
        ----------
        .. [1] Jorge Nocedal and Stephen Wright,
               Numerical Optimization, second edition,
               Springer-Verlag, 2006, page 73.
        """
        # Compute the Newton point.
        # This is the optimum for the quadratic model function.
        # If it is inside the trust radius then return this point.
        p_best = self.newton_point()
        if scipy.linalg.norm(p_best) < trust_radius:
            hits_boundary = False
            return p_best, hits_boundary

        # Compute the Cauchy point.
        # This is the predicted optimum along the direction of steepest descent.
        p_u = self.cauchy_point()

        # If the Cauchy point is outside the trust region,
        # then return the point where the path intersects the boundary.
        p_u_norm = scipy.linalg.norm(p_u)
        if p_u_norm >= trust_radius:
            p_boundary = p_u * (trust_radius / p_u_norm)
            hits_boundary = True
            return p_boundary, hits_boundary

        # Compute the intersection of the trust region boundary
        # and the line segment connecting the Cauchy and Newton points.
        # This requires solving a quadratic equation.
        # ||p_u + t*(p_best - p_u)||**2 == trust_radius**2
        # Solve this for positive time t using the quadratic formula.
        _, tb = self.get_boundaries_intersections(p_u, p_best - p_u,
                                                  trust_radius)
        p_boundary = p_u + tb * (p_best - p_u)
        hits_boundary = True
        return p_boundary, hits_boundary
```
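In SciPy this subproblem solver is exposed through `scipy.optimize.minimize` with `method='dogleg'`; a minimal usage example (the exact iteration count will depend on the SciPy version):

```python
import numpy as np
from scipy.optimize import minimize, rosen, rosen_der, rosen_hess

x0 = np.array([15.0, 100.0])
res = minimize(rosen, x0, method='dogleg', jac=rosen_der, hess=rosen_hess)
print(res.x)       # expect roughly [1. 1.]
print(res.nit)     # number of iterations
```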
The complete trust-region code framework:
```python
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
                           subproblem=None, initial_trust_radius=1.0,
                           max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
                           maxiter=None, disp=False, return_all=False,
                           callback=None, **unknown_options):
    """
    Minimization of scalar function of one or more variables using a
    trust-region algorithm.

    Options for the trust-region algorithm are:
        initial_trust_radius : float
            Initial trust radius.
        max_trust_radius : float
            Never propose steps that are longer than this value.
        eta : float
            Trust region related acceptance stringency for proposed steps.
        gtol : float
            Gradient norm must be less than `gtol`
            before successful termination.
        maxiter : int
            Maximum number of iterations to perform.
        disp : bool
            If True, print convergence message.

    This function is called by the `minimize` function.
    It is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)

    if jac is None:
        raise ValueError('Jacobian is currently required for trust-region '
                         'methods')
    if hess is None and hessp is None:
        raise ValueError('Either the Hessian or the Hessian-vector product '
                         'is currently required for trust-region methods')
    if subproblem is None:
        raise ValueError('A subproblem solving strategy is required for '
                         'trust-region methods')
    if not (0 <= eta < 0.25):
        raise Exception('invalid acceptance stringency')
    if max_trust_radius <= 0:
        raise Exception('the max trust radius must be positive')
    if initial_trust_radius <= 0:
        raise ValueError('the initial trust radius must be positive')
    if initial_trust_radius >= max_trust_radius:
        raise ValueError('the initial trust radius must be less than the '
                         'max trust radius')

    # force the initial guess into a nice format
    x0 = np.asarray(x0).flatten()

    # Wrap the functions, for a couple reasons.
    # This tracks how many times they have been called
    # and it automatically passes the args.
    nfun, fun = wrap_function(fun, args)
    njac, jac = wrap_function(jac, args)
    nhess, hess = wrap_function(hess, args)
    nhessp, hessp = wrap_function(hessp, args)

    # limit the number of iterations
    if maxiter is None:
        maxiter = len(x0)*200

    # init the search status
    warnflag = 0

    # initialize the search
    trust_radius = initial_trust_radius
    x = x0
    if return_all:
        allvecs = [x]
    m = subproblem(x, fun, jac, hess, hessp)
    k = 0

    # search for the function min
    while True:

        # Solve the sub-problem.
        # This gives us the proposed step relative to the current position
        # and it tells us whether the proposed step
        # has reached the trust region boundary or not.
        try:
            p, hits_boundary = m.solve(trust_radius)
        except np.linalg.LinAlgError:
            warnflag = 3
            break

        # calculate the predicted value at the proposed point
        predicted_value = m(p)

        # define the local approximation at the proposed point
        x_proposed = x + p
        m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)

        # evaluate the ratio defined in equation (4.4)
        actual_reduction = m.fun - m_proposed.fun
        predicted_reduction = m.fun - predicted_value
        if predicted_reduction <= 0:
            warnflag = 2
            break
        rho = actual_reduction / predicted_reduction

        # update the trust radius according to the actual/predicted ratio
        if rho < 0.25:
            trust_radius *= 0.25
        elif rho > 0.75 and hits_boundary:
            trust_radius = min(2*trust_radius, max_trust_radius)

        # if the ratio is high enough then accept the proposed step
        if rho > eta:
            x = x_proposed
            m = m_proposed

        # append the best guess, call back, increment the iteration count
        if return_all:
            allvecs.append(x)
        if callback is not None:
            callback(x)
        k += 1

        # check if the gradient is small enough to stop
        if m.jac_mag < gtol:
            warnflag = 0
            break

        # check if we have looked at enough iterations
        if k >= maxiter:
            warnflag = 1
            break

    # print some stuff if requested
    status_messages = (
            _status_message['success'],
            _status_message['maxiter'],
            'A bad approximation caused failure to predict improvement.',
            'A linalg error occurred, such as a non-psd Hessian.',
    )
    if disp:
        if warnflag == 0:
            print(status_messages[warnflag])
        else:
            print('Warning: ' + status_messages[warnflag])
        print("         Current function value: %f" % m.fun)
        print("         Iterations: %d" % k)
        print("         Function evaluations: %d" % nfun[0])
        print("         Gradient evaluations: %d" % njac[0])
        print("         Hessian evaluations: %d" % nhess[0])

    result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
                            fun=m.fun, jac=m.jac, nfev=nfun[0], njev=njac[0],
                            nhev=nhess[0], nit=k,
                            message=status_messages[warnflag])

    if hess is not None:
        result['hess'] = m.hess

    if return_all:
        result['allvecs'] = allvecs

    return result
```
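For context, `method='dogleg'` reaches this driver through a thin wrapper; the sketch below paraphrases SciPy's `scipy/optimize/_trustregion_dogleg.py` (the exact signature may differ between SciPy versions):

```python
# Paraphrase of scipy/optimize/_trustregion_dogleg.py:
# the dogleg method just plugs DoglegSubproblem into the generic driver.
def _minimize_dogleg(fun, x0, args=(), jac=None, hess=None,
                     **trust_region_options):
    return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
                                  subproblem=DoglegSubproblem,
                                  **trust_region_options)
```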
My rewritten learning version. Running it from `x0 = [15, 100]` prints:
```
Success! information is
 initial point x0: [15, 100]
 initial function value: 1562696.000000
 Iterations: 66
 optimal x:  [ 1.  1.]
 Current function value: 0.000000
 Hessian at x: [[ 802.00000276 -400.00000066]
 [-400.00000066  200.        ]]
```
"""Dog-leg trust-region optimization."""
import math
import numpy as np
import scipy.linalg
from scipy.optimize import minimize, rosen, rosen_der,rosen_hess
def cauchy_point(g,B):
Bg = np.dot(B,g)
cauchy_point = -(np.dot(g, g) / np.dot(g, Bg)) * g
return cauchy_point
# 利用LU分解求解牛顿步
def newton_point(g,B):
cho_info = scipy.linalg.cho_factor(B)
newton_point = -scipy.linalg.cho_solve(cho_info, g)
return newton_point
def solve(trust_radius,g,B):
# print(trust_radius)
p_best = newton_point(g,B)
# 如果牛顿步在信赖域中,则利用牛顿步更新
if scipy.linalg.norm(p_best) < trust_radius:
hits_boundary = False
return p_best, hits_boundary
p_u = cauchy_point(g,B)
p_u_norm = scipy.linalg.norm(p_u)
# 如果最速下降方向(步长求出),在信赖域外,则返回其与信赖域的交点
if p_u_norm >= trust_radius:
p_boundary = p_u * (trust_radius / p_u_norm)
hits_boundary = True
return p_boundary, hits_boundary
# 否则的话,则是二者的线性组合
_, tb = get_boundaries_intersections(p_u, p_best - p_u,
trust_radius)
p_boundary = p_u + tb * (p_best - p_u)
hits_boundary = True
return p_boundary, hits_boundary
def get_boundaries_intersections( z, d, trust_radius):
a = np.dot(d, d)
b = 2 * np.dot(z, d)
c = np.dot(z, z) - trust_radius**2
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
ta = (-b - sqrt_discriminant) / (2*a)
tb = (-b + sqrt_discriminant) / (2*a)
return ta, tb
def minimize_trust_region( x0,eta=0.15, initial_trust_radius = 1.0,max_trust_radius = 1000.0,gtol = 1e-4):
warnflag = 0
trust_radius = initial_trust_radius
x = x0
k = 0
while True:
p, hits_boundary = solve(trust_radius,rosen_der(x),rosen_hess(x))
# hits_boundary 表示has reached the trust region boundary or not.
x_proposed=x+p
# calculate the predicted value at the proposed point
predicted_value =rosen(x)+np.dot(rosen_der(x), p) + 0.5 * np.dot(p, np.dot(rosen_hess(x), p))
actual_reduction = rosen(x) - rosen(x_proposed)
predicted_reduction = rosen(x) - predicted_value
# update the trust radius according to the actual/predicted ratio
rho = actual_reduction / predicted_reduction
if rho < 0.25:
trust_radius *= 0.25
elif rho > 0.75 and hits_boundary:
trust_radius = min(2*trust_radius, max_trust_radius)
# if the ratio is high enough then accept the proposed step
if rho > eta:
x = x_proposed
k += 1
# check if the gradient is small enough to stop
if scipy.linalg.norm(rosen_der(x))< gtol:
warnflag = 0
break
if warnflag == 0:
print("Succeess! information is")
print(" 初始点x0:" ,x0)
print(" 初始函数值: %f" % rosen(x0))
print(" Iterations: %d" % k)
print(" optimal x: ",x)
print(" Current function value: %f" % rosen(x))
print(" Gradient x" , rosen_hess(x))
if __name__ == '__main__':
x0 = [15, 100]
minimize_trust_region(x0,0.15)