Introduction
In the previous section we introduced Newton's method for minimizing a function of several variables. Although Newton's method converges quickly, the iteration may fail to converge when the initial point is too far from the minimum. To overcome this drawback, this section introduces the damped Newton method. In Newton's method the step size $\lambda_k$ is always taken to be 1, whereas in the damped Newton method the step size of each iteration is determined by a one-dimensional (line) search; line-search methods have already been covered at length and are not repeated here. See the previous section for the details of Newton's method. Putting this together, the damped Newton iteration for approximating the minimum point is:
$$\bm{x}^{k+1}=\bm{x}^{k}-\lambda_k\left(\nabla^{2}f(\bm{x}^k)\right)^{-1}\nabla f(\bm{x}^{k})$$
where the step size $\lambda_k$ is obtained by a one-dimensional search, i.e., it solves
$$f(\bm{x}^k+\lambda_k\bm{s}^k)=\min_{\lambda\geq 0}f(\bm{x}^k+\lambda\bm{s}^k)$$
in which $\bm{s}^k=-\left(\nabla^{2}f(\bm{x}^k)\right)^{-1}\nabla f(\bm{x}^k)$ is the Newton search direction, so $\lambda_k$ is the minimizer of $f$ along $\bm{s}^k$.
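A useful aside for quadratic objectives (a derivation, not part of the method statement): if $f$ is quadratic with constant Hessian $H$, setting $\frac{\mathrm{d}}{\mathrm{d}\lambda}f(\bm{x}^k+\lambda\bm{s}^k)=0$ gives the exact-line-search step in closed form,
$$\lambda_k=-\frac{\nabla f(\bm{x}^k)^{T}\bm{s}^k}{(\bm{s}^k)^{T}H\,\bm{s}^k},$$
and substituting the Newton direction $\bm{s}^k=-H^{-1}\nabla f(\bm{x}^k)$ yields $\lambda_k=1$. This explains why the quadratic test problem below converges in a single full Newton step.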
The concrete steps are as follows (a compact code sketch follows the list):
- Choose an initial point $\bm{x}^0$ and a tolerance $\varepsilon>0$, and set $k=0$;
- Compute $\nabla f(\bm{x}^k)$; if $\lVert\nabla f(\bm{x}^k)\rVert<\varepsilon$ holds, stop the iteration and take $\bm{x}^k$ as the desired point; otherwise compute $\nabla^{2}f(\bm{x}^k)$ and $\bm{s}^k=-\left[\nabla^{2}f(\bm{x}^k)\right]^{-1}\nabla f(\bm{x}^k)$, then go to step 3;
- Perform a one-dimensional search along $\bm{s}^k$ to compute the step size $\lambda_k$, then go to step 4;
- Set $\bm{x}^{k+1}=\bm{x}^k+\lambda_k\bm{s}^k$ and $k=k+1$, then go to step 2.
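To make these steps concrete, here is a minimal sketch of the loop. It is an illustration only: it assumes SciPy's `scipy.optimize.line_search` (which enforces the Wolfe conditions) in place of the hand-written search that the full program below implements, and `f`, `grad`, and `hess` are placeholder callables supplied by the caller.

```python
import numpy as np
from scipy.optimize import line_search

def damped_newton_sketch(f, grad, hess, x, eps=1e-3, max_iter=100):
    """Steps 1-4 above; f, grad, hess are caller-supplied callables."""
    for _ in range(max_iter):
        g = grad(x)
        if np.linalg.norm(g) < eps:           # step 2: stopping test
            return x
        s = -np.linalg.solve(hess(x), g)      # step 2: Newton direction
        lam = line_search(f, grad, x, s)[0]   # step 3: Wolfe line search
        if lam is None:                       # search failed: fall back to full step
            lam = 1.0
        x = x + lam * s                       # step 4: update and repeat
    return x
```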
Optimization problem
Find the minimum point of $f(x_1,x_2)=x_1^{2}+2x_2^{2}-4x_1-2x_1x_2$, starting from the initial point $\bm{x}^0=[5000,\,0]^{T}$ with $\varepsilon=10^{-3}$; the step size is found by an inexact direct search (the Wolfe-Powell rules). The program is as follows:
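For reference, the gradient and Hessian that the program hard-codes are
$$\nabla f(x_1,x_2)=\begin{bmatrix}2x_1-4-2x_2\\4x_2-2x_1\end{bmatrix},\qquad \nabla^{2}f(x_1,x_2)=\begin{bmatrix}2&-2\\-2&4\end{bmatrix}.$$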
```python
import numpy as np
def cal_fun_f(x1, x2):
    """
    Evaluate the objective function.
    """
    return x1**2 + 2*x2**2 - 4*x1 - 2*x1*x2
def partial_derivative_x1(x1, x2):
    """
    Partial derivative of f with respect to x1.
    """
    return 2*x1 - 4 - 2*x2

def partial_derivative_x2(x1, x2):
    """
    Partial derivative of f with respect to x2.
    """
    return 4*x2 - 2*x1

def partial_derivative_x1x2(x1, x2):
    """
    Mixed second-order partial derivative with respect to x1 and x2.
    """
    return -2

def partial_derivative_x1x1(x1, x2):
    """
    Second-order partial derivative with respect to x1.
    """
    return 2

def partial_derivative_x2x2(x1, x2):
    """
    Second-order partial derivative with respect to x2.
    """
    return 4
def get_derivative_matrix1(x1, x2):
    """
    Assemble the gradient (first-order derivative matrix), shape (2, 1).
    """
    derivative_matrix1 = np.full((2, 1), np.nan, dtype=float)
    derivative_matrix1[0, 0] = partial_derivative_x1(x1, x2)
    derivative_matrix1[1, 0] = partial_derivative_x2(x1, x2)
    return derivative_matrix1

def get_derivative_matrix2(x1, x2):
    """
    Assemble the Hessian (second-order derivative matrix), shape (2, 2).
    """
    derivative_matrix2 = np.full((2, 2), np.nan, dtype=float)
    derivative_matrix2[0, 0] = partial_derivative_x1x1(x1, x2)
    derivative_matrix2[0, 1] = partial_derivative_x1x2(x1, x2)
    derivative_matrix2[1, 0] = partial_derivative_x1x2(x1, x2)
    derivative_matrix2[1, 1] = partial_derivative_x2x2(x1, x2)
    return derivative_matrix2
def wolfe_powell_rule1(f_x, f_x_, c1, lam, d_x, s):
    """
    Test 1 (sufficient decrease).
    :param f_x: function value at point k
    :param f_x_: function value at point k+1
    :param c1: given constant
    :param lam: step size
    :param d_x: gradient at point k
    :param s: search direction
    """
    return (f_x - f_x_) >= (-c1 * lam * np.transpose(d_x)[0, :] @ s[:, 0])

def wolfe_powell_rule2(d_x_, s, c2, d_x):
    """
    Test 2 (curvature).
    :param d_x_: gradient at point k+1
    :param s: search direction at point k
    :param c2: given constant
    :param d_x: gradient at point k
    """
    return (np.transpose(d_x_)[0, :] @ s[:, 0]) >= (c2 * np.transpose(d_x)[0, :] @ s[:, 0])
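# In standard notation, the two tests above are the weak Wolfe (Wolfe-Powell)
# conditions, with constants 0 < c1 < c2 < 1:
#   (1) sufficient decrease: f(x + lam*s) <= f(x) + c1 * lam * grad_f(x)^T s
#   (2) curvature:           grad_f(x + lam*s)^T s >= c2 * grad_f(x)^T s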
def get_lambda(c1, c2, a, b, lam, x, s):
    """
    Direct search for the step size based on the Wolfe-Powell rules.
    :param c1: given constant
    :param c2: given constant
    :param a: left endpoint of the step-size interval
    :param b: right endpoint of the step-size interval
    :param lam: initial step size
    :param x: current point
    :param s: search direction
    """
    while True:
        f_x = cal_fun_f(x[0, 0], x[1, 0])
        d_x = get_derivative_matrix1(x[0, 0], x[1, 0])
        x_ = x + lam * s  # candidate next iterate
        f_x_ = cal_fun_f(x_[0, 0], x_[1, 0])
        d_x_ = get_derivative_matrix1(x_[0, 0], x_[1, 0])
        rule1_judge = wolfe_powell_rule1(f_x, f_x_, c1, lam, d_x, s)
        rule2_judge = wolfe_powell_rule2(d_x_, s, c2, d_x)
        if rule1_judge and rule2_judge:
            return x_, lam
        if not rule1_judge:
            # step too long: shrink the interval from the right and bisect
            b = lam
            lam = (lam + a) / 2
        elif not rule2_judge:
            # step too short: move the left endpoint up and try a larger step
            a = lam
            lam = min(2 * lam, (lam + b) / 2)
def damped_newton_method(x, gap, c1=0.1, c2=0.5, a=0, b=float("inf"), lam=1):
    i = 0
    while True:
        derivative_matrix1 = get_derivative_matrix1(x[0, 0], x[1, 0])
        derivative_matrix2 = get_derivative_matrix2(x[0, 0], x[1, 0])
        s = -np.linalg.inv(derivative_matrix2) @ derivative_matrix1  # Newton search direction
        x, lam = get_lambda(c1, c2, a, b, lam, x, s)  # step size and next iterate
        i += 1
        print(f"Iteration {i}: point, step size", (x[0, 0], x[1, 0]), lam)
        if np.sum(np.abs(get_derivative_matrix1(x[0, 0], x[1, 0]))) < gap:
            break
if __name__ == "__main__":
    damped_newton_method(np.array([5000.0, 0.0]).reshape(-1, 1), 1e-3)
```
The output is as follows:
Iteration 1: point, step size (4.0, 2.0) 1
Because $f$ is quadratic, the Newton direction from $\bm{x}^0$ points straight at the minimum and the full step $\lambda=1$ satisfies both Wolfe-Powell tests, so a single iteration reaches the analytic minimizer $(4,2)$.
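As a sanity check (a sketch assuming SciPy is available; not part of the original program), the same minimum can be recovered with `scipy.optimize.minimize` using the Newton-CG method:

```python
import numpy as np
from scipy.optimize import minimize

res = minimize(
    fun=lambda x: x[0]**2 + 2*x[1]**2 - 4*x[0] - 2*x[0]*x[1],
    x0=np.array([5000.0, 0.0]),
    method="Newton-CG",
    jac=lambda x: np.array([2*x[0] - 4 - 2*x[1], 4*x[1] - 2*x[0]]),
    hess=lambda x: np.array([[2.0, -2.0], [-2.0, 4.0]]),
)
print(res.x)  # expected to be close to [4., 2.]
```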