$$\min\ \sum_{i=1}^{11}\big(y_i-\tilde{f}(x,t_i)\big)^2,$$
其中

$$\tilde{f}(x,t)=\frac{x_1(t^2+x_2t)}{t^2+x_3t+x_4},$$
数据 $(t_i,y_i)$ 由表 5.5 给出。
表 5.5 第 4 题的数据
| $i$ | $t_i$ | $y_i$ | $i$ | $t_i$ | $y_i$ |
---|---|---|---|---|---|
1 | 4.0000 | 0.1957 | 7 | 0.1250 | 0.0456 |
2 | 2.0000 | 0.1947 | 8 | 0.1000 | 0.0342 |
3 | 1.0000 | 0.1735 | 9 | 0.0833 | 0.0323 |
4 | 0.5000 | 0.1600 | 10 | 0.0714 | 0.0235 |
5 | 0.2500 | 0.0844 | 11 | 0.0625 | 0.0246 |
6 | 0.1670 | 0.0627 |
最速下降法求解
from scipy.optimize import approx_fprime
import numpy as np
# Data points (t_i, y_i) from Table 5.5
t_data = np.array([4.0000, 2.0000, 1.0000, 0.5000, 0.2500, 0.1670, 0.1250, 0.1000, 0.0833, 0.0714, 0.0625])
y_data = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
# Rational model used for the least-squares fit
def model(x, t):
    """Evaluate f~(x, t) = x1*(t^2 + x2*t) / (t^2 + x3*t + x4)."""
    a, b, c, d = x
    numerator = a * (t**2 + b * t)
    denominator = t**2 + c * t + d
    return numerator / denominator
# Residual function (not called by the steepest-descent driver below)
def residuals(x, t, y):
    """Return the signed residuals model(x, t) - y."""
    return model(x, t) - y
# Objective function (total sum of squared errors)
def objective_function(x, t, y):
    """Return sum_i (model(x, t_i) - y_i)^2, the quantity being minimized."""
    return np.sum((model(x, t) - y)**2)
# Numerical gradient of the objective via scipy's finite differences
def gradient(x, t, y):
    """Approximate the gradient of objective_function at x.

    t and y are forwarded to objective_function as extra positional args.
    """
    eps = np.sqrt(np.finfo(float).eps)
    return approx_fprime(x, objective_function, eps, t, y)
# Steepest-descent minimizer for the sum-of-squares objective
def gradient_descent(x_init, t, y, alpha, max_iterations=10000, tolerance=1e-6):
    """Run fixed-step steepest descent from x_init.

    Returns (x, i, loss): the final point, the index of the last
    iteration executed, and the objective value at x.
    """
    current = x_init
    for i in range(max_iterations):
        candidate = current - alpha * gradient(current, t, y)
        # Stop once the update is negligible; keep the pre-step point,
        # matching the original convergence handling.
        if np.linalg.norm(candidate - current) < tolerance:
            break
        current = candidate
    return current, i, objective_function(current, t, y)
# Initial guess and learning rate (step size)
x_init = np.array([0.2, 0.2, 0.1, 0.1])
alpha = 0.1
# Run steepest descent
x_opt, num_iterations, loss = gradient_descent(x_init, t_data, y_data, alpha)
# Report results
print("Number of Iterations:", num_iterations)
print("Optimized Parameters:", x_opt)
print("Final Loss:", loss)
Number of Iterations: 3640
Optimized Parameters: [0.19292142 0.18828542 0.12222641 0.13473105]
Final Loss: 0.00030752241531585274
设置了较小的容忍度和较小的学习率得到如上结果,迭代次数和学习率相关,但是收敛结果基本一致。
牛顿法
import numpy as np
import matplotlib.pyplot as plt
# Points to fit (Table 5.5)
t_values = np.array([4.0000, 2.0000, 1.0000, 0.5000, 0.2500, 0.1670, 0.1250, 0.1000, 0.0833, 0.0714, 0.0625])
y_values = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
# Model function for the rational fit
def func(x, t):
    """Return x[0]*(t^2 + x[1]*t) / (t^2 + x[2]*t + x[3])."""
    top = x[0] * (t**2 + x[1]*t)
    bottom = t**2 + x[2]*t + x[3]
    return top / bottom
# Objective function: sum of squared residuals
def loss(x):
    """Return sum_i (y_i - func(x, t_i))^2 over the module-level data."""
    return np.sum((y_values - func(x, t_values))**2)
# Central-difference gradient of the objective
def grad_loss(x):
    """Approximate the gradient of `loss` at x by central differences.

    Fix: the original allocated the result with np.zeros_like(x), which
    inherits x's dtype — for an integer input the gradient entries were
    silently truncated to 0.  Float copies of x are perturbed instead.
    """
    epsilon = 1e-9
    xf = np.asarray(x, dtype=float)
    grad = np.zeros(len(xf))
    for i in range(len(xf)):
        x_hi = xf.copy()
        x_lo = xf.copy()
        x_hi[i] += epsilon
        x_lo[i] -= epsilon
        grad[i] = (loss(x_hi) - loss(x_lo)) / (2 * epsilon)
    return grad
# Forward-difference numerical Hessian
def numerical_hessian(f, x, h=1e-5):
    """Approximate the Hessian of scalar function f at x.

    Uses H[i, j] ≈ (f(x + h e_i + h e_j) - f(x + h e_i) - f(x + h e_j)
    + f(x)) / h^2.  The single-coordinate evaluations f(x + h e_i) and
    the base value f(x) are invariant across the inner loop, so they are
    computed once up front — the original re-evaluated them for every
    (i, j) pair, tripling the number of function calls.
    """
    x0 = np.array(x, dtype=float)
    n = len(x0)
    f0 = f(x0)
    # Hoisted single-perturbation values f(x + h e_i), i = 0..n-1.
    f_single = np.empty(n)
    for i in range(n):
        xi = x0.copy()
        xi[i] += h
        f_single[i] = f(xi)
    H = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            xij = x0.copy()
            xij[i] += h
            xij[j] += h  # i == j gives x + 2h e_i, the 1-D second difference
            H[i, j] = (f(xij) - f_single[i] - f_single[j] + f0) / (h**2)
    return H
# Newton's method for unconstrained minimization
def newton_method(f, grad_f, hessian_f, x0, epsilon=1e-6, max_iter=1000):
    """Minimize f starting from x0 using pure Newton steps.

    Parameters: f (objective), grad_f (gradient of f), hessian_f
    (called as hessian_f(f, x), returning the Hessian matrix), x0
    (initial point), epsilon (gradient-norm tolerance), max_iter.
    Returns (x, step_count).

    Fix: np.linalg.solve raises LinAlgError when the numerical Hessian
    is singular (the report notes this failure mode); fall back to a
    least-squares step instead of crashing.
    """
    xk = np.array(x0, dtype=float)
    step_count = 0
    for _ in range(max_iter):
        grad = grad_f(xk)
        if np.linalg.norm(grad) <= epsilon:
            break
        H = hessian_f(f, xk)
        try:
            dk = -np.linalg.solve(H, grad)  # Newton direction
        except np.linalg.LinAlgError:
            # Singular Hessian: take the minimum-norm least-squares step.
            dk = -np.linalg.lstsq(H, grad, rcond=None)[0]
        xk += dk
        step_count += 1
    return xk, step_count
# Initial guess
x0 = np.array([0.2, 0.1, 0.1, 0.1])
# Run Newton's method on the least-squares objective
result, num_iterations = newton_method(loss, grad_loss, numerical_hessian, x0)
print("优化结果:", result)
print("迭代次数:", num_iterations)
print("最终损失:", loss(result))
优化结果: [0.19280693 0.19128247 0.12305651 0.1360624 ]
迭代次数: 5
最终损失: 0.00030750560384928163
牛顿法迭代次数很少,最终损失与其他方法相当,但对初始点较为敏感,迭代过程中可能出现 Hessian 奇异矩阵的问题
阻尼牛顿法
import numpy as np
def model(x, t):
    """Rational model f(x, t) = x1*(t^2 + x2*t) / (t^2 + x3*t + x4)."""
    p1, p2, p3, p4 = x
    num = p1 * (t**2 + p2 * t)
    den = t**2 + p3 * t + p4
    return num / den
def compute_residuals(x, t_data, y_data):
    """Return the residual vector y_i - model(x, t_i)."""
    return y_data - model(x, t_data)
def compute_jacobian(x, t_data):
    """Jacobian of the residuals r_i = y_i - f(x, t_i), shape (m, 4).

    Entries are -∂f/∂x_k because the residual is y - f.  The original
    looped over the points and also evaluated `model` per point into an
    unused local; this version drops the dead call and vectorizes the
    closed-form derivatives over all t at once.
    """
    x1, x2, x3, x4 = x
    t = np.asarray(t_data, dtype=float)
    num = t**2 + x2 * t          # numerator without the x1 factor
    den = t**2 + x3 * t + x4     # shared denominator
    J = np.empty((len(t), 4))
    J[:, 0] = -(num / den)               # -∂f/∂x1
    J[:, 1] = -(x1 * t / den)            # -∂f/∂x2
    J[:, 2] = x1 * num * t / den**2      # -∂f/∂x3
    J[:, 3] = x1 * num / den**2          # -∂f/∂x4
    return J
def damp_newton_method(t_data, y_data, initial_params, max_iter=100, tol=1e-6):
    """Damped Gauss-Newton (Levenberg-style) least-squares fit.

    Each iteration solves (J^T J + lambda I) delta = -J^T g and accepts
    the step only when it lowers the squared-residual sum, adapting the
    damping factor lambda accordingly.
    Returns (params, final_loss, iterations_used).
    """
    x = np.array(initial_params)
    lambda_ = 0.01  # damping factor
    num_iterations = 0
    for _ in range(max_iter):
        num_iterations += 1
        r = compute_residuals(x, t_data, y_data)
        J = compute_jacobian(x, t_data)
        A = J.T @ J
        g = J.T @ r
        I = np.eye(len(x))
        delta = np.linalg.solve(A + lambda_ * I, -g)
        # Converged when the proposed step is negligible.
        if np.linalg.norm(delta) < tol:
            break
        x_new = x + delta
        # Accept the step only if it reduces the squared-residual sum.
        if np.sum(compute_residuals(x_new, t_data, y_data)**2) < np.sum(r**2):
            x = x_new
            lambda_ = max(lambda_ / 10, 1e-7)  # step accepted: relax damping
        else:
            lambda_ *= 10  # step rejected: strengthen damping
    loss = np.sum(compute_residuals(x, t_data, y_data)**2)
    return x, loss, num_iterations
# Fit using the initial parameters (t_data/y_data come from an earlier cell)
initial_params = [2, 1, 1, 1]
final_params, loss, num_iterations = damp_newton_method(t_data, y_data, initial_params)
print("Optimized Parameters:", final_params)
print("Loss:", loss)
print("Number of Iterations:", num_iterations)
# Plot the raw data and the fitted curve
import matplotlib.pyplot as plt
plt.scatter(t_data, y_data, color='red', label='Data')
t_fine = np.linspace(min(t_data), max(t_data), 500)
y_fine = model(final_params, t_fine)
plt.plot(t_fine, y_fine, label='Fitted model', color='blue')
plt.title('Data and Fitted Model')
plt.xlabel('$t_i$')
plt.ylabel('$y_i$')
plt.legend()
plt.show()
Optimized Parameters: [0.19280696 0.19128187 0.12305642 0.13606212]
Loss: 0.00030750560384964413
Number of Iterations: 48
需要多次调整参数,效果一般
拟牛顿法
import numpy as np
from scipy.optimize import minimize
# Data points (Table 5.5)
t_values = np.array([4.0000, 2.0000, 1.0000, 0.5000, 0.2500, 0.1670, 0.1250, 0.1000, 0.0833, 0.0714, 0.0625])
y_values = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
# Model function
def func(x, t):
    """Rational model x[0]*(t^2 + x[1]*t) / (t^2 + x[2]*t + x[3])."""
    denom = t**2 + x[2]*t + x[3]
    return (x[0] * (t**2 + x[1]*t)) / denom
# Loss function
def loss(x):
    """Return the sum of squared residuals between data and model."""
    residuals = y_values - func(x, t_values)
    return np.sum(residuals**2)
# Optimize with scipy's minimize, selecting the 'BFGS' quasi-Newton method
x0 = [0.1, 0.1, 0.1, 0.1]  # initial guess
result = minimize(loss, x0, method='BFGS', options={'disp': True})
print("Optimized parameters:", result.x)
print("Final loss:", result.fun)
# Plot the fitted result
import matplotlib.pyplot as plt
t_plot = np.linspace(min(t_values), max(t_values), 300)
y_plot = func(result.x, t_plot)
plt.scatter(t_values, y_values, label='Data Points')
plt.plot(t_plot, y_plot, 'r-', label='Fitted Model')
plt.xlabel('t')
plt.ylabel('y')
plt.legend()
plt.title('Data Fit using BFGS Method')
plt.show()
Optimization terminated successfully.
Current function value: 0.000308
Iterations: 20
Function evaluations: 120
Gradient evaluations: 24
Optimized parameters: [0.19279486 0.1917488 0.12322904 0.13626871]
Final loss: 0.0003075060486785637
上述使用scipy中已经定义好的BFGS算法,效果相当好
import numpy as np
import matplotlib.pyplot as plt
# Data points (Table 5.5)
t_values = np.array([4.0000, 2.0000, 1.0000, 0.5000, 0.2500, 0.1670, 0.1250, 0.1000, 0.0833, 0.0714, 0.0625])
y_values = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
# Model function
def func(x, t):
    """Rational fitting model; x holds the four parameters."""
    numer = x[0] * (t**2 + x[1]*t)
    return numer / (t**2 + x[2]*t + x[3])
# Loss function
def loss(x):
    """Return sum_i (y_i - func(x, t_i))^2 over the module-level data."""
    return np.sum((y_values - func(x, t_values))**2)
# Central-difference gradient of the loss
def grad_loss(x):
    """Approximate the gradient of `loss` at x by central differences.

    Fix: the original used np.zeros_like(x), which inherits x's dtype —
    for an integer input the gradient entries were silently truncated
    to 0.  Float copies of x are perturbed instead.
    """
    epsilon = 1e-8
    xf = np.asarray(x, dtype=float)
    grad = np.zeros(len(xf))
    for i in range(len(xf)):
        x_hi = xf.copy()
        x_lo = xf.copy()
        x_hi[i] += epsilon
        x_lo[i] -= epsilon
        grad[i] = (loss(x_hi) - loss(x_lo)) / (2 * epsilon)
    return grad
# Hand-written BFGS quasi-Newton implementation
def bfgs_method(x0, max_iterations=100, epsilon=1e-5):
    """Minimize `loss` with BFGS starting from x0.

    Returns (x, iterations): the final point and the number of
    iterations performed (the convergence-check pass counts as one).

    Fixes: the backtracking loop is capped so a non-descent direction
    cannot spin forever, and `iteration` is pre-initialized so
    max_iterations=0 no longer raises NameError on return.
    """
    xk = np.copy(x0)
    n = len(xk)
    Hk = np.eye(n)  # inverse-Hessian approximation, starts as identity
    iteration = 0
    for iteration in range(max_iterations):
        grad_fk = grad_loss(xk)
        if np.linalg.norm(grad_fk) < epsilon:
            break
        # Search direction from the current inverse-Hessian estimate.
        pk = -np.dot(Hk, grad_fk)
        # Simple backtracking (Armijo) line search; alpha is bounded
        # below so the loop always terminates.
        alpha = 1.0
        while (loss(xk + alpha * pk) > loss(xk) + 0.0001 * alpha * np.dot(grad_fk, pk)
               and alpha > 1e-12):
            alpha *= 0.5
        # Take the step and apply the BFGS inverse-Hessian update.
        xk_new = xk + alpha * pk
        sk = xk_new - xk
        yk = grad_loss(xk_new) - grad_fk
        if np.dot(sk, yk) > 0:  # curvature condition: update keeps Hk positive definite
            rho_k = 1.0 / np.dot(yk, sk)
            I = np.eye(n)
            Hk = (I - rho_k * np.outer(sk, yk)) @ Hk @ (I - rho_k * np.outer(yk, sk)) + rho_k * np.outer(sk, sk)
        xk = xk_new
    return xk, iteration + 1
# Initial parameters
x0 = np.array([0.2, 0.2, 0.1, 0.1])
# Optimize with the hand-written BFGS implementation
optimized_params, num_iterations = bfgs_method(x0)
print("Optimized parameters:", optimized_params)
print("Final loss:", loss(optimized_params))
print("Number of Iterations:", num_iterations)
# Plot the fitted result
t_plot = np.linspace(min(t_values), max(t_values), 300)
y_plot = func(optimized_params, t_plot)
plt.scatter(t_values, y_values, label='Data Points')
plt.plot(t_plot, y_plot, 'r-', label='Fitted Model')
plt.xlabel('t')
plt.ylabel('y')
plt.legend()
plt.title('Data Fit using BFGS Method')
plt.show()
Optimized parameters: [0.19279997 0.19139305 0.12306114 0.13611461]
Final loss: 0.00030750563072822656
Number of Iterations: 22
此处使用自己实现的BFGS算法,效果比牛顿法要好很多
共轭梯度法
import numpy as np
import matplotlib.pyplot as plt
# Data points (Table 5.5)
t_values = np.array([4.0000, 2.0000, 1.0000, 0.5000, 0.2500, 0.1670, 0.1250, 0.1000, 0.0833, 0.0714, 0.0625])
y_values = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
# Model function
def func(x, t):
    """Return x[0]*(t^2 + x[1]*t) / (t^2 + x[2]*t + x[3])."""
    p = x[0] * (t**2 + x[1]*t)
    q = t**2 + x[2]*t + x[3]
    return p / q
# Loss function
def loss(x):
    """Return sum_i (y_i - func(x, t_i))^2 over the module-level data."""
    return np.sum((y_values - func(x, t_values))**2)
# Central-difference gradient of the loss
def grad_loss(x, epsilon=1e-8):
    """Approximate the gradient of `loss` at x by central differences.

    Fix: the original used np.zeros_like(x), which inherits x's dtype —
    for an integer input the gradient entries were silently truncated
    to 0.  Float copies of x are perturbed instead.
    """
    xf = np.asarray(x, dtype=float)
    grad = np.zeros(len(xf))
    for i in range(len(xf)):
        x_hi = xf.copy()
        x_lo = xf.copy()
        x_hi[i] += epsilon
        x_lo[i] -= epsilon
        grad[i] = (loss(x_hi) - loss(x_lo)) / (2 * epsilon)
    return grad
# Nonlinear conjugate gradient (Fletcher-Reeves) on the least-squares loss
def conjugate_gradient_method(x0, max_iter=1000, epsilon=1e-6):
    """Minimize `loss` with Fletcher-Reeves conjugate gradients.

    Returns (x, num_iterations); num_iterations includes the final pass
    on which the gradient-norm test triggers the break.
    """
    x = np.copy(x0)
    g = grad_loss(x)
    d = -g  # initial direction: steepest descent
    num_iterations = 0
    for i in range(max_iter):
        num_iterations += 1
        if np.linalg.norm(g) < epsilon:
            print(f"Converged in {num_iterations} iterations.")
            break
        # Simple backtracking line search for a suitable alpha.
        # NOTE(review): this loop has no lower bound on alpha and would
        # spin if d were ever not a descent direction — confirm FR
        # restarts are unnecessary for this problem.
        alpha = 1.0
        while loss(x + alpha * d) > loss(x) + 0.01 * alpha * np.dot(g, d):
            alpha *= 0.5
        x_new = x + alpha * d
        g_new = grad_loss(x_new)
        beta = np.dot(g_new, g_new) / np.dot(g, g)  # Fletcher-Reeves formula
        d_new = -g_new + beta * d
        x, g, d = x_new, g_new, d_new
    return x, num_iterations
# Initial parameter guess
x0 = np.array([0.2, 0.2, 0.1, 0.1])
# Solve with the conjugate gradient method
optimized_params, num_iterations = conjugate_gradient_method(x0)
print("Optimized parameters:", optimized_params)
print("Final loss:", loss(optimized_params))
print("Number of Iterations:", num_iterations)
# Plot the fitted result
t_plot = np.linspace(min(t_values), max(t_values), 300)
y_plot = func(optimized_params, t_plot)
plt.scatter(t_values, y_values, label='Data Points')
plt.plot(t_plot, y_plot, 'r-', label='Fitted Model')
plt.xlabel('t')
plt.ylabel('y')
plt.legend()
plt.title('Data Fit using Conjugate Gradient Method')
plt.show()
Converged in 551 iterations.
Optimized parameters: [0.19281309 0.19123342 0.12308906 0.13603464]
Final loss: 0.0003075056254314333
Number of Iterations: 551
共轭梯度法虽然参数与上述求解结果有差异,但是拟合结果与损失值均较好
LMF
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Data points (Table 5.5)
t_data = np.array([4.0, 2.0, 1.0, 0.5, 0.25, 0.167, 0.125, 0.1, 0.0833, 0.0714, 0.0625])
y_data = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
# Model in curve_fit's signature: independent variable first, parameters after
def model(t, x1, x2, x3, x4):
    """Rational model x1*(t^2 + x2*t) / (t^2 + x3*t + x4)."""
    denom = t**2 + x3 * t + x4
    return x1 * (t**2 + x2 * t) / denom
# Initial parameter estimate
initial_params = [1, 1, 1, 1]
# Fit with the Levenberg-Marquardt algorithm via curve_fit
params, params_covariance = curve_fit(model, t_data, y_data, p0=initial_params)
print(params)
# Plot the raw data points
plt.scatter(t_data, y_data, color='red', label='Data')
# Plot the fitted curve
t_fine = np.linspace(min(t_data), max(t_data), 500)
y_fine = model(t_fine, *params)
plt.plot(t_fine, y_fine, color='blue', label='Fitted model')
# Title and legend
plt.title('Fit of Model to Data')
plt.xlabel('$t_i$')
plt.ylabel('$y_i$')
plt.legend()
# Show the figure
plt.show()
[0.19280625 0.19129783 0.12305945 0.1360695 ]
使用内置的curve fit方法效果很好
import numpy as np
# Data points (Table 5.5)
t_data = np.array([4.0, 2.0, 1.0, 0.5, 0.25, 0.167, 0.125, 0.1, 0.0833, 0.0714, 0.0625])
y_data = np.array([0.1957, 0.1947, 0.1735, 0.1600, 0.0844, 0.0627, 0.0456, 0.0342, 0.0323, 0.0235, 0.0246])
def model(x, t):
    """Rational model with parameter vector x = (x1, x2, x3, x4)."""
    c1, c2, c3, c4 = x
    return c1 * (t**2 + c2 * t) / (t**2 + c3 * t + c4)
def compute_residuals(x, t_data, y_data):
    """Return the residual vector y_i - model(x, t_i)."""
    return y_data - model(x, t_data)
def compute_jacobian(x, t_data):
    """Jacobian of the residuals r_i = y_i - f(x, t_i), shape (m, 4).

    Entries are -∂f/∂x_k because the residual is y - f.  The original
    looped over the points and also evaluated `model` per point into an
    unused local; this version drops the dead call and vectorizes the
    closed-form derivatives over all t at once.
    """
    x1, x2, x3, x4 = x
    t = np.asarray(t_data, dtype=float)
    num = t**2 + x2 * t          # numerator without the x1 factor
    den = t**2 + x3 * t + x4     # shared denominator
    J = np.empty((len(t), 4))
    J[:, 0] = -(num / den)               # -∂f/∂x1
    J[:, 1] = -(x1 * t / den)            # -∂f/∂x2
    J[:, 2] = x1 * num * t / den**2      # -∂f/∂x3
    J[:, 3] = x1 * num / den**2          # -∂f/∂x4
    return J
def levenberg_marquardt(t_data, y_data, initial_params, max_iter=100, tol=1e-6):
    """Hand-written Levenberg-Marquardt least-squares fit.

    Each iteration solves (J^T J + lambda I) delta = -J^T r and accepts
    the step only when it lowers the squared-residual sum, scaling the
    damping factor lambda by nu accordingly.
    Returns the fitted parameter vector.
    """
    x = np.array(initial_params)
    lambda_ = 0.01  # damping factor
    nu = 10.0  # damping scale factor
    for _ in range(max_iter):
        r = compute_residuals(x, t_data, y_data)
        J = compute_jacobian(x, t_data)
        A = J.T @ J
        g = J.T @ r
        I = np.eye(len(x))
        delta = np.linalg.solve(A + lambda_ * I, -g)
        # Converged when the proposed step is negligible.
        if np.linalg.norm(delta) < tol:
            break
        x_new = x + delta
        if np.sum(compute_residuals(x_new, t_data, y_data)**2) < np.sum(r**2):
            x = x_new
            lambda_ /= nu  # step accepted: relax damping
        else:
            lambda_ *= nu  # step rejected: strengthen damping
    return x
# Fit using the initial parameters
initial_params = [2, 1, 1, 1]
final_params = levenberg_marquardt(t_data, y_data, initial_params)
print("Optimized Parameters:", final_params)
def compute_loss(x, t_data, y_data):
    """Return the sum of squared residuals for parameters x."""
    residuals = compute_residuals(x, t_data, y_data)
    return np.sum(residuals**2)
# Compute and print the final loss value
loss = compute_loss(final_params, t_data, y_data)
print("Final Loss:", loss)
# Plot the raw data and the fitted curve
# NOTE(review): plt is not imported in this cell — it relies on an
# earlier cell's `import matplotlib.pyplot as plt`; confirm run order.
plt.scatter(t_data, y_data, color='red', label='Data')
t_fine = np.linspace(min(t_data), max(t_data), 500)
y_fine = model(final_params, t_fine)
plt.plot(t_fine, y_fine, label='Fitted model', color='blue')
plt.title('Data and Fitted Model')
plt.xlabel('$t_i$')
plt.ylabel('$y_i$')
plt.legend()
plt.show()
Optimized Parameters: [0.19280696 0.19128186 0.12305642 0.13606211]
Final Loss: 0.0003075056038496488
使用自己编写的LMF算法,损失值很小,效果很好
总结
由于结果与选择的初始点,容忍度和学习率等参数关系较大,下面是各个算法大致的迭代次数和损失值作为结果:
方法名称 | 迭代次数 | 损失值 |
---|---|---|
最速下降法 | 3640 | 0.000307522 |
牛顿法 | 5 | 0.0003075056 |
阻尼牛顿法 | 48 | 0.0003075056 |
拟牛顿法 | 22 | 0.0003075056 |
共轭梯度法 | 551 | 0.0003075056 |
LMF | 78 | 0.0003075056 |
经过比较得到,最速下降法的迭代次数最多但是损失值最大,可能与容忍度设置的较大有关,但是其迭代次数确实远多于其他方法
牛顿法虽然迭代次数少,但需要较长时间的调试,初始点设置不当时很容易得到错误解;阻尼牛顿法、拟牛顿法、共轭梯度法迭代次数较多,但均能收敛到正确结果
LMF方法迭代次数适中,拟合结果较好。