第三次作业
3. 用人工神经网络的方法求解由下列微分方程导出的优化问题

$$(1)\quad\left\{\begin{aligned}&\frac{\mathrm{d}y}{\mathrm{d}x}=x^{3}-\frac{y}{x},\\&y(1)=\frac{2}{5}.\end{aligned}\right.$$

该问题的精确解为 $y(x)=\dfrac{x^{4}}{5}+\dfrac{1}{5x}$。训练点 $x_i\ (i=1,\cdots,m)$ 可取为区间 $[1,2]$ 上均匀等分的点。
提供如下几种实现算法:
- 使用BFGS和DFP算法构建神经元搜索进行拟合
- 使用pytorch框架内置的BFGS搜索算法进行神经网络参数优化
- 替换pytorch中内置的BFGS算法为DFP算法
- 使用Adam优化器进行优化
得到的结论大致为:
- 在自己实现的算法中,BFGS算法在迭代次数和搜索成果上远不如DFP算法
- pytorch框架内置的BFGS算法效果远远强于其他搜索算法
- 自己实现的DFP算法结果不稳定
- Adam优化器效果在前期一般,在后面逐渐良好
下面是每个方法具体的实现过程和原理:
- 方法1
import math
import numpy as np
x = np.linspace(1, 2, 100)
def normal_line_search(xk, dk, rho, grad_f, beta=0.5, p=0.5):
    """Trivial line search used to keep the model simple: always returns a fixed step of 0.01."""
    return 0.01
def wolfe_line_search(f, grad_f, xk, dk, alpha_0=0.01, rho=0.9, sigma=0.4, beta=0.5, max_iter=1000):
    """Find a step length satisfying the (weak) Wolfe conditions.

    Args:
        f: objective function.
        grad_f: gradient of the objective.
        xk: current iterate.
        dk: current search direction.
        alpha_0: initial trial step length.
        rho: sufficient-decrease (Armijo) parameter.
        sigma: curvature-condition parameter.
        beta: shrink factor (its reciprocal is used to grow the step).
        max_iter: maximum number of trial steps.

    Returns:
        A step length satisfying both Wolfe conditions, or the fallback
        value 0.1 when max_iter trials are exhausted.

    NOTE(review): standard Wolfe conditions require the Armijo constant to
    be smaller than the curvature constant (0 < c1 < c2 < 1); here the
    defaults rho=0.9 > sigma=0.4 can make the two conditions mutually
    unsatisfiable, forcing the 0.1 fallback — confirm intended values.
    """
    step = alpha_0
    f0 = f(xk)
    slope0 = np.dot(grad_f(xk), dk)  # directional derivative at xk, constant over the loop
    for _ in range(max_iter):
        trial = xk + step * dk
        if f(trial) > f0 + rho * step * slope0:
            step *= beta      # sufficient decrease violated: shrink the step
        elif np.dot(grad_f(trial), dk) < sigma * slope0:
            step /= beta      # curvature condition violated: enlarge the step
        else:
            return step       # both Wolfe conditions hold
    return 0.1
Nodes_Count = 3
def sigmoid(x):
    """Numerically stable logistic activation 1 / (1 + e^(-x)).

    Fix: the original form math.exp(-x) raises OverflowError for large
    negative x; the branch below keeps the exponent non-positive so the
    exponential never overflows, while the value is unchanged elsewhere.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    z = math.exp(x)
    return z / (1 + z)
def grad_sigmoid(x):
    """Derivative of the logistic activation: s(x) * (1 - s(x))."""
    s = sigmoid(x)
    return s * (1 - s)
def neural_network(x, q, n=3):
    """Evaluate the trial solution at x.

    q packs the parameters as (w_1..w_n, v_1..v_n, o_1..o_n): input
    weights, output weights, biases. The form 0.4 + (x - 1) * N(x)
    enforces the initial condition y(1) = 0.4 by construction.
    """
    hidden = sum(q[j + n] * sigmoid(q[j] * (x - 1) - q[j + 2 * n]) for j in range(n))
    return 0.4 + (x - 1) * hidden
def grad_neural_network(x, q, n=3):
    """Gradient w.r.t. q of the hidden sum N(x) used inside neural_network.

    Returns an array laid out like q: (dw_1..dw_n, dv_1..dv_n, do_1..do_n).
    NOTE(review): this differentiates the hidden sum, not the full trial
    solution 0.4 + (x-1)*N(x) — confirm that is the intended quantity.
    """
    grad = np.zeros_like(q)
    for j in range(n):
        z = q[j] * (x - 1) - q[j + 2 * n]  # pre-activation of neuron j
        dz = grad_sigmoid(z)               # evaluated once, reused below
        grad[j] = (x - 1) * q[j + n] * dz
        grad[j + n] = sigmoid(z)
        grad[j + 2 * n] = -q[j + n] * dz
    return grad
def grad_func(x, y):
    """Right-hand side of the ODE: dy/dx = x^3 - y / x."""
    return x ** 3 - y / x
def func(x):
    """Exact solution of the ODE: y(x) = x^4 / 5 + 1 / (5 x)."""
    return x ** 4 / 5 + 1 / (5 * x)
def loss(q, n=Nodes_Count):
    """Residual loss of the ODE on 100 collocation points in [1, 2].

    The network derivative is approximated by forward finite differences
    of the network values and compared against the ODE right-hand side
    x^3 - y/x evaluated at the network output; returns the mean squared
    residual (abs() kept for compatibility, the mean of squares is >= 0).

    Fix over the original: the per-point parameter gradients
    (grad_neural_network) and the exact solution (func) were computed on
    every call and then never used; that dead work dominated the cost of
    each evaluation and is removed here.
    """
    x = np.linspace(1, 2, 100)
    y_nn = np.array([neural_network(xi, q, n) for xi in x])
    rhs = grad_func(x, y_nn)  # ODE right-hand side at the network output
    fd = (y_nn[1:] - y_nn[:-1]) / (x[1:] - x[:-1])  # forward-difference dy/dx
    squared_error = (rhs[:len(x) - 1] - fd) ** 2
    return abs(np.mean(squared_error))
def grad_loss(q):
    """Forward-difference estimate of d(loss)/dq with step epsilon = 0.01."""
    epsilon = 0.01
    base = loss(q)  # loss at the unperturbed point, shared by all coordinates
    grad = np.zeros_like(q)
    for i in range(len(q)):
        shifted = np.array(q)
        shifted[i] += epsilon
        grad[i] = (loss(shifted) - base) / epsilon
    return grad
def armijo_search(xk, dk, rho, grad_f, beta=0.5, p=0.5):
    """Backtracking (Armijo) search for a step length along dk.

    Fix over the original: loss(xk) and grad_f(xk) are constant during
    the backtracking loop but were re-evaluated on every test of the
    while-condition; they are now computed once up front.
    NOTE(review): there is no cap on the number of backtracking steps;
    if dk is not a descent direction this loop does not terminate.
    """
    alpha_k = 1
    f0 = loss(xk)
    slope = np.dot(grad_f(xk), dk)  # directional derivative, loop invariant
    while loss(xk + alpha_k * dk) > f0 + rho * alpha_k * slope:
        alpha_k *= p
    return alpha_k
def DFP_method(loss, grad_loss, xk, epsilon=1e-5, max_iterations=100, rho=0.6, beta=0.5, p=0.9):
    """Quasi-Newton minimization maintaining an approximation Hk of the inverse Hessian.

    Args:
        loss: objective function (unused directly; kept for interface parity).
        grad_loss: gradient of the objective.
        xk: initial parameter vector.
        epsilon: gradient-norm stopping tolerance.
        max_iterations: iteration cap.
        rho, beta, p: forwarded to the line search.

    Returns:
        (final iterate, number of iterations performed).

    Fix over the original: the rank-two update is now skipped when the
    curvature condition y^T s > 0 fails (matching the guard already used
    in bfgs_method), preventing division by zero in rho_k = 1 / (y^T s)
    and loss of positive definiteness of Hk.

    NOTE(review): the update formula below is the BFGS update of the
    inverse Hessian, not the classical DFP formula — confirm intent.
    """
    n = len(xk)
    Hk = np.eye(n)  # approximate inverse Hessian, initialized to identity
    iteration_count = 0
    for iteration in range(max_iterations):
        grad_fk = grad_loss(xk)
        dk = -np.dot(Hk, grad_fk)  # quasi-Newton search direction
        alpha_k = normal_line_search(xk, dk, rho, grad_loss, beta, p)
        xkp1 = xk + alpha_k * dk
        grad_fkp1 = grad_loss(xkp1)
        sk = alpha_k * dk
        yk = grad_fkp1 - grad_fk
        ys = np.dot(yk, sk)
        if ys > 0:  # curvature condition: keep Hk positive definite
            rho_k = 1.0 / ys
            I = np.eye(n)
            Hk = (I - rho_k * np.outer(sk, yk)) @ Hk @ (I - rho_k * np.outer(yk, sk)) + rho_k * np.outer(sk, sk)
        if np.linalg.norm(grad_fkp1) < epsilon:  # converged
            break
        xk = xkp1
        iteration_count = iteration_count + 1
    return xk, iteration_count
def bfgs_method(loss, grad_loss, xk, epsilon=1e-5, max_iterations=5000, rho=0.6, beta=0.5, p=0.9):
    """BFGS minimization maintaining an approximation Bk of the Hessian.

    Args:
        loss: objective function (unused directly; kept for interface parity).
        grad_loss: gradient of the objective.
        xk: initial parameter vector.
        epsilon: gradient-norm stopping tolerance.
        max_iterations: iteration cap.
        rho, beta, p: forwarded to the line search.

    Returns:
        (final iterate, number of iterations performed).

    Bug fix: the original updated Bk with the BFGS formula for the
    *Hessian* (B - B s s^T B / (s^T B s) + y y^T / (y^T s)) but then used
    it as if it were the inverse (dk = -Bk @ grad), which inverts the
    curvature scaling and plausibly explains why this implementation
    performed far worse than DFP. The direction is now obtained by
    solving Bk dk = -grad.
    """
    n = len(xk)
    Bk = np.eye(n)  # approximate Hessian (NOT its inverse)
    iteration_count = 0
    for iteration in range(max_iterations):
        grad_fk = grad_loss(xk)
        dk = -np.linalg.solve(Bk, grad_fk)  # solve Bk dk = -grad for the search direction
        alpha_k = normal_line_search(xk, dk, rho, grad_loss, beta, p)
        xkp1 = xk + alpha_k * dk
        grad_fkp1 = grad_loss(xkp1)
        sk = xkp1 - xk
        yk = grad_fkp1 - grad_fk
        if np.dot(yk, sk) > 0:  # curvature condition keeps Bk positive definite
            Bk = Bk - np.outer(Bk @ sk, sk @ Bk) / (sk @ Bk @ sk) + np.outer(yk, yk) / np.dot(yk, sk)
        if np.linalg.norm(grad_fkp1) < epsilon:  # converged
            break
        xk = xkp1
        iteration_count += 1
    return xk, iteration_count
initial_guess = np.ones(3* Nodes_Count) # 初始猜测
#result = minimize(loss, initial_guess, method='BFGS', jac=grad_loss, options={'disp': True})
result,iteration_count = DFP_method(loss, grad_loss,max_iterations=800, xk=initial_guess, epsilon=0.01, rho=0.6)
optimized_params = result
print("DFP")
print("Optimized Params: " + str(optimized_params))
print("Iteration Times: " + str(iteration_count))
result,iteration_count = bfgs_method(loss, grad_loss,max_iterations=5000, xk=initial_guess, epsilon=0.01, rho=0.6)
optimized_params = result
print("BFGS")
print("Optimized Params: " + str(optimized_params))
print("Iteration Times: " + str(iteration_count))
DFP
Optimized Params: [2.03306936 2.0331901 1.13568128 3.09200305 3.09138072 0.13135772
2.17723677 2.17737288 3.71462521]
Iteration Times: 518
BFGS
Optimized Params: [1.94205996 1.94116378 1.94133311 1.78147099 1.78400681 1.78647351
1.76430309 1.76379254 1.76445937]
Iteration Times: 5000
- 使用DFP参数
import numpy as np
import matplotlib.pyplot as plt
# 假设测试集x_test在[1, 2]区间内随机取点
np.random.seed(42) # 保证示例的可复现性
x_test = np.random.uniform(1, 2, 100) # 随机生成100个测试点
optimized_params = [2.03306936 ,2.0331901, 1.13568128 ,3.09200305 ,3.09138072, 0.131357722,2.17723677 ,2.17737288, 3.71462521] # 示例参数,实际参数应由优化算法得出
# Trial-solution network: same structure and activation as used during training.
def neural_network(x, q, n=3):
    """Evaluate the trial solution 0.4 + (x - 1) * N(x) with parameters q.

    Bug fix: the loop ran over range(n - 1) and silently dropped the last
    hidden neuron, so plots were produced with a different model than the
    one that was optimized; it now iterates over all n neurons, matching
    the definition used during training.
    """
    f = 0
    for i in range(n):
        f += q[i+n] * sigmoid(q[i] * (x-1) - q[i+2*n])  # v_i * sigmoid(w_i*(x-1) - o_i)
    return 0.4 + (x-1) * f  # construction enforces y(1) = 0.4
# 使用优化后的参数计算预测值
y_pred = np.array([neural_network(xi, optimized_params, Nodes_Count) for xi in x_test])
# 真实的y值,假设我们通过某种方式知道了测试集的真实y值(例如,通过一个已知的函数)
y_test = func(x_test) # 使用之前定义的目标函数计算真实的y值
# 绘制测试集的真实y值与预测y值的关系
plt.figure(figsize=(10, 6))
plt.scatter(x_test, y_test, color='blue', alpha=0.5, label='True $y_{test}$')
plt.scatter(x_test, y_pred, color='red', alpha=0.5, label='Predicted $y_{pred}$')
plt.title('Relationship between $y_{test}$ and $y_{pred}$')
plt.xlabel('$x_{test}$')
plt.ylabel('y value')
plt.legend()
plt.grid(True)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# 假设测试集x_test在[1, 2]区间内随机取点
np.random.seed(42) # 保证示例的可复现性
x_test = np.random.uniform(1, 2, 100) # 随机生成100个测试点
# 使用优化后的神经网络参数进行预测
# 假设这些是通过优化算法得到的参数[1.59588066 ,1.59437607 ,1.59333783 ,1.60366427 ,1.60667915 ,1.60915205 ,1.14775471 ,1.14602985, 1.1467363 ]
optimized_params = [1.94205996 ,1.94116378 ,1.94133311 ,1.78147099 ,1.78400681 ,1.78647351 ,1.76430309, 1.76379254, 1.76445937] # 示例参数,实际参数应由优化算法得出
# Trial-solution network: same structure and activation as used during training.
def neural_network(x, q, n=3):
    """Evaluate the trial solution 0.4 + (x - 1) * N(x) with parameters q.

    Bug fix: the loop ran over range(n - 1) and silently dropped the last
    hidden neuron, so plots were produced with a different model than the
    one that was optimized; it now iterates over all n neurons, matching
    the definition used during training.
    """
    f = 0
    for i in range(n):
        f += q[i+n] * sigmoid(q[i] * (x-1) - q[i+2*n])  # v_i * sigmoid(w_i*(x-1) - o_i)
    return 0.4 + (x-1) * f  # construction enforces y(1) = 0.4
# 使用优化后的参数计算预测值
y_pred = np.array([neural_network(xi, optimized_params, Nodes_Count) for xi in x_test])
# 真实的y值,假设我们通过某种方式知道了测试集的真实y值(例如,通过一个已知的函数)
y_test = func(x_test) # 使用之前定义的目标函数计算真实的y值
# 绘制测试集的真实y值与预测y值的关系
plt.figure(figsize=(10, 6))
plt.scatter(x_test, y_test, color='blue', alpha=0.5, label='True $y_{test}$')
plt.scatter(x_test, y_pred, color='red', alpha=0.5, label='Predicted $y_{pred}$')
plt.title('Relationship between $y_{test}$ and $y_{pred}$')
plt.xlabel('$x_{test}$')
plt.ylabel('y value')
plt.legend()
plt.grid(True)
plt.show()
- 上面是BFGS参数
上面的方法通过构造含三个神经元的单隐层网络,分别使用BFGS和DFP方法搜索损失函数的最小值并进行参数优化,根据运算结果可得到如下结论:
-
DFP方法迭代次数少,运算速度快,搜索效果最好,在518次迭代内即收敛到0.01的梯度阈值
-
BFGS方法迭代次数多,运算速度慢,搜索效果一般,运行满5000次迭代(达到上限)仍未收敛到0.01的梯度阈值
-
方法2:使用pytorch框架中的Adam优化器
import numpy as np
import torch
from torch import nn, optim
# Network architecture for the Adam experiment.
class SimpleNN(nn.Module):
    """Tiny fully connected network: 1 -> 10 (sigmoid) -> 1."""

    def __init__(self):
        super(SimpleNN, self).__init__()
        self.hidden = nn.Linear(1, 10)   # scalar input -> 10 hidden units
        self.predict = nn.Linear(10, 1)  # hidden units -> scalar output

    def forward(self, x):
        # Sigmoid on the hidden layer, then a plain linear read-out.
        return self.predict(torch.sigmoid(self.hidden(x)))
# Residual of the ODE dy/dx = x^3 - y/x used as an extra loss term.
def ode_loss(y_pred, x):
    """Mean squared ODE residual; x must require grad for autograd to work."""
    dy_dx = torch.autograd.grad(y_pred.sum(), x, create_graph=True)[0]
    residual = dy_dx - x ** 3 + y_pred / x
    return torch.mean(residual ** 2)
# 初始化神经网络模型
model = SimpleNN()
# 定义损失函数和优化器
# 使用均方误差损失函数
criterion = nn.MSELoss()
# 使用Adam优化器,并设置学习率为0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)
# 准备训练数据:在区间[1, 2]上均匀选取100个点作为训练数据
x_train = torch.unsqueeze(torch.linspace(1, 2, 100), dim=1)
# 使用微分方程的精确解作为训练标签
y_train = x_train**4 / 5 + 1/(5*x_train)
# Exact solution kept as a function for later validation.
def F_n(x_train):
    """Exact solution y(x) = x^4 / 5 + 1 / (5 x)."""
    return x_train ** 4 / 5 + 1 / (5 * x_train)
# 训练过程
epochs = 1000 # 设置训练轮数
for epoch in range(epochs):
y_pred = model(x_train.requires_grad_()) # 对训练数据进行预测
loss = criterion(y_pred, y_train) + ode_loss(y_pred, x_train) # 计算总损失
optimizer.zero_grad() # 清空梯度
loss.backward() # 反向传播计算梯度
optimizer.step() # 更新模型参数
if (epoch+1) % 100 == 0:
# 每100轮打印一次损失值
print(f'Epoch {epoch+1}, Loss: {loss.item()}')
# 测试训练后的模型的准确性
with torch.no_grad():
# 对x=1.5的情况进行预测
y_pred = model(torch.unsqueeze(torch.Tensor([1.5]), dim=1))
# 打印预测值、精确解和实际精确解
print(f"Predicted y(1.5): {y_pred.item()}, Expected: {1.5**4 / 5 + 1/(5*1.5)}")
print(f"Reality is:{F_n(1.5)}")
Epoch 100, Loss: 8.820108413696289
Epoch 200, Loss: 3.704960584640503
Epoch 300, Loss: 1.9521238803863525
Epoch 400, Loss: 1.3715859651565552
Epoch 500, Loss: 0.9456997513771057
Epoch 600, Loss: 0.5956696271896362
Epoch 700, Loss: 0.32994797825813293
Epoch 800, Loss: 0.1603136509656906
Epoch 900, Loss: 0.07214890420436859
Epoch 1000, Loss: 0.033888448029756546
Predicted y(1.5): 1.113245964050293, Expected: 1.1458333333333333
Reality is:1.1458333333333333
import matplotlib.pyplot as plt
x_train = torch.unsqueeze(torch.linspace(1, 2, 100), dim=1)
y_reality = x_train**4 / 5 + 1/(5*x_train)  # exact solution on the grid
# Bug fix: x_train is already shaped (100, 1); the original wrapped it in
# torch.Tensor(...) and unsqueezed again, feeding the model a (100, 1, 1)
# tensor (and triggering a copy-construct warning). Feed it directly.
y_pred = model(x_train).squeeze()
plt.plot(x_train, y_pred.detach().numpy(), label='Predicted')
plt.plot(x_train, y_reality.detach().numpy(), label='True')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Predicted and True Values')
plt.legend()
plt.grid(True)
plt.show()
y_reality = y_reality.squeeze()
# Plot the prediction error against a zero baseline.
# Bug fix: both curves were labelled 'Predicted'; the second one is a zero baseline.
plt.plot(x_train, (y_pred - y_reality).detach().numpy(), label='Prediction error')
plt.plot(x_train, (y_pred - y_pred).detach().numpy(), label='Zero baseline')
plt.xlabel('x')
plt.ylabel('difference')
Text(0, 0.5, 'difference')
在本方法中,使用了深度学习中常用的Adam优化器来最小化损失函数,并将其学习率设置为0.01。
优化后绘制出其与精确解的图像,拟合效果良好;再绘制二者的差异曲线,偏差在0.08以下,说明拟合效果好。
- 方法3,使用pytorch框架下的bfgs优化算法
import torch
from scipy.optimize import minimize
from torch.autograd import grad
import numpy as np
# Network architecture for the scipy-BFGS experiment.
class Net(torch.nn.Module):
    """Fully connected 1 -> 10 (tanh) -> 1 network."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(1, 10)  # scalar input -> 10 hidden units
        self.fc2 = torch.nn.Linear(10, 1)  # 10 hidden units -> scalar output

    def forward(self, x):
        hidden = torch.tanh(self.fc1(x))
        return self.fc2(hidden)  # single scalar output per sample
# 实例化网络
net = Net()
# Sum-of-squares data loss.
def loss_fn(net, x, y_true):
    """Return the summed squared error of net(x) against y_true."""
    residual = net(x) - y_true
    return residual.pow(2).sum()
def objective(flat_params, net, x_train, y_train):
    """Scipy-compatible objective: load flat_params into net, return (loss, grad).

    flat_params is a 1-D numpy vector holding all network parameters in
    the order produced by net.parameters(); the gradient is returned in
    the same flattened layout.
    """
    net.zero_grad()  # drop gradients left over from a previous call
    # Copy the flat vector back into the network's parameter tensors.
    offset = 0
    for param in net.parameters():
        count = param.numel()
        chunk = flat_params[offset:offset + count]
        param.data = torch.from_numpy(chunk).view(param.size()).type(torch.float32)
        offset += count
    # Forward pass and summed squared error.
    prediction = net(x_train)
    loss = (prediction - y_train).pow(2).sum()
    loss.backward()
    # Flatten all parameter gradients into one numpy vector.
    gradients = torch.cat([param.grad.view(-1) for param in net.parameters()]).numpy()
    return loss.item(), gradients
# 注意:在调用minimize函数之前,确保x_train和y_train是正确的数据类型
x_train_np = x_train.detach().numpy().astype(np.float32)
y_train_np = y_train.detach().numpy().astype(np.float32)
# 重新扁平化网络参数为初始猜测
flat_params = np.concatenate([p.data.numpy().ravel() for p in net.parameters()])
# 调用minimize函数all
res_bfgs = minimize(
fun=objective,
x0=flat_params,
args=(net, torch.tensor(x_train_np), torch.tensor(y_train_np)),
method='BFGS',
jac=True,
options={'maxiter': 500, 'disp': True}
)
Warning: Desired error not necessarily achieved due to precision loss.
Current function value: 0.000019
Iterations: 95
Function evaluations: 173
Gradient evaluations: 163
# 网络预测
# 用于演示的网络预测
# 网络预测
x_test= torch.linspace(1, 2, 100).view(100, 1)
with torch.no_grad():
# 确保网络输出为一维数组
y_pred = net(x_test).squeeze().numpy() # 将网络输出转换为一维数组
# 计算精确解
x_test_1d = x_test.squeeze() # 确保x_test是一维的
y_exact = (x_test_1d**4 / 5 + 1 / (5*x_test_1d)).numpy()
# 检查y_pred和y_exact的形状是否一致
assert y_pred.shape == y_exact.shape, f"Shapes of y_pred and y_exact do not match: {y_pred.shape} vs {y_exact.shape}"
# 继续以前的计算差异和可视化...
difference = np.abs(y_pred - y_exact)
# 输出最大差异
print(f'Max difference between the neural network and exact solution: {np.max(difference)}')
# 可视化
plt.plot(x_test.numpy().flatten(), y_pred, label='Neural Network Prediction')
plt.plot(x_test.numpy().flatten(), y_exact, label='Exact Solution', linestyle='--')
plt.fill_between(x_test.numpy().flatten(), y_pred, y_exact, color='grey', alpha=0.5)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Neural Network Prediction and Exact Solution')
plt.show()
Max difference between the neural network and exact solution: 0.0014634132385253906
plt.plot(x_test.numpy().flatten(), y_pred-y_exact, label='Neural Network Prediction')
plt.plot(x_test.numpy().flatten(), y_pred-y_pred,label='Exact Solution', linestyle='--')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Neural Network Prediction and Exact Solution')
plt.show()
import torch
from scipy.optimize import minimize
import numpy as np
class Net(torch.nn.Module):
    """Fully connected 1 -> 10 (tanh) -> 1 network for the L-BFGS-B run."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(1, 10)  # scalar input -> 10 hidden units
        self.fc2 = torch.nn.Linear(10, 1)  # 10 hidden units -> scalar output

    def forward(self, x):
        hidden = torch.tanh(self.fc1(x))
        return self.fc2(hidden)  # single scalar output per sample
net = Net()
def loss_fn(net, x, y_true):
    """Return the summed squared error of net(x) against y_true."""
    residual = net(x) - y_true
    return residual.pow(2).sum()
def objective(flat_params, net, x_train, y_train):
    """Scipy objective returning (loss value, float64 flattened gradient).

    Writes the flat float64 vector into the network's float32 parameter
    tensors in net.parameters() order, then backpropagates the summed
    squared error.
    """
    net.zero_grad()  # drop gradients from any previous call
    cursor = 0
    for p in net.parameters():
        nxt = cursor + p.numel()
        # copy_ writes in place so optimizer/tensor identity is preserved
        p.data.copy_(torch.from_numpy(flat_params[cursor:nxt]).view(p.size()).type(torch.float32))
        cursor = nxt
    # Summed squared error and backprop.
    loss = (net(x_train) - y_train).pow(2).sum()
    loss.backward()
    # L-BFGS-B expects float64 gradients.
    gradients = torch.cat([p.grad.view(-1) for p in net.parameters()]).detach().numpy().astype(np.float64)
    return loss.item(), gradients
# Training data for this run: 100 uniform points on [1, 2] and the exact solution.
x_train = torch.linspace(1, 2, 100).view(-1, 1)
y_train = x_train**4 / 5 + 1 / (5*x_train)
# Starting point: the freshly initialized parameters, flattened as float64.
flat_params = np.concatenate([p.data.numpy().ravel() for p in net.parameters()]).astype(np.float64)
# Bug fix: the original passed x_train_np / y_train_np — stale copies left
# over from an earlier cell — instead of the x_train / y_train defined just
# above; the L-BFGS-B run now uses the data this cell declares.
res_bfgs = minimize(
    fun=objective,
    x0=flat_params,
    args=(net, x_train, y_train),
    method='L-BFGS-B',
    jac=True,
    options={'maxiter': 500, 'disp': True}
)
print('Optimization result:', res_bfgs)
Optimization result: message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
success: True
status: 0
fun: 8.822944437270053e-06
x: [ 4.697e-01 1.386e+00 ... 1.738e+00 9.475e-01]
nit: 445
jac: [-4.325e-05 4.110e-04 ... 2.297e-05 -2.766e-05]
nfev: 578
njev: 578
hess_inv: <31x31 LbfgsInvHessProduct with dtype=float64>
# 网络预测
# 用于演示的网络预测
# 网络预测
with torch.no_grad():
# 确保网络输出为一维数组
y_pred = net(x_test).squeeze().numpy() # 将网络输出转换为一维数组
# 计算精确解
x_test_1d = x_test.squeeze() # 确保x_test是一维的
y_exact = (x_test_1d**4 / 5 + 1 / (5*x_test_1d)).numpy()
# 检查y_pred和y_exact的形状是否一致
assert y_pred.shape == y_exact.shape, f"Shapes of y_pred and y_exact do not match: {y_pred.shape} vs {y_exact.shape}"
# 继续以前的计算差异和可视化...
difference = np.abs(y_pred - y_exact)
# 输出最大差异
print(f'Max difference between the neural network and exact solution: {np.max(difference)}')
# 可视化
plt.plot(x_test.numpy().flatten(), y_pred, label='Neural Network Prediction')
plt.plot(x_test.numpy().flatten(), y_exact, label='Exact Solution', linestyle='--')
plt.fill_between(x_test.numpy().flatten(), y_pred, y_exact, color='grey', alpha=0.5)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Neural Network Prediction and Exact Solution')
plt.show()
Max difference between the neural network and exact solution: 0.0008747577667236328
plt.plot(x_test.numpy().flatten(), y_pred-y_exact, label='Neural Network Prediction')
plt.plot(x_test.numpy().flatten(), y_pred-y_pred,label='Exact Solution', linestyle='--')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Neural Network Prediction and Exact Solution')
plt.show()
- 方法4,使用自己写的dfp算法替代bfgs算法进行参数优化
def dfp(objective, x0, args=(), max_iter=3000, tol=1e-3):
    """Minimize with a DFP-style quasi-Newton iteration and a fixed step.

    Args:
        objective: callable returning (function value, gradient) for a
            flat parameter vector; extra positional args are forwarded.
        x0: initial parameter vector.
        args: extra arguments passed to objective.
        max_iter: iteration cap.
        tol: gradient-norm stopping tolerance.

    Returns:
        (x, fval, grad, iters): final iterate, its value and gradient,
        and the number of iterations performed.

    Fixes over the original:
      * the inverse-Hessian update is skipped when y^T s or y^T H y is
        non-positive / near zero, which previously produced division by
        zero or NaNs and destroyed positive definiteness of H;
      * the comment claiming a line search is performed was corrected —
        the step length is a fixed 1e-2.
    """
    n = len(x0)
    H = np.eye(n)  # approximate inverse Hessian, initialized to identity
    x = x0
    fval, grad = objective(x, *args)  # value and gradient at the start point
    iters = 0
    while np.linalg.norm(grad) > tol and iters < max_iter:
        p = -np.dot(H, grad)  # quasi-Newton search direction
        alpha = 1e-2          # fixed step length (no line search)
        x_new = x + alpha * p
        fval_new, grad_new = objective(x_new, *args)
        s = x_new - x
        y = grad_new - grad
        ys = np.dot(y, s)
        Hy = np.dot(H, y)
        yHy = np.dot(y, Hy)
        # DFP rank-two update of the inverse Hessian, guarded against
        # division by zero and loss of positive definiteness.
        if ys > 1e-12 and yHy > 1e-12:
            H += np.outer(s, s) / ys - np.outer(Hy, Hy) / yHy
        x = x_new
        fval = fval_new
        grad = grad_new
        iters += 1
    return x, fval, grad, iters
import torch
from scipy.optimize import minimize
from torch.autograd import grad
import numpy as np
# Network architecture for the hand-rolled DFP experiment.
class Net(torch.nn.Module):
    """Fully connected 1 -> 10 (tanh) -> 1 network."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(1, 10)  # scalar input -> 10 hidden units
        self.fc2 = torch.nn.Linear(10, 1)  # 10 hidden units -> scalar output

    def forward(self, x):
        hidden = torch.tanh(self.fc1(x))
        return self.fc2(hidden)  # single scalar output per sample
# 实例化网络
net = Net()
# Sum-of-squares data loss.
def loss_fn(net, x, y_true):
    """Return the summed squared error of net(x) against y_true."""
    residual = net(x) - y_true
    return residual.pow(2).sum()
def objective(flat_params, net, x_train, y_train):
    """Objective for the hand-rolled DFP: load flat_params, return (loss, grad).

    flat_params is a 1-D numpy vector holding all network parameters in
    net.parameters() order; the gradient comes back in the same layout.
    """
    net.zero_grad()  # drop gradients left over from a previous call
    # Write the flat vector back into the network's parameter tensors.
    pos = 0
    for param in net.parameters():
        stop = pos + param.numel()
        param.data = torch.from_numpy(flat_params[pos:stop]).view(param.size()).type(torch.float32)
        pos = stop
    # Forward pass, summed squared error, backprop.
    loss = (net(x_train) - y_train).pow(2).sum()
    loss.backward()
    # Flatten all parameter gradients into one numpy vector.
    gradients = torch.cat([param.grad.view(-1) for param in net.parameters()]).numpy()
    return loss.item(), gradients
# 注意:在调用minimize函数之前,确保x_train和y_train是正确的数据类型
x_train_np = x_train.detach().numpy().astype(np.float32)
y_train_np = y_train.detach().numpy().astype(np.float32)
# 重新扁平化网络参数为初始猜测
flat_params = np.concatenate([p.data.numpy().ravel() for p in net.parameters()])
result = dfp(objective, flat_params, args=(net, x_train, y_train))
# 调用minimize函数all
# res_bfgs = minimize(
# fun=objective,
# x0=flat_params,
# args=(net, torch.tensor(x_train_np), torch.tensor(alphay_train_np)),
# method='BFGS',
# jac=True,
# options={'maxiter': 500, 'disp': True}
# )
print(result[3])
3000
# 网络预测
# 用于演示的网络预测
# 网络预测
with torch.no_grad():
# 确保网络输出为一维数组
y_pred = net(x_test).squeeze().numpy() # 将网络输出转换为一维数组
# 计算精确解
x_test_1d = x_test.squeeze() # 确保x_test是一维的
y_exact = (x_test_1d**4 / 5 + 1 / (5*x_test_1d)).numpy()
# 检查y_pred和y_exact的形状是否一致
assert y_pred.shape == y_exact.shape, f"Shapes of y_pred and y_exact do not match: {y_pred.shape} vs {y_exact.shape}"
# 继续以前的计算差异和可视化...
difference = np.abs(y_pred - y_exact)
# 输出最大差异
print(f'Max difference between the neural network and exact solution: {np.max(difference)}')
# 可视化
plt.plot(x_test.numpy().flatten(), y_pred, label='Neural Network Prediction')
plt.plot(x_test.numpy().flatten(), y_exact, label='Exact Solution', linestyle='--')
plt.fill_between(x_test.numpy().flatten(), y_pred, y_exact, color='grey', alpha=0.5)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Neural Network Prediction and Exact Solution')
plt.show()
Max difference between the neural network and exact solution: 0.010486841201782227
plt.plot(x_test.numpy().flatten(), y_pred-y_exact, label='Neural Network Prediction')
plt.plot(x_test.numpy().flatten(), y_pred-y_pred,label='Exact Solution', linestyle='--')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Comparison between Neural Network Prediction and Exact Solution')
plt.show()
比较奇怪的一点是,自己实现的BFGS算法与scipy/pytorch内置实现的效果完全不同:内置实现效果相当好。推测原因是内置实现包含线搜索、终止准则与数值稳定性方面的工程优化(此外,自实现版本中将Hessian近似当作其逆矩阵使用也会显著影响搜索方向,见上文修正)。