文章目录
前言
逆行不独行,隔离不分离。希望疫情早点结束,我们都平平安安。河大加油!
4.3 自动梯度计算
虽然我们能够通过模块化的方式比较好地对神经网络进行组装,但是每个模块的梯度计算过程仍然十分繁琐且容易出错。在深度学习框架中,已经封装了自动梯度计算的功能,我们只需要聚焦模型架构,不再需要耗费精力进行计算梯度。
飞桨提供了paddle.nn.Layer类,来方便快速的实现自己的层和模型。模型和层都可以基于paddle.nn.Layer扩充实现,模型只是一种特殊的层。继承了paddle.nn.Layer类的算子中,可以在内部直接调用其它继承paddle.nn.Layer类的算子,飞桨框架会自动识别算子中内嵌的paddle.nn.Layer类算子,并自动计算它们的梯度,并在优化时更新它们的参数。
pytorch中的相应内容是什么?请简要介绍。
torch提供了torch.nn.Module类,来方便快速的实现自己的层和模型。模型和层都可以基于nn扩充实现,模型只是一种特殊的层。它继承了torch.nn.Module类的算子中,可以在内部直接调用其它继承的算子,torch框架会自动识别算子中内嵌的torch.nn.Module类算子,并自动计算它们的梯度,并在优化时更新它们的参数。
4.3.1 使用pytorch的预定义算子来重新实现二分类任务
import torch.nn as nn
import torch.nn.functional as F
import os
import torch
from abc import abstractmethod
import math
import numpy as np
n_samples = 1000
X, y = make_moons(n_samples=n_samples, shuffle=True, noise=0.15)
num_train = 640
num_dev = 160
num_test = 200
X_train, y_train = X[:num_train], y[:num_train]
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]
y_train = y_train.reshape([-1,1])
y_dev = y_dev.reshape([-1,1])
y_test = y_test.reshape([-1,1])
class Model_MLP_L2_V4(torch.nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Model_MLP_L2_V4, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
w=torch.normal(0,0.1,size=(hidden_size,input_size),requires_grad=True)
self.fc1.weight = nn.Parameter(w)
self.fc2 = nn.Linear(hidden_size, output_size)
w = torch.normal(0, 0.1, size=(output_size, hidden_size), requires_grad=True)
self.fc2.weight = nn.Parameter(w)
# 使用'torch.nn.functional.sigmoid'定义 Logistic 激活函数
self.act_fn = torch.sigmoid
# 前向计算
def forward(self, inputs):
z1 = self.fc1(inputs.to(torch.float32))
a1 = self.act_fn(z1)
z2 = self.fc2(a1)
a2 = self.act_fn(z2)
return a2
# def print_weights(runner):
# print('The weights of the Layers:')
#
# for item in runner.model.sublayers():
# print(item.full_name()
# for param in item.parameters():
# print(param.numpy())
class RunnerV2_2(object):
def __init__(self, model, optimizer, metric, loss_fn, **kwargs):
self.model = model
self.optimizer = optimizer
self.loss_fn = loss_fn
self.metric = metric
# 记录训练过程中的评估指标变化情况
self.train_scores = []
self.dev_scores = []
# 记录训练过程中的评价指标变化情况
self.train_loss = []
self.dev_loss = []
def train(self, train_set, dev_set, **kwargs):
# 将模型切换为训练模式
self.model.train()
# 传入训练轮数,如果没有传入值则默认为0
num_epochs = kwargs.get("num_epochs", 0)
# 传入log打印频率,如果没有传入值则默认为100
log_epochs = kwargs.get("log_epochs", 100)
# 传入模型保存路径,如果没有传入值则默认为"best_model.pdparams"
save_path = kwargs.get("save_path", "best_model.pdparams")
# log打印函数,如果没有传入则默认为"None"
custom_print_log = kwargs.get("custom_print_log", None)
# 记录全局最优指标
best_score = 0
# 进行num_epochs轮训练
for epoch in range(num_epochs):
X, y = train_set
# 获取模型预测
logits = self.model(X.to(torch.float32))
# 计算交叉熵损失
trn_loss = self.loss_fn(logits, y)
self.train_loss.append(trn_loss.item())
# 计算评估指标
trn_score = self.metric(logits, y).item()
self.train_scores.append(trn_score)
# 自动计算参数梯度
trn_loss.backward()
if custom_print_log is not None:
# 打印每一层的梯度
custom_print_log(self)
# 参数更新
self.optimizer.step()
# 清空梯度
self.optimizer.zero_grad() # reset gradient
dev_score, dev_loss = self.evaluate(dev_set)
# 如果当前指标为最优指标,保存该模型
if dev_score > best_score:
self.save_model(save_path)
print(f"[Evaluate] best accuracy performence has been updated: {
best_score:.5f} --> {
dev_score:.5f}")
best_score = dev_score
if log_epochs and epoch % log_epochs == 0:
print(f"[Train] epoch: {
epoch}/{
num_epochs}, loss: {
trn_loss.item()}")
@torch.no_grad()
def evaluate(self, data_set):
# 将模型切换为评估模式
self.model.eval()
X, y = data_set
# 计算模型输出
logits = self.model(X)
# 计算损失函数
loss = self.loss_fn(logits, y).item()
self.dev_loss.append(loss)
# 计算评估指标
score = self.metric(logits, y).item()
self.dev_scores.append(score)
return score, loss
# 模型测试阶段,使用'torch.no_grad()'控制不计算和存储梯度
@torch.no_grad()
def predict(self, X):
# 将模型切换为评估模式
self.model.eval()
return self.model(X)
# 使用'model.state_dict()'获取模型参数,并进行保存
def save_model(self, saved_path):
torch.save(self.model.state_dict(), saved_path)
# 使用'model.set_state_dict'加载模型参数
def load_model(self, model_path):
state_dict = torch.load(model_path)
self.model.load_state_dict(state_dict)
# 设置模型
input_size = 2
hidden_size = 5
output_size = 1
model = Model_MLP_L2_V4(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
# 设置损失函数
loss_fn = F.binary_cross_entropy
# 设置优化器
learning_rate = 0.2 #5e-2
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
# 设置评价指标
metric = accuracy
# 其他参数
epoch = 2000
saved_path = 'best_model.pdparams'
# 实例化RunnerV2类,并传入训练配置
runner = RunnerV2_2(model, optimizer, metric, loss_fn)
runner.train([X_train, y_train], [X_dev, y_dev], num_epochs = epoch, log_epochs=50, save_path="best_model.pdparams")
plot(runner, 'fw-acc.pdf')
#模型评价
runner.load_model("best_model.pdparams")
score, loss = runner.evaluate([X_test, y_test])
print("[Test] score/loss: {:.4f}/{:.4f}".format(score, loss))
make_moons函数代码:
import torch
# 新增make_moons函数
def make_moons(n_samples=1000, shuffle=True, noise=None):
n_samples_out = n_samples // 2
n_samples_in = n_samples - n_samples_out
outer_circ_x = torch.cos(torch.linspace(0, math.pi, n_samples_out))
outer_circ_y = torch.sin(torch.linspace(0, math.pi, n_samples_out))
inner_circ_x = 1 - torch.cos(torch.linspace(0, math.pi, n_samples_in))
inner_circ_y = 0.5 - torch.sin(torch.linspace(0, math.pi, n_samples_in))
X = torch.stack(
[torch.cat([outer_circ_x, inner_circ_x]),
torch.cat([outer_circ_y, inner_circ_y])],
axis=1
)
y = torch.cat(
[torch.zeros([n_samples_out]), torch.ones([n_samples_in])]
)
if shuffle:
idx = torch.randperm(X.shape[0])
X = X[idx]
y = y[idx]
if noise is not None:
X += np.random.normal(0.0, noise, X.shape)
return X, y
accuracy函数代码:
def accuracy(preds, labels):
# 判断是二分类任务还是多分类任务,preds.shape[1]=1时为二分类任务,preds.shape[1]>1时为多分类任务
if preds.shape[1] == 1:
preds=(preds>=0.5).to(torch.float32)
else:
preds = torch.argmax(preds,dim=1).int()
return torch.mean((preds == labels).float())
plot函数代码:
import matplotlib.pyplot as plt
def plot(runner, fig_name):
plt.figure(figsize=(10, 5))
epochs = [i for i in range(len(runner.train_scores))]
plt.subplot(1, 2, 1)
plt.plot(epochs, runner.train_loss, color='#e4007f', label="Train loss")
plt.plot(epochs, runner.dev_loss, color='#f19ec2', linestyle=<