Relative Error Becomes NaN
This usually happens because the denominator in the relative-error computation is zero or very small. To prevent it, the computation needs a small safeguard. The fix is shown below, together with the corresponding adjustments to the whole training and validation procedure.
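First, a minimal sketch of the failure mode in isolation (toy tensors, separate from the training script below): an exact-zero target makes the unprotected division produce NaN, while a small epsilon added to the denominator keeps every element finite.

```python
import torch

# Toy values: the first target is exactly zero.
targets = torch.tensor([0.0, 2.0, 5.0])
preds = torch.tensor([0.0, 2.5, 4.0])

# Unprotected: 0/0 for the first element produces NaN, which then
# poisons any subsequent sum or mean.
unsafe = torch.abs(preds - targets) / torch.abs(targets)
print(unsafe)  # tensor([nan, 0.2500, 0.2000])

# Protected: a tiny epsilon in the denominator keeps every element
# finite, so aggregates stay well-defined.
eps = 1e-8
safe = torch.abs(preds - targets) / (torch.abs(targets) + eps)
print(safe)  # tensor([0.0000, 0.2500, 0.2000])
```

The same guard appears inside the `test` function of the full script.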
Code Implementation
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Define a simple fully connected network
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Generate a synthetic dataset
def create_dataset():
    data = torch.randn(10000, 784)
    labels = torch.randint(0, 10, (10000,))
    return data, labels

# Training function
def train(model, train_loader, criterion, optimizer):
    model.train()
    running_train_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_train_loss += loss.item()
    avg_train_loss = running_train_loss / len(train_loader)
    return avg_train_loss

# Evaluation function that also computes the relative error
def test(model, data_loader, criterion):
    model.eval()
    running_val_loss = 0.0
    total_relative_error = 0.0
    with torch.no_grad():
        for inputs, targets in data_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_val_loss += loss.item()
            predicted = torch.argmax(outputs, dim=1)
            # Add a small epsilon so a zero target cannot zero the denominator
            relative_error = torch.abs(predicted.float() - targets.float()) / (torch.abs(targets.float()) + 1e-8)
            total_relative_error += relative_error.sum().item()
    avg_val_loss = running_val_loss / len(data_loader)
    avg_relative_error = total_relative_error / len(data_loader.dataset)
    return avg_val_loss, avg_relative_error

# Main function
def main():
    model = SimpleNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Split: 70% train, 15% validation, 15% test
    data, labels = create_dataset()
    train_data, temp_data, train_labels, temp_labels = train_test_split(data, labels, test_size=0.3, random_state=42)
    val_data, test_data, val_labels, test_labels = train_test_split(temp_data, temp_labels, test_size=0.5, random_state=42)
    train_loader = DataLoader(TensorDataset(train_data, train_labels), batch_size=32, shuffle=True)
    val_loader = DataLoader(TensorDataset(val_data, val_labels), batch_size=32, shuffle=False)
    test_loader = DataLoader(TensorDataset(test_data, test_labels), batch_size=32, shuffle=False)

    num_epochs = 50
    patience = 5
    best_val_loss = float('inf')
    early_stop_counter = 0
    train_losses = []
    val_losses = []
    val_relative_errors = []

    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, criterion, optimizer)
        val_loss, val_relative_error = test(model, val_loader, criterion)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_relative_errors.append(val_relative_error)
        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Relative Error: {val_relative_error:.4f}')
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
        else:
            early_stop_counter += 1
        # Early stopping: halt when validation loss has not improved for `patience` epochs
        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Validation Loss')
    plt.subplot(1, 2, 2)
    plt.plot(range(1, len(val_relative_errors) + 1), val_relative_errors, label='Validation Relative Error')
    plt.xlabel('Epochs')
    plt.ylabel('Relative Error')
    plt.legend()
    plt.title('Validation Relative Error')
    plt.show()

    # Final evaluation on the test set
    test_loss, test_relative_error = test(model, test_loader, criterion)
    print(f'Test Loss: {test_loss:.4f}, Test Relative Error: {test_relative_error:.4f}')

if __name__ == "__main__":
    main()
```
Code Explanation
- Protecting the denominator: when computing the relative error, add a small constant `1e-8` to the denominator to prevent division by zero. This keeps the relative error from becoming NaN.
- Recording training and validation losses: at the end of each epoch, record the training loss, the validation loss, and the validation relative error.
- Early stopping: when the validation loss fails to improve for several consecutive epochs, early stopping is triggered and training halts (a sketch that also restores the best weights follows this list).
- Plotting the loss and relative-error curves: after training finishes, plot the training and validation loss curves as well as the validation relative-error curve.
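As noted in the early-stopping item above, the loop stops training but still evaluates the test set with the last epoch's weights. Here is a minimal sketch, assuming the variables from `main()` are in scope, of the same bookkeeping extended to snapshot and restore the best weights; the `copy.deepcopy` checkpointing is an addition, not part of the original script:

```python
import copy

# Inside main(), replacing the early-stopping bookkeeping:
best_val_loss = float('inf')
best_state = None
early_stop_counter = 0

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer)
    val_loss, val_relative_error = test(model, val_loader, criterion)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # Snapshot the weights that achieved the best validation loss
        best_state = copy.deepcopy(model.state_dict())
        early_stop_counter = 0
    else:
        early_stop_counter += 1
    if early_stop_counter >= patience:
        print("Early stopping triggered")
        break

# Restore the best weights before the final test evaluation
if best_state is not None:
    model.load_state_dict(best_state)
```

Restoring the snapshot before the final `test(...)` call means the reported test metrics describe the best validation checkpoint rather than a possibly degraded final epoch.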
With these changes, the relative error no longer becomes NaN, and observing how the training and validation losses and the relative error evolve makes it possible to judge whether the model is overfitting.