Knowledge review:
- Three ways to visualize a model: torchinfo's summary printout (recommended) plus weight-distribution plots
- Progress bars: manual and automatic tqdm styles for cleaner training output (a minimal sketch of both follows this list)
- Inference: switch the model to evaluation mode
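As a quick standalone refresher on the two tqdm styles (the time.sleep call is just a stand-in for real work):

from tqdm import tqdm
import time

# Automatic style: wrap any iterable; tqdm updates and closes the bar itself
for _ in tqdm(range(100), desc='auto'):
    time.sleep(0.01)  # placeholder for a training step

# Manual style: create the bar yourself and advance it explicitly
with tqdm(total=100, desc='manual') as pbar:
    for _ in range(100):
        time.sleep(0.01)
        pbar.update(1)  # move the bar forward by one step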
Homework: tune the hyperparameters in the model definition and compare the results.
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchinfo import summary
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
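# (Note: for fully deterministic GPU runs you may also need
#  torch.backends.cudnn.deterministic = True, at some speed cost)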
# Data preprocessing pipeline
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # mean and std of the MNIST training set
])
# Load the MNIST dataset
train_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('data', train=False, transform=transform)
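# (download=True fetches the data on the first run; the test split reuses the same cached files)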
# Hyperparameter configurations to compare
configs = [
    {
        'name': 'baseline',
        'hidden_size': 128,
        'learning_rate': 0.01,
        'batch_size': 64,
        'activation': 'relu'
    },
    {
        'name': 'larger hidden size',
        'hidden_size': 256,
        'learning_rate': 0.01,
        'batch_size': 64,
        'activation': 'relu'
    },
    {
        'name': 'lower learning rate',
        'hidden_size': 128,
        'learning_rate': 0.001,
        'batch_size': 64,
        'activation': 'relu'
    },
    {
        'name': 'larger batch size',
        'hidden_size': 128,
        'learning_rate': 0.01,
        'batch_size': 128,
        'activation': 'relu'
    },
    {
        'name': 'LeakyReLU activation',
        'hidden_size': 128,
        'learning_rate': 0.01,
        'batch_size': 64,
        'activation': 'leaky_relu'
    }
]
# Model definition
class MLP(nn.Module):
def __init__(self, hidden_size=128, activation='relu'):
super(MLP, self).__init__()
self.fc1 = nn.Linear(28 * 28, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, 10)
        # Select the activation function from the config
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'leaky_relu':
            self.activation = nn.LeakyReLU()
        else:
            raise ValueError(f"Unsupported activation: {activation}")
def forward(self, x):
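        # Flatten each 28x28 image into a 784-dimensional vector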
x = x.view(-1, 28 * 28)
x = self.activation(self.fc1(x))
x = self.activation(self.fc2(x))
x = self.fc3(x)
return x
# Training and evaluation routine
def train_and_evaluate(config):
    # Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'])
    # Initialize the model
model = MLP(hidden_size=config['hidden_size'], activation=config['activation'])
    # Print the model summary
    print(f"\n=== Model summary: {config['name']} ===")
summary(model, input_size=(1, 28, 28))
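    # (torchinfo traces a dummy batch of the given input_size; since forward()
    #  flattens its input, (1, 28, 28) works without an explicit channel dimension)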
    # Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    # Train the model (on GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
train_losses = []
test_accuracies = []
print(f"\n=== 训练 {config['name']} 模型 ===")
    for epoch in range(5):  # train for 5 epochs
model.train()
running_loss = 0.0
        # Automatic tqdm style: wrap the iterable and let it manage the bar
progress_bar = tqdm(enumerate(train_loader), total=len(train_loader))
for batch_idx, (data, target) in progress_bar:
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
running_loss += loss.item()
            # tqdm already shows the batch counter, so only report epoch and loss
            progress_bar.set_description(f'Epoch {epoch+1}')
            progress_bar.set_postfix(loss=f'{loss.item():.4f}')
        # Record the average training loss for this epoch
epoch_loss = running_loss / len(train_loader)
train_losses.append(epoch_loss)
        # Evaluate on the test set
        model.eval()  # switch to evaluation mode (affects layers like dropout and batch norm)
test_correct = 0
test_total = 0
        with torch.no_grad():  # disable gradient tracking to speed up inference
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
                predicted = output.argmax(dim=1)
test_total += target.size(0)
test_correct += (predicted == target).sum().item()
test_accuracy = 100.0 * test_correct / test_total
test_accuracies.append(test_accuracy)
        print(f'Epoch {epoch+1}, train loss: {epoch_loss:.4f}, test accuracy: {test_accuracy:.2f}%')
    # Visualize the weight distributions
visualize_weight_distribution(model, config['name'])
return train_losses, test_accuracies
# Plot a histogram of each weight matrix's values
def visualize_weight_distribution(model, config_name):
    names, weights = [], []
    for name, param in model.named_parameters():
        if 'weight' in name:
            names.append(name)
            weights.append(param.detach().cpu().numpy().flatten())
    plt.figure(figsize=(10, 6))
    plt.hist(weights, bins=50, label=names)
    plt.title(f'Weight distribution: {config_name}')
    plt.xlabel('Weight value')
    plt.ylabel('Frequency')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'{config_name}_weights.png')
    plt.close()
# Run every configuration and collect the results
results = {}
for config in configs:
train_losses, test_accuracies = train_and_evaluate(config)
results[config['name']] = {
'train_losses': train_losses,
'test_accuracies': test_accuracies,
'final_accuracy': test_accuracies[-1]
}
# Compare the final accuracy of each configuration
plt.figure(figsize=(12, 6))
plt.bar([config['name'] for config in configs],
        [results[config['name']]['final_accuracy'] for config in configs])
plt.title('Final test accuracy per hyperparameter configuration')
plt.xlabel('Configuration')
plt.ylabel('Accuracy (%)')
plt.ylim(95, 100)  # zoom in on the top of the range so the differences stand out
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig('hyperparameter_comparison.png')
plt.show()
# Print the final comparison
print("\n=== Final results ===")
for config in configs:
    print(f"{config['name']}: final accuracy = {results[config['name']]['final_accuracy']:.2f}%")
Example output from one run:

=== Final results ===
baseline: final accuracy = 96.05%
larger hidden size: final accuracy = 95.38%
lower learning rate: final accuracy = 97.64%
larger batch size: final accuracy = 96.49%
LeakyReLU activation: final accuracy = 96.32%
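In this run, lowering the learning rate to 0.001 helped the most, while the wider hidden layer actually hurt slightly; a single 5-epoch run is noisy, though, so treat this ranking as indicative rather than conclusive.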