可视化resnet
本篇介绍如何通过调整学习率来提高模型性能,并着重展示如何使用 TensorBoard 实现 ResNet 的可视化。本文只展示使用 CosineAnnealingLR、并在每个 epoch 结束后调用 scheduler.step()
进行更新的代码;下篇将通过展示生成的图像,详细讲述不同学习率调整方案对模型的影响。
步骤1:加载ResNet模型
在PyTorch中,你可以很方便地加载一个预训练的ResNet模型。这里以ResNet18为例,你可以根据需要选择其他版本(如ResNet34, ResNet50等)。
import torchvision.models as models

# Load a pretrained ResNet-18.
# NOTE: the boolean `pretrained=True` flag is deprecated since torchvision 0.13;
# the `weights` enum is the supported way to request the same ImageNet weights.
resnet18 = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
步骤2:使用TensorBoard可视化模型结构
你需要创建一个SummaryWriter
实例,并使用它来写入模型的图结构。
import torch  # fix: the snippet uses torch.randn below but never imported torch
from torch.utils.tensorboard import SummaryWriter

# Create the TensorBoard SummaryWriter (logs go under runs/resnet18).
writer = SummaryWriter('runs/resnet18')
# Write the model's graph to TensorBoard using a dummy input tensor.
writer.add_graph(resnet18, input_to_model=torch.randn(1, 3, 224, 224))
# Close the writer to flush pending events to disk.
writer.close()
这里,input_to_model 是一个虚拟(dummy)输入张量,其大小为 (1, 3, 224, 224),代表 1 张 224x224 大小的 RGB 图像。之所以选择这个尺寸,是因为 ResNet 模型通常接受这种大小的输入。
步骤3:启动TensorBoard
在终端中运行以下命令以启动TensorBoard服务:
tensorboard --logdir=runs
代码
源代码
import argparse
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.tensorboard import SummaryWriter

from resnet import ResNet18
# TensorBoard writer; one run directory per experiment.
writer = SummaryWriter(log_dir='runs/Log_SGDAndSchedulers_3')

# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Command-line arguments for output folder and (optional) resume checkpoint.
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
parser.add_argument('--net', default='./model/Resnet18.pth', help="path to net (to continue training)")
args = parser.parse_args()

# Hyperparameters.
EPOCH = 50        # total number of training epochs
pre_epoch = 0     # starting epoch (non-zero when resuming)
BATCH_SIZE = 128
LR = 0.1          # initial learning rate, annealed by the cosine scheduler below

# Data pipeline: random crop + horizontal flip for training, plain
# normalization for testing (CIFAR-10 channel statistics).
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Model, loss, optimizer, and cosine-annealing LR schedule over EPOCH epochs.
net = ResNet18().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCH)

if __name__ == "__main__":
    # fix: make sure the checkpoint folder exists before torch.save below.
    os.makedirs(args.outf, exist_ok=True)
    best_acc = 85  # only record a "best" once accuracy exceeds this threshold (%)
    print("Start Training, Resnet-18!")
    with open("acc.txt", "w") as f, open("log.txt", "w") as f2:
        for epoch in range(pre_epoch, EPOCH):
            print('\nEpoch: %d' % (epoch + 1))
            net.train()
            sum_loss = 0.0
            correct = 0
            total = 0
            for i, data in enumerate(trainloader, 0):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Running statistics for this epoch.
                sum_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                # .item() keeps `correct` a plain int instead of a 0-dim tensor.
                correct += predicted.eq(labels.data).cpu().sum().item()
                print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                      % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                f2.write('%03d %05d |Loss: %.03f | Acc: %.3f%% '
                         % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                f2.write('\n')
                f2.flush()

            # Update the learning rate once per epoch (cosine annealing).
            scheduler.step()

            # Evaluate on the test set after each epoch.
            print("Waiting Test!")
            net.eval()  # fix: set eval mode once, not once per test batch
            with torch.no_grad():
                correct = 0
                total = 0
                test_loss = 0.0
                for data in testloader:
                    images, labels = data
                    images, labels = images.to(device), labels.to(device)
                    outputs = net(images)
                    loss = criterion(outputs, labels)
                    test_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                test_loss /= len(testloader)
                acc = 100. * correct / total
                print('测试分类准确率为:%.3f%%' % acc)
                print('测试平均损失为:%.3f' % test_loss)
                # Log test metrics to TensorBoard.
                writer.add_scalar("Accuracy/test", acc, epoch)
                writer.add_scalar("Loss/test", test_loss, epoch)
                print('Saving model......')
                torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                f.write('\n')
                f.flush()
                # Record the best accuracy seen so far.
                if acc > best_acc:
                    with open("best_acc.txt", "w") as f3:
                        f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                    best_acc = acc
    print("Training Finished, TotalEPOCH=%d" % EPOCH)
    writer.close()
一:新增库
增加了 from torch.utils.tensorboard import SummaryWriter 和 from torch.optim.lr_scheduler import CosineAnnealingLR 的导入。
二:初始化 TensorBoard 记录器
writer = SummaryWriter(log_dir='runs/Log_SGDAndSchedulers_3')
"runs/Log_SGDAndSchedulers_3"是我自定义的名称
三:添加学习率调度器
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCH)
四:训练循环中添加 TensorBoard 记录
writer.add_scalar("Accuracy/test", acc, epoch)
writer.add_scalar("Loss/test", test_loss, epoch)
五:在每个 epoch 结束时更新学习率
scheduler.step() # 更新学习率(余弦退火)
六:在终端启动tensorboard
tensorboard --logdir=runs/Log_SGDAndSchedulers_3