Optimizing ResNet Performance with Learning-Rate Schedulers

Source code

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
from resnet import ResNet18
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR

writer = SummaryWriter(log_dir='runs/Log_SGDAndSchedulers_3')

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Argument parsing, so options can be supplied on the command line
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
parser.add_argument('--net', default='./model/Resnet18.pth', help="path to net (to continue training)")
args = parser.parse_args()

# Hyperparameters
EPOCH = 50
pre_epoch = 0  # number of epochs already trained (starting epoch when resuming)
BATCH_SIZE = 128
LR = 0.1

# Prepare and preprocess the dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Model definition: ResNet-18
net = ResNet18().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)

# Initialize the learning-rate scheduler
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCH)

# Training
if __name__ == "__main__":
    best_acc = 85
    print("Start Training, Resnet-18!")
    with open("acc.txt", "w") as f:
        with open("log.txt", "w") as f2:
            for epoch in range(pre_epoch, EPOCH):
                print('\nEpoch: %d' % (epoch + 1))
                net.train()
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                for i, data in enumerate(trainloader, 0):
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += predicted.eq(labels.data).cpu().sum()
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% '
                             % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('\n')
                    f2.flush()

                scheduler.step()  # update the learning rate (cosine annealing)

                # Evaluate test accuracy after each epoch
                print("Waiting Test!")
                net.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    test_loss = 0.0
                    for data in testloader:
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        loss = criterion(outputs, labels)
                        test_loss += loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum()
                    test_loss /= len(testloader)
                    acc = 100. * correct / total
                    print('Test accuracy: %.3f%%' % acc)
                    print('Average test loss: %.3f' % test_loss)

                    writer.add_scalar("Accuracy/test", acc, epoch)
                    writer.add_scalar("Loss/test", test_loss, epoch)

                    print('Saving model......')
                    torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                    f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                    f.write('\n')
                    f.flush()
                    if acc > best_acc:
                        f3 = open("best_acc.txt", "w")
                        f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                        f3.close()
                        best_acc = acc

            print("Training Finished, TotalEPOCH=%d" % EPOCH)
    writer.close()
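To see what this scheduler actually does to the learning rate, the short standalone sketch below (independent of the training script; the dummy parameter exists only so the optimizer has something to manage) prints the cosine-annealed LR for each of the 50 epochs:

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR

# Standalone sketch: trace the cosine-annealed learning rate over 50 epochs.
dummy = [torch.zeros(1, requires_grad=True)]  # placeholder parameter
opt = torch.optim.SGD(dummy, lr=0.1, momentum=0.9)
sched = CosineAnnealingLR(opt, T_max=50)

for epoch in range(50):
    opt.step()    # the optimizer steps first...
    sched.step()  # ...then the scheduler advances, once per epoch
    print('epoch %02d  lr = %.5f' % (epoch + 1, sched.get_last_lr()[0]))

The printout glides from 0.1 down toward 0, which is exactly the schedule the training script above follows.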

Replacing the SGD Optimizer with Adam

Code
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
from resnet import ResNet18
from torch.utils.tensorboard import SummaryWriter
import random
import time

writer = SummaryWriter(log_dir='runs/Adam')

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Argument parsing, so options can be supplied on the command line
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')  # output directory for checkpoints
parser.add_argument('--net', default='./model/Resnet18.pth', help="path to net (to continue training)")  # checkpoint path when resuming
args = parser.parse_args()

# Hyperparameters
EPOCH = 50       # number of passes over the training set
pre_epoch = 0    # number of epochs already trained (starting epoch when resuming)
BATCH_SIZE = 128
LR = 0.01        # learning rate

# Prepare and preprocess the dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# CIFAR-10 class labels
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model definition: ResNet-18
net = ResNet18().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=LR, weight_decay=5e-4)  # Adam optimizer

# Log the initial learning rate
for param_group in optimizer.param_groups:
    writer.add_scalar("Learning Rate", param_group['lr'], 0)

# Training
if __name__ == "__main__":
    best_acc = 85
    print("Start Training, Resnet-18!")
    with open("acc.txt", "w") as f:
        with open("log.txt", "w")as f2:
            for epoch in range(pre_epoch, EPOCH):
                print('\nEpoch: %d' % (epoch + 1))
                net.train()
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                for i, data in enumerate(trainloader, 0):
                    length = len(trainloader)
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()

                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += predicted.eq(labels.data).cpu().sum()
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% '
                             % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('\n')
                    f2.flush()

                print("Waiting Test!")
                with torch.no_grad():
                    correct = 0
                    total = 0
                    test_loss = 0.0
                    for data in testloader:
                        net.eval()
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        loss = criterion(outputs, labels)
                        test_loss += loss.item()

                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum()
                    test_loss /= len(testloader)
                    acc = 100. * correct / total
                    print('Test accuracy: %.3f%%' % acc)
                    print('Average test loss: %.3f' % test_loss)

                    writer.add_scalar("Accuracy/test", acc, epoch)
                    writer.add_scalar("Loss/test", test_loss, epoch)

                    print('Saving model......')
                    torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                    f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                    f.write('\n')
                    f.flush()
                    if acc > best_acc:
                        f3 = open("best_acc.txt", "w")
                        f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                        f3.close()
                        best_acc = acc
            print("Training Finished, TotalEPOCH=%d" % EPOCH)

# 关闭writer
        writer.close()
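The script above records only the initial learning rate (at step 0). Adam adapts per-parameter step sizes internally, but the param_group learning rate itself stays constant unless a scheduler changes it; if one is attached later, the same logging pattern can be repeated at the end of each epoch so the LR curve shows up in TensorBoard. A minimal sketch, assuming the writer, optimizer, and epoch names from the loop above:

                # Sketch: at the end of each epoch, record the current LR
                # (assumes the surrounding writer/optimizer/epoch names).
                for param_group in optimizer.param_groups:
                    writer.add_scalar("Learning Rate", param_group['lr'], epoch + 1)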
Figures

[Figures not included in the source.]

SGD with Both the CosineAnnealingLR and ReduceLROnPlateau Schedulers

Code

In this code I use the SGD optimizer with momentum, together with two schedulers: CosineAnnealingLR, which moves the learning rate along a cosine curve, and ReduceLROnPlateau, which cuts the learning rate when a monitored metric stops improving. (The script below monitors the accumulated training loss sum_loss; the same scheduler is often driven by a validation loss instead.) Combining the two is meant to adapt the learning rate to the needs of different phases of training.
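A minimal standalone sketch of that interaction (dummy parameter, simulated constant validation loss; again, the full script below steps the plateau scheduler on the running training loss rather than a validation loss):

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

# Dummy setup: a single placeholder parameter for the optimizer to hold.
params = [torch.zeros(1, requires_grad=True)]
opt = torch.optim.SGD(params, lr=0.1, momentum=0.9)
sched_cos = CosineAnnealingLR(opt, T_max=50)
sched_plateau = ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=10)

for epoch in range(50):
    val_loss = 1.0                # stand-in for a real validation loss
    opt.step()
    sched_cos.step()              # cosine decay, once per epoch
    sched_plateau.step(val_loss)  # extra 10x cut if the metric plateaus
    print('epoch %02d  lr = %.6f' % (epoch + 1, opt.param_groups[0]['lr']))

The full training script: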

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
from resnet import ResNet18
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

writer = SummaryWriter(log_dir='runs/Log_SGDAndSchedulers_3')

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Argument parsing, so options can be supplied on the command line
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
parser.add_argument('--net', default='./model/Resnet18.pth', help="path to net (to continue training)")
args = parser.parse_args()

# Hyperparameters
EPOCH = 50
pre_epoch = 0  # number of epochs already trained (starting epoch when resuming)
BATCH_SIZE = 128
LR = 0.1

# Prepare and preprocess the dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Model definition: ResNet-18
net = ResNet18().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)

# Initialize the learning-rate schedulers
scheduler_cosine = CosineAnnealingLR(optimizer, T_max=EPOCH)
scheduler_plateau = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

# Training
if __name__ == "__main__":
    best_acc = 85
    print("Start Training, Resnet-18!")
    with open("acc.txt", "w") as f:
        with open("log.txt", "w") as f2:
            for epoch in range(pre_epoch, EPOCH):
                print('\nEpoch: %d' % (epoch + 1))
                net.train()
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                for i, data in enumerate(trainloader, 0):
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += predicted.eq(labels.data).cpu().sum()
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% '
                             % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('\n')
                    f2.flush()

                scheduler_cosine.step()  # update the learning rate (cosine annealing)
                scheduler_plateau.step(sum_loss)  # after each epoch, cut the LR if the accumulated training loss has plateaued

                # Evaluate test accuracy after each epoch
                print("Waiting Test!")
                net.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    test_loss = 0.0
                    for data in testloader:
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        loss = criterion(outputs, labels)
                        test_loss += loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum()
                    test_loss /= len(testloader)
                    acc = 100. * correct / total
                    print('Test accuracy: %.3f%%' % acc)
                    print('Average test loss: %.3f' % test_loss)

                    writer.add_scalar("Accuracy/test", acc, epoch)
                    writer.add_scalar("Loss/test", test_loss, epoch)

                    print('Saving model......')
                    torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                    f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                    f.write('\n')
                    f.flush()
                    if acc > best_acc:
                        f3 = open("best_acc.txt", "w")
                        f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                        f3.close()
                        best_acc = acc

            print("Training Finished, TotalEPOCH=%d" % EPOCH)
    writer.close()

Results
EPOCH=001,Accuracy= 39.310%
EPOCH=002,Accuracy= 47.420%
EPOCH=003,Accuracy= 56.670%
EPOCH=004,Accuracy= 66.260%
EPOCH=005,Accuracy= 66.380%
EPOCH=006,Accuracy= 73.890%
EPOCH=007,Accuracy= 76.840%
EPOCH=008,Accuracy= 75.420%
EPOCH=009,Accuracy= 80.520%
EPOCH=010,Accuracy= 79.710%
EPOCH=011,Accuracy= 79.330%
EPOCH=012,Accuracy= 74.160%
EPOCH=013,Accuracy= 82.560%
EPOCH=014,Accuracy= 78.710%
EPOCH=015,Accuracy= 77.590%
EPOCH=016,Accuracy= 79.860%
EPOCH=017,Accuracy= 84.960%
EPOCH=018,Accuracy= 84.780%
EPOCH=019,Accuracy= 84.370%
EPOCH=020,Accuracy= 85.880%
EPOCH=021,Accuracy= 85.250%
EPOCH=022,Accuracy= 84.260%
EPOCH=023,Accuracy= 83.670%
EPOCH=024,Accuracy= 84.290%
EPOCH=025,Accuracy= 87.520%
EPOCH=026,Accuracy= 87.210%
EPOCH=027,Accuracy= 85.560%
EPOCH=028,Accuracy= 88.210%
EPOCH=029,Accuracy= 87.010%
EPOCH=030,Accuracy= 88.680%


SGD with Only the CosineAnnealingLR Scheduler

Code

  • The ReduceLROnPlateau initialization and its related code are removed; only the CosineAnnealingLR scheduler is kept.
  • scheduler now uses CosineAnnealingLR alone, and scheduler.step() is called once at the end of every epoch.

With this version you can experiment with the CosineAnnealingLR scheduler on its own. Run it and watch the accuracy and loss curves recorded in TensorBoard to judge how the model performs under this schedule.
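For reference, with the default eta_min = 0, CosineAnnealingLR follows the cosine curve from the PyTorch documentation,

    \eta_t = \eta_{\min} + \tfrac{1}{2} (\eta_{\max} - \eta_{\min}) \left(1 + \cos \frac{t \pi}{T_{\max}}\right),

so with LR = 0.1 and T_max = EPOCH = 50, the learning rate decays smoothly from 0.1 at the start of training to 0 at the final epoch. The full script: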

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
from resnet import ResNet18
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR

writer = SummaryWriter(log_dir='runs/Log_SGDAndSchedulers_3')

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Argument parsing, so options can be supplied on the command line
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
parser.add_argument('--net', default='./model/Resnet18.pth', help="path to net (to continue training)")
args = parser.parse_args()

# Hyperparameters
EPOCH = 50
pre_epoch = 0  # number of epochs already trained (starting epoch when resuming)
BATCH_SIZE = 128
LR = 0.1

# Prepare and preprocess the dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Model definition: ResNet-18
net = ResNet18().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)

# Initialize the learning-rate scheduler
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCH)

# Training
if __name__ == "__main__":
    best_acc = 85
    print("Start Training, Resnet-18!")
    with open("acc.txt", "w") as f:
        with open("log.txt", "w") as f2:
            for epoch in range(pre_epoch, EPOCH):
                print('\nEpoch: %d' % (epoch + 1))
                net.train()
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                for i, data in enumerate(trainloader, 0):
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += predicted.eq(labels.data).cpu().sum()
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% '
                             % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('\n')
                    f2.flush()

                scheduler.step()  # update the learning rate (cosine annealing)

                # Evaluate test accuracy after each epoch
                print("Waiting Test!")
                net.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    test_loss = 0.0
                    for data in testloader:
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        loss = criterion(outputs, labels)
                        test_loss += loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum()
                    test_loss /= len(testloader)
                    acc = 100. * correct / total
                    print('Test accuracy: %.3f%%' % acc)
                    print('Average test loss: %.3f' % test_loss)

                    writer.add_scalar("Accuracy/test", acc, epoch)
                    writer.add_scalar("Loss/test", test_loss, epoch)

                    print('Saving model......')
                    torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                    f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                    f.write('\n')
                    f.flush()
                    if acc > best_acc:
                        f3 = open("best_acc.txt", "w")
                        f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                        f3.close()
                        best_acc = acc

            print("Training Finished, TotalEPOCH=%d" % EPOCH)
    writer.close()

Results
EPOCH=001,Accuracy= 42.310%
EPOCH=002,Accuracy= 47.890%
EPOCH=003,Accuracy= 64.050%
EPOCH=004,Accuracy= 65.870%
EPOCH=005,Accuracy= 67.530%
EPOCH=006,Accuracy= 74.500%
EPOCH=007,Accuracy= 75.850%
EPOCH=008,Accuracy= 77.010%
EPOCH=009,Accuracy= 75.640%
EPOCH=010,Accuracy= 80.280%
EPOCH=011,Accuracy= 78.570%
EPOCH=012,Accuracy= 78.440%
EPOCH=013,Accuracy= 81.430%
EPOCH=014,Accuracy= 78.180%
EPOCH=015,Accuracy= 81.310%
EPOCH=016,Accuracy= 81.330%
EPOCH=017,Accuracy= 82.780%
EPOCH=018,Accuracy= 84.130%
EPOCH=019,Accuracy= 86.330%
EPOCH=020,Accuracy= 85.930%
EPOCH=021,Accuracy= 83.240%
EPOCH=022,Accuracy= 85.170%
EPOCH=023,Accuracy= 84.940%
EPOCH=024,Accuracy= 87.320%
EPOCH=025,Accuracy= 85.530%
EPOCH=026,Accuracy= 87.830%
EPOCH=027,Accuracy= 88.000%
EPOCH=028,Accuracy= 86.530%
EPOCH=029,Accuracy= 86.850%
EPOCH=030,Accuracy= 88.650%
EPOCH=031,Accuracy= 88.960%
EPOCH=032,Accuracy= 89.220%
EPOCH=033,Accuracy= 89.670%
EPOCH=034,Accuracy= 90.310%
EPOCH=035,Accuracy= 90.220%
EPOCH=036,Accuracy= 90.890%
EPOCH=037,Accuracy= 91.430%
EPOCH=038,Accuracy= 91.570%
EPOCH=039,Accuracy= 92.370%
EPOCH=040,Accuracy= 92.770%
EPOCH=041,Accuracy= 92.960%
EPOCH=042,Accuracy= 93.420%
EPOCH=043,Accuracy= 93.540%
EPOCH=044,Accuracy= 93.810%
EPOCH=045,Accuracy= 93.930%
EPOCH=046,Accuracy= 94.010%
EPOCH=047,Accuracy= 94.110%
EPOCH=048,Accuracy= 94.060%
EPOCH=049,Accuracy= 93.970%
EPOCH=050,Accuracy= 94.120%


SGD with Only the ReduceLROnPlateau Scheduler

Code

In this code only the ReduceLROnPlateau scheduler is used. At the end of each epoch, the learning rate is adjusted based on the accumulated training loss sum_loss; everything else is unchanged.
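To make the scheduler's timing concrete, here is a standalone sketch (dummy parameter, fabricated metric that never improves) showing when the factor=0.1, patience=10 reduction fires:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Dummy setup: a single placeholder parameter for the optimizer to hold.
params = [torch.zeros(1, requires_grad=True)]
opt = torch.optim.SGD(params, lr=0.1, momentum=0.9)
sched = ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=10)

for epoch in range(1, 16):
    sched.step(1.0)  # monitored metric never improves
    print('epoch %02d  lr = %.4f' % (epoch, opt.param_groups[0]['lr']))
# The first call records the best value; patience=10 then tolerates ten
# epochs without improvement, and the LR is cut from 0.1 to 0.01 on the
# eleventh (epoch 12 in this trace).

The full script: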

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import argparse
from resnet import ResNet18
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import ReduceLROnPlateau

writer = SummaryWriter(log_dir='runs/Log_SGDAndSchedulers_3')

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Argument parsing, so options can be supplied on the command line
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
parser.add_argument('--net', default='./model/Resnet18.pth', help="path to net (to continue training)")
args = parser.parse_args()

# Hyperparameters
EPOCH = 50
pre_epoch = 0  # number of epochs already trained (starting epoch when resuming)
BATCH_SIZE = 128
LR = 0.1

# Prepare and preprocess the dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# Model definition: ResNet-18
net = ResNet18().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)

# Initialize the learning-rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

# Training
if __name__ == "__main__":
    best_acc = 85
    print("Start Training, Resnet-18!")
    with open("acc.txt", "w") as f:
        with open("log.txt", "w") as f2:
            for epoch in range(pre_epoch, EPOCH):
                print('\nEpoch: %d' % (epoch + 1))
                net.train()
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                for i, data in enumerate(trainloader, 0):
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += predicted.eq(labels.data).cpu().sum()
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% '
                             % (epoch + 1, (i + 1 + epoch * len(trainloader)), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('\n')
                    f2.flush()

                # After each epoch, let the plateau scheduler decide whether to cut the LR
                scheduler.step(sum_loss)

                # Evaluate test accuracy after each epoch
                print("Waiting Test!")
                net.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    test_loss = 0.0
                    for data in testloader:
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = net(images)
                        loss = criterion(outputs, labels)
                        test_loss += loss.item()
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum()
                    test_loss /= len(testloader)
                    acc = 100. * correct / total
                    print('Test accuracy: %.3f%%' % acc)
                    print('Average test loss: %.3f' % test_loss)

                    writer.add_scalar("Accuracy/test", acc, epoch)
                    writer.add_scalar("Loss/test", test_loss, epoch)

                    print('Saving model......')
                    torch.save(net.state_dict(), '%s/net_%03d.pth' % (args.outf, epoch + 1))
                    f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                    f.write('\n')
                    f.flush()
                    if acc > best_acc:
                        f3 = open("best_acc.txt", "w")
                        f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                        f3.close()
                        best_acc = acc

            print("Training Finished, TotalEPOCH=%d" % EPOCH)
    writer.close()
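A note before comparing runs: the three SGD scripts above all write to the same TensorBoard log_dir ('runs/Log_SGDAndSchedulers_3'), so their curves end up in a single run and overlap; give each experiment its own log_dir before training. The runs can then be compared side by side with:

tensorboard --logdir runs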
   