1.完整的模型训练思路
(1)准备数据集
(2)加载数据集
(3)建网络模型
(4)损失函数
(5)优化器
(6)设置训练网络参数
(7)开始训练
(8)测试验证
(9)数据可视化与模型保存
2.代码详解
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter
# Prepare the CIFAR10 train/test datasets (downloaded to ./datasets on first run),
# converting each PIL image to a (3, 32, 32) float tensor.
train_dataset = datasets.CIFAR10("datasets", train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.CIFAR10("datasets", train=False, transform=transforms.ToTensor(), download=True)

# Wrap both datasets in DataLoaders: batches of 64, reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)
# 搭建神经网络
class My_Module(nn.Module):
    """CNN classifier for CIFAR10.

    Three 5x5 conv + 2x2 max-pool stages followed by a two-layer MLP head.
    Maps a (N, 3, 32, 32) batch of images to (N, 10) class scores.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Spatial size: 32 -> 16 -> 8 -> 4 after the three pools,
        # so Flatten yields 64 * 4 * 4 = 1024 features.
        self.seq = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Run the sequential stack on a batch of images and return the logits."""
        return self.seq(x)
# Instantiate the network defined above
my_model = My_Module()
# Loss function: cross-entropy over the 10 CIFAR10 classes
loss = nn.CrossEntropyLoss()
# Optimizer: plain SGD over all model parameters
# NOTE(review): lr=0.1 is aggressive for SGD on this net — confirm it converges
optimizer = optim.SGD(params=my_model.parameters(), lr=0.1)
# Bookkeeping counters for logging
total_train_step = 0 # number of training iterations so far
total_test_step = 0 # number of completed evaluation passes
epoch = 10 # number of training epochs
# TensorBoard writer for loss/accuracy visualization
writer = SummaryWriter("logs_train")
# Main train/evaluate loop: one training pass and one full test-set
# evaluation per epoch, with TensorBoard logging and a per-epoch checkpoint.
for i in range(epoch):
    print("----------------------第{}轮训练开始-------------------".format(i + 1))

    # --- Training phase ---
    my_model.train()  # only matters for layers like Dropout/BatchNorm (none here)
    for data in train_dataloader:
        imgs, targets = data
        output = my_model(imgs)
        loss_result = loss(output, targets)
        optimizer.zero_grad()
        loss_result.backward()
        optimizer.step()
        total_train_step += 1
        # BUG FIX: was `if total_train_step == 0`, which can never be true right
        # after the increment, so training loss was never logged; log every 100 steps.
        if total_train_step % 100 == 0:
            print("训练次数:{},Loss:{}".format(total_train_step, loss_result.item()))
            writer.add_scalar(tag="train_Loss", scalar_value=loss_result.item(), global_step=total_train_step)

    # --- Evaluation phase ---
    my_model.eval()  # only matters for layers like Dropout/BatchNorm (none here)
    total_test_loss = 0
    test_accuracy = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data in test_dataloader:
            imgs, targets = data
            output = my_model(imgs)
            loss_result = loss(output, targets)
            # .item() extracts a plain float so we accumulate numbers, not tensors
            total_test_loss = total_test_loss + loss_result.item()
            # argmax(1) gives the predicted class index for each sample
            accuracy = (output.argmax(1) == targets).sum().item()
            test_accuracy = test_accuracy + accuracy
    print("整体测试集的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率为:{}".format(test_accuracy/len(test_dataset)))
    writer.add_scalar(tag="test_Loss", scalar_value=total_test_loss, global_step=total_test_step)
    writer.add_scalar(tag="test_accuracy", scalar_value=test_accuracy, global_step=total_test_step)
    total_test_step += 1

    # Save a checkpoint after every epoch.
    # NOTE(review): this pickles the whole model object; saving
    # my_model.state_dict() is the more portable convention.
    torch.save(my_model, "model_{}.pth".format(i + 1))
    print("模型已保存")
writer.close()
3.利用GPU训练
3.1 方式一
- 模型.cuda()
- 数据.cuda()
- 损失函数.cuda()
# Build the network and move it onto the GPU when one is present.
my_model = My_Module()
if torch.cuda.is_available():
    my_model = my_model.cuda()

# Loss function, likewise moved to the GPU when available.
loss = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss = loss.cuda()
# Training phase (fragment): each batch is moved onto the GPU before use.
my_model.train()  # only matters for layers like Dropout/BatchNorm
for data in train_dataloader:
    imgs, targets = data
    if torch.cuda.is_available():
        imgs = imgs.cuda()
        targets = targets.cuda()
# Evaluation phase (fragment): move each test batch onto the GPU as well.
my_model.eval()  # only matters for layers like Dropout/BatchNorm
total_test_loss = 0
test_accuracy = 0
with torch.no_grad():
    for data in test_dataloader:
        imgs, targets = data
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            # BUG FIX: original read `targets = imgs.cuda()`, overwriting the
            # labels with the images; move the labels themselves instead.
            targets = targets.cuda()
3.2 方式二
- device = torch.device("cuda")
- model = model.to(device)
- loss = loss.to(device)
- images = images.to(device)
- targets = targets.to(device)
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter
# Prepare the CIFAR10 train/test datasets (downloaded to ./datasets on first run)
train_dataset = datasets.CIFAR10(root="datasets", train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.CIFAR10(root="datasets", train=False, transform=transforms.ToTensor(), download=True)
# Wrap the datasets in DataLoaders: batches of 64, reshuffled every epoch
train_dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True)
# 搭建神经网络
class My_Module(nn.Module):
    """Small CIFAR10 classifier.

    Three conv(5x5, pad 2) + max-pool(2x2) stages shrink 32x32 inputs to
    4x4x64, which a two-layer linear head maps to 10 class scores.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.seq = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),  # 64 channels * 4 * 4 spatial = 1024 features
            nn.Linear(in_features=1024, out_features=64),
            nn.Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        """Map a (N, 3, 32, 32) batch to (N, 10) logits."""
        return self.seq(x)
# Select the compute device.
# BUG FIX: unconditional torch.device('cuda') makes every subsequent
# .to(device) call fail on machines without a GPU; fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the network and move it onto the selected device
my_model = My_Module()
my_model = my_model.to(device)

# Loss function: cross-entropy over the 10 CIFAR10 classes
loss = nn.CrossEntropyLoss()
loss = loss.to(device)

# Optimizer: plain SGD over all model parameters
# NOTE(review): lr=0.1 is aggressive for SGD on this net — confirm it converges
optimizer = optim.SGD(params=my_model.parameters(), lr=0.1)

# Bookkeeping counters for logging
total_train_step = 0  # number of training iterations so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of training epochs

# TensorBoard writer for loss/accuracy visualization
writer = SummaryWriter("logs_train")
# Main train/evaluate loop: one training pass and one full test-set
# evaluation per epoch, with TensorBoard logging and a per-epoch checkpoint.
# All batches are moved to `device` before the forward pass.
for i in range(epoch):
    print("----------------------第{}轮训练开始-------------------".format(i + 1))

    # --- Training phase ---
    my_model.train()  # only matters for layers like Dropout/BatchNorm (none here)
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        output = my_model(imgs)
        loss_result = loss(output, targets)
        optimizer.zero_grad()
        loss_result.backward()
        optimizer.step()
        total_train_step += 1
        # BUG FIX: was `if total_train_step == 0`, which can never be true right
        # after the increment, so training loss was never logged; log every 100 steps.
        if total_train_step % 100 == 0:
            print("训练次数:{},Loss:{}".format(total_train_step, loss_result.item()))
            writer.add_scalar(tag="train_Loss", scalar_value=loss_result.item(), global_step=total_train_step)

    # --- Evaluation phase ---
    my_model.eval()  # only matters for layers like Dropout/BatchNorm (none here)
    total_test_loss = 0
    test_accuracy = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            output = my_model(imgs)
            loss_result = loss(output, targets)
            # .item() extracts a plain float so we accumulate numbers, not tensors
            total_test_loss = total_test_loss + loss_result.item()
            # argmax(1) gives the predicted class index for each sample
            accuracy = (output.argmax(1) == targets).sum().item()
            test_accuracy = test_accuracy + accuracy
    print("整体测试集的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率为:{}".format(test_accuracy / len(test_dataset)))
    writer.add_scalar(tag="test_Loss", scalar_value=total_test_loss, global_step=total_test_step)
    writer.add_scalar(tag="test_accuracy", scalar_value=test_accuracy, global_step=total_test_step)
    total_test_step += 1

    # Save a checkpoint after every epoch.
    # NOTE(review): this pickles the whole model object; saving
    # my_model.state_dict() is the more portable convention.
    torch.save(my_model, "model_{}.pth".format(i + 1))
    print("模型已保存")
writer.close()
4.Mac利用M1orM2芯片进行加速
- device = torch.device("mps")
- model = model.to(device)
- loss = loss.to(device)
- images = images.to(device)
- targets = targets.to(device)