以上图网络为例:
1 主干网络模型的搭建(推荐配套学习视频:PyTorch深度学习快速入门教程(绝对通俗易懂!)【小土堆】_哔哩哔哩_bilibili)
1.1 创建一个python文件model.py,搭建主干网络模型
import torch
from torch import nn
class Tudui(nn.Module):
    """Small convolutional classifier: 3-channel 32x32 images -> 10 scores.

    The network is three (5x5 convolution + 2x2 max-pool) stages followed by
    a flatten and two fully connected layers. Every convolution uses stride 1
    and padding 2, so it keeps the spatial size; every pool halves it.
    """

    def __init__(self) -> None:
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),    # 3x32x32  -> 32x32x32
            nn.MaxPool2d(2),              # 32x32x32 -> 32x16x16
            nn.Conv2d(32, 32, 5, 1, 2),   # 32x16x16 -> 32x16x16
            nn.MaxPool2d(2),              # 32x16x16 -> 32x8x8
            nn.Conv2d(32, 64, 5, 1, 2),   # 32x8x8   -> 64x8x8
            nn.MaxPool2d(2),              # 64x8x8   -> 64x4x4
            nn.Flatten(),                 # 64x4x4   -> 1024
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Run a batch of shape (N, 3, 32, 32) through the network.

        Returns a tensor of shape (N, 10).
        """
        x = self.model(x)
        return x
# Entry point: sanity-check the network when this file is run directly.
if __name__ == '__main__':
    tudui = Tudui()
    # Verify the model by feeding a dummy input of known size and checking
    # that the output size is what we expect.
    # Batch size 64, 3 channels, 32x32 pixels.
    dummy_input = torch.ones((64, 3, 32, 32))
    output = tudui(dummy_input)
    print(output.shape)
1.2 创建另一个python文件train.py用于训练和测试,并引入刚才创建的模型:from model import *(下面的代码片段还用到 torch、torchvision、nn、DataLoader、SummaryWriter,对应的导入语句见后文"完整代码")
1)准备数据集:
# Training split (downloaded into the "dataset" directory if missing).
train_data = torchvision.datasets.CIFAR10("dataset", train=True, transform=torchvision.transforms.ToTensor(), download=True)
# Test split.
test_data = torchvision.datasets.CIFAR10("dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True)
# Report how many samples each split contains.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
# The second message reports the test split, so it is labelled as such.
print("测试数据集的长度为:{}".format(test_data_size))
2) 利用dataloader加载数据集
# Wrap each dataset in a DataLoader so the data is served batch by batch
# (64 samples per batch).
# NOTE: the name `test_databoader` (sic) is kept as-is because the test loop
# later in this walkthrough iterates over that exact name.
train_dataloader = DataLoader(dataset=train_data, batch_size=64)
test_databoader = DataLoader(dataset=test_data, batch_size=64)
3) 创建网络模型
# Instantiate the network defined in model.py.
tudui = Tudui()
4)定义损失函数
# Cross-entropy loss; commonly used for classification problems.
loss_fn = nn.CrossEntropyLoss()
5)定义优化器
# Learning rate: 1e-2 is scientific notation for 1 x 10^(-2), i.e. 0.01.
learning_rate = 1e-2
# Stochastic gradient descent over the network's parameters.
optimizer = torch.optim.SGD(params=tudui.parameters(), lr=learning_rate)
6)设置训练网络的参数
# Counters: number of training steps taken and number of test rounds run.
total_train_step = total_test_step = 0
# Number of training epochs.
epoch = 10
# TensorBoard writer; event files are written under "logs_train".
writer = SummaryWriter(log_dir="logs_train")
7)设置训练轮数、训练步骤开始
for i in range(epoch):  # one iteration per training epoch
    print("----------第{}轮训练开始--------------".format(i+1))
    # Training phase.
    # Calling train() is not what makes training possible. It switches layers
    # such as Dropout and BatchNorm into training behaviour, so it must be
    # called when the model contains them and is harmless when it does not.
    tudui.train()
    for data in train_dataloader:  # fetch the data one batch at a time
        imgs, targets = data
        output = tudui(imgs)  # the network's actual output
        # (1) Loss between the actual output and the targets.
        loss = loss_fn(output, targets)
        # (2) Optimisation step.
        optimizer.zero_grad()  # reset the gradients
        loss.backward()  # back-propagate to get a gradient for every parameter
        optimizer.step()  # update the parameters using those gradients
        total_train_step = total_train_step + 1  # one step per batch
        if total_train_step % 100 == 0:  # log every 100th step only; every step is too noisy
            # .item() extracts the plain Python number from a one-element tensor.
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
8)测试
# NOTE(review): this fragment appears to belong inside the per-epoch loop,
# after that epoch's training batches (the later save step uses the loop
# variable `i`); indent it accordingly when assembling the script.
# (1) Per-epoch test accumulators.
total_test_loss = 0  # summed loss over all test batches
total_accuracy = 0  # number of correctly classified test samples
# (2) Evaluation phase.
# eval() switches layers such as Dropout and BatchNorm into inference
# behaviour; it only affects those specific layers.
tudui.eval()
# no_grad() turns off gradient tracking: testing neither adjusts gradients
# nor uses them to optimise the model.
with torch.no_grad():
    for data in test_databoader:  # iterate over the test set batch by batch
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)
        total_test_loss = total_test_loss + loss.item()
        # argmax(1) gives the index of the largest score in each row;
        # comparing with targets yields True/False per sample, and sum()
        # counts the matches.
        accuracy = (outputs.argmax(1) == targets).sum()
        # .item() keeps the running total a plain Python int instead of a tensor.
        total_accuracy = total_accuracy + accuracy.item()
9)计算loss,正确率,以此展现训练网络在测试集上的效果
# Report how the network performs on the whole test set.
print("整体测试集上的Loss: {}".format(total_test_loss))
# Accuracy = correctly classified samples / total number of test samples.
print("整体测试集上的正确率:{}".format(total_accuracy/test_data_size))
# Log the results to TensorBoard.
writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
writer.add_scalar("test_loss", total_test_loss, total_test_step)
# Advance the test-round counter by one (the original added an undefined name).
total_test_step = total_test_step + 1
10)保存模型(在特定步数或某一轮保存模型)
# Option 1: save the whole model object to a file named after the loop index.
checkpoint_path = "tudui_{}.pth".format(i)
torch.save(tudui, checkpoint_path)
# Option 2: save only the model's state dict instead.
# torch.save(tudui.state_dict(), "tudui_{}.pth".format(i))
print("模型已保存")
完整代码:train.py
CPU训练:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
import os

# Prepare the datasets (downloaded into "data" if missing).
train_data = torchvision.datasets.CIFAR10('data', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10('data', train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集长度:{}".format(train_data_size))
print("测试数据集长度:{}".format(test_data_size))

# Load the datasets in batches of 64.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Build the network.
tudui = Tudui()

# Loss function.
loss_fn = nn.CrossEntropyLoss()

# Optimizer.
learning_rate = 1e-2  # i.e. 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)

# Bookkeeping.
total_train_step = 0  # training steps (batches) processed so far
total_test_step = 0  # test rounds completed so far
epoch = 10  # number of training epochs

# TensorBoard writer.
writer = SummaryWriter("logs/trainlogs")

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs("model", exist_ok=True)

for i in range(epoch):
    print("-----第{}轮训练-----".format(i+1))

    # Training phase.
    tudui.train()  # put the model in training mode
    for data in train_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)

        # Optimisation step: clear old gradients, back-propagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:  # log every 100th step only
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Test phase.
    tudui.eval()  # put the model in evaluation mode
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed while testing
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            # Count the samples whose highest-scoring class equals the target.
            accuracy = (outputs.argmax(1) == targets).sum()
            # .item() keeps the running total a plain Python int.
            total_accuracy = total_accuracy + accuracy.item()

    print("测试集loss:{}".format(total_test_loss))
    print("测试集正确率: {}".format(total_accuracy / test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)

    # Save a checkpoint of the whole model after every epoch.
    torch.save(tudui, "model/tudui_{}.pth".format(i))

writer.close()
GPU训练,方式1:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
import os

# Prepare the datasets (downloaded into "data" if missing).
train_data = torchvision.datasets.CIFAR10('data', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10('data', train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集长度:{}".format(train_data_size))
print("测试数据集长度:{}".format(test_data_size))

# Load the datasets in batches of 64.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Query CUDA availability once instead of on every batch.
use_cuda = torch.cuda.is_available()

# Build the network.
tudui = Tudui()
if use_cuda:  # 1. move the model to the GPU
    tudui = tudui.cuda()

# Loss function.
loss_fn = nn.CrossEntropyLoss()
if use_cuda:  # 2. move the loss function to the GPU
    loss_fn = loss_fn.cuda()

# Optimizer.
learning_rate = 1e-2  # i.e. 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)

# Bookkeeping.
total_train_step = 0  # training steps (batches) processed so far
total_test_step = 0  # test rounds completed so far
epoch = 10  # number of training epochs

# TensorBoard writer.
writer = SummaryWriter("logs/trainlogs")

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs("model", exist_ok=True)

for i in range(epoch):
    print("-----第{}轮训练-----".format(i+1))

    # Training phase.
    tudui.train()  # put the model in training mode
    for data in train_dataloader:
        imgs, targets = data
        if use_cuda:  # 3. move the data to the GPU
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)

        # Optimisation step: clear old gradients, back-propagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:  # log every 100th step only
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Test phase.
    tudui.eval()  # put the model in evaluation mode
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed while testing
        for data in test_dataloader:
            imgs, targets = data
            if use_cuda:  # 3. move the data to the GPU
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            # Count the samples whose highest-scoring class equals the target.
            accuracy = (outputs.argmax(1) == targets).sum()
            # .item() copies the count off the device into a plain Python int.
            total_accuracy = total_accuracy + accuracy.item()

    print("测试集loss:{}".format(total_test_loss))
    print("测试集正确率: {}".format(total_accuracy / test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)

    # Save a checkpoint of the whole model after every epoch.
    torch.save(tudui, "model/tudui_{}.pth".format(i))

writer.close()
GPU训练,方式2:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *
import os

# Choose the training device: the first CUDA GPU when one is available,
# otherwise the CPU, so the script also runs on machines without CUDA.
# To force a device, use torch.device("cpu") or torch.device("cuda:0").
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Prepare the datasets (downloaded into "data" if missing).
train_data = torchvision.datasets.CIFAR10('data', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10('data', train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集长度:{}".format(train_data_size))
print("测试数据集长度:{}".format(test_data_size))

# Load the datasets in batches of 64.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Build the network and move it to the chosen device.
tudui = Tudui()
tudui = tudui.to(device)

# Loss function, moved to the same device.
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Optimizer.
learning_rate = 1e-2  # i.e. 0.01
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)

# Bookkeeping.
total_train_step = 0  # training steps (batches) processed so far
total_test_step = 0  # test rounds completed so far
epoch = 10  # number of training epochs

# TensorBoard writer.
writer = SummaryWriter("logs/trainlogs")

# torch.save does not create missing directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs("model", exist_ok=True)

for i in range(epoch):
    print("-----第{}轮训练-----".format(i+1))

    # Training phase.
    tudui.train()  # put the model in training mode
    for data in train_dataloader:
        imgs, targets = data
        # Each batch must live on the same device as the model.
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)

        # Optimisation step: clear old gradients, back-propagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:  # log every 100th step only
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Test phase.
    tudui.eval()  # put the model in evaluation mode
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed while testing
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            # Count the samples whose highest-scoring class equals the target.
            accuracy = (outputs.argmax(1) == targets).sum()
            # .item() copies the count off the device into a plain Python int.
            total_accuracy = total_accuracy + accuracy.item()

    print("测试集loss:{}".format(total_test_loss))
    print("测试集正确率: {}".format(total_accuracy / test_data_size))
    total_test_step = total_test_step + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)

    # Save a checkpoint of the whole model after every epoch.
    torch.save(tudui, "model/tudui_{}.pth".format(i))

writer.close()