数据、代码等相关资料来源于b站日月光华老师视频,此博客作为学习记录。
未经softmax激活的输出一般称做logits,它也能返回正确结果。softmax本质就是个归一化,把概率值进行归一化,让所有的结果加起来等于1。比如三类:没softmax之前可能是[3,7,11],归一化以后就变成了[0.0003,0.0179,0.982]。
argmax方法:返回概率最大的值的索引。调用方法:torch.argmax。
一、各部分代码
首先导入包,并对数据集进行封装:
import torch
from torch import nn
import torch.utils.data
import torchvision
from torchvision.transforms import ToTensor
import numpy as np
# MNIST datasets: ToTensor() converts each PIL image to a float tensor in [0, 1];
# download=True fetches the data into ./data on the first run.
train_ds = torchvision.datasets.MNIST('data', train=True, transform=ToTensor(), download=True)
test_ds = torchvision.datasets.MNIST('data', train=False, transform=ToTensor(), download=True)
# Wrap the datasets in loaders: batches of 64; shuffle only the training data.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=64, shuffle=False)
编写网络代码进行实例化:
class Model(nn.Module):
    """Three-layer fully connected classifier for 28x28 MNIST digits.

    Architecture: 784 -> 120 -> 84 -> 10, with one output logit per
    digit class. The final layer is left unactivated: CrossEntropyLoss
    applies log-softmax itself, and argmax over logits still yields the
    predicted class.
    """

    def __init__(self):
        super().__init__()
        # A flattened 28*28 image feeds 120 hidden units.
        self.linear_1 = nn.Linear(28 * 28, 120)
        # Middle layer: 120 inputs down to 84 units.
        self.linear_2 = nn.Linear(120, 84)
        # Output layer: 10 units, one per digit 0-9 (the class count).
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, input):
        # Flatten each sample; -1 lets the batch dimension adapt.
        flat = input.view(-1, 28 * 28)
        hidden = torch.relu(self.linear_1(flat))
        hidden = torch.relu(self.linear_2(hidden))
        # Raw logits, no activation on the output layer.
        return self.linear_3(hidden)
# Define the loss: CrossEntropyLoss expects raw logits and applies
# log-softmax + NLL internally.
loss_fn = torch.nn.CrossEntropyLoss()
# Optimization: adjust model parameters to lower the computed loss.
# Instantiate the model on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Model().to(device)
opt = torch.optim.SGD(model.parameters(), lr=0.001)
编写训练循环:
# Training loop for a single epoch.
def train(train_dl, model, loss_fn, optimizer):
    """Run one full pass over ``train_dl`` and update ``model``.

    Returns ``(accuracy, mean_batch_loss)`` for the epoch. Uses the
    module-level ``device`` to place each batch.
    """
    total_samples = len(train_dl.dataset)   # samples in the whole dataset
    batch_count = len(train_dl)             # number of batches per epoch
    running_loss, hits = 0, 0               # accumulated loss / correct count
    for x, y in train_dl:                   # x: inputs, y: targets
        x, y = x.to(device), y.to(device)
        logits = model(x)
        batch_loss = loss_fn(logits, y)
        # Standard step: clear stale grads, backprop, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        with torch.no_grad():
            # A prediction is correct when the argmax logit matches the
            # label; the boolean mask is cast to 0/1 floats and summed.
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
            running_loss += batch_loss.item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
相似的,编写测试循环:
# Evaluation loop, mirroring train() but without parameter updates.
def test(test_dl, model, loss_fn):
    """Evaluate ``model`` on ``test_dl``.

    Returns ``(accuracy, mean_batch_loss)``. Uses the module-level
    ``device`` to place each batch; no gradients are tracked.
    """
    total_samples = len(test_dl.dataset)    # samples in the whole dataset
    batch_count = len(test_dl)              # number of batches
    running_loss, hits = 0, 0               # accumulated loss / correct count
    with torch.no_grad():                   # evaluation needs no gradients
        for x, y in test_dl:                # x: inputs, y: targets
            x, y = x.to(device), y.to(device)
            logits = model(x)
            running_loss += loss_fn(logits, y).item()
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
训练50个epoch,每一个epoch代表将全部数据集训练一遍;建立一个模板,按模板把每个epoch的loss和准确率打印出来:
# Train for 50 epochs (one epoch = one full pass over the training set),
# recording per-epoch metrics so they can be plotted afterwards.
# BUG FIX: the original loop used `epochs` and the four metric lists
# without ever defining them (NameError); they are initialized here.
epochs = 50
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(epochs):
    epoch_acc, epoch_loss = train(train_dl, model, loss_fn, opt)
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
    train_acc.append(epoch_acc)
    train_loss.append(epoch_loss)
    test_acc.append(epoch_test_acc)
    # BUG FIX: the original appended the list itself
    # (test_loss.append(test_loss)); append the scalar epoch loss.
    test_loss.append(epoch_test_loss)
    template = ('epoch:{:2d},train_loss:{:.5f},train_acc:{:.1f},test_loss:{:.5f},test_acc:{:.1f},')
    print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
print('Done')
运行代码可见:
把loss和acc做成曲线图进行展示:
# Plot loss curves for train vs. test.
# NOTE(review): `plt` requires `import matplotlib.pyplot as plt`, which this
# section's import list omits (the complete code in section three has it).
plt.plot(range(epochs), train_loss, label='train_loss')
plt.plot(range(epochs), test_loss, label='test_loss')
plt.legend()
plt.show()
# Plot accuracy curves for train vs. test.
plt.plot(range(epochs), train_acc, label='train_acc')
plt.plot(range(epochs), test_acc, label='test_acc')
plt.legend()
plt.show()
二、通用fit函数进行改进
定义了一个fit函数,将训练过程进行了封装。以后再训练的时候只需要调用fit函数就可以了。
def fit(epochs, train_dl, test_dl, model, loss_fn, opt):
    """Train ``model`` for ``epochs`` epochs, printing per-epoch metrics.

    Delegates each epoch to the module-level ``train``/``test`` helpers
    and returns four parallel lists, one entry per epoch:
    ``(train_loss, train_acc, test_loss, test_acc)``.
    """
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    for epoch in range(epochs):
        epoch_acc, epoch_loss = train(train_dl, model, loss_fn, opt)
        epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
        train_acc.append(epoch_acc)
        train_loss.append(epoch_loss)
        test_acc.append(epoch_test_acc)
        # BUG FIX: the original appended the list itself
        # (test_loss.append(test_loss)); append the scalar epoch loss.
        test_loss.append(epoch_test_loss)
        template = ('epoch:{:2d},train_loss:{:.5f},train_acc:{:.1f},test_loss:{:.5f},test_acc:{:.1f},')
        print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
    print('Done')
    return train_loss, train_acc, test_loss, test_acc

(train_loss, train_acc, test_loss, test_acc) = fit(20, train_dl, test_dl, model, loss_fn, opt)
三、完整代码
import torch
from torch import nn
import torch.utils.data
import torchvision
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
# MNIST datasets: ToTensor() converts each PIL image to a float tensor in [0, 1];
# download=True fetches the data into ./data on the first run.
train_ds = torchvision.datasets.MNIST('data', train=True, transform=ToTensor(), download=True)
test_ds = torchvision.datasets.MNIST('data', train=False, transform=ToTensor(), download=True)
# Wrap the datasets in loaders: batches of 64; shuffle only the training data.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=64, shuffle=False)
class Model(nn.Module):
    """Three-layer fully connected classifier for 28x28 MNIST digits.

    Architecture: 784 -> 120 -> 84 -> 10, with one output logit per
    digit class. The final layer is left unactivated: CrossEntropyLoss
    applies log-softmax itself, and argmax over logits still yields the
    predicted class.
    """

    def __init__(self):
        super().__init__()
        # A flattened 28*28 image feeds 120 hidden units.
        self.linear_1 = nn.Linear(28 * 28, 120)
        # Middle layer: 120 inputs down to 84 units.
        self.linear_2 = nn.Linear(120, 84)
        # Output layer: 10 units, one per digit 0-9 (the class count).
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, input):
        # Flatten each sample; -1 lets the batch dimension adapt.
        flat = input.view(-1, 28 * 28)
        hidden = torch.relu(self.linear_1(flat))
        hidden = torch.relu(self.linear_2(hidden))
        # Raw logits, no activation on the output layer.
        return self.linear_3(hidden)
# Define the loss: CrossEntropyLoss expects raw logits and applies
# log-softmax + NLL internally.
loss_fn = torch.nn.CrossEntropyLoss()
# Optimization: adjust model parameters to lower the computed loss.
# Instantiate the model on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Model().to(device)
opt = torch.optim.SGD(model.parameters(), lr=0.001)
# Training loop for a single epoch.
def train(train_dl, model, loss_fn, optimizer):
    """Run one full pass over ``train_dl`` and update ``model``.

    Returns ``(accuracy, mean_batch_loss)`` for the epoch. Uses the
    module-level ``device`` to place each batch.
    """
    total_samples = len(train_dl.dataset)   # samples in the whole dataset
    batch_count = len(train_dl)             # number of batches per epoch
    running_loss, hits = 0, 0               # accumulated loss / correct count
    for x, y in train_dl:                   # x: inputs, y: targets
        x, y = x.to(device), y.to(device)
        logits = model(x)
        batch_loss = loss_fn(logits, y)
        # Standard step: clear stale grads, backprop, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        with torch.no_grad():
            # A prediction is correct when the argmax logit matches the
            # label; the boolean mask is cast to 0/1 floats and summed.
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
            running_loss += batch_loss.item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
# Evaluation loop, mirroring train() but without parameter updates.
def test(test_dl, model, loss_fn):
    """Evaluate ``model`` on ``test_dl``.

    Returns ``(accuracy, mean_batch_loss)``. Uses the module-level
    ``device`` to place each batch; no gradients are tracked.
    """
    total_samples = len(test_dl.dataset)    # samples in the whole dataset
    batch_count = len(test_dl)              # number of batches
    running_loss, hits = 0, 0               # accumulated loss / correct count
    with torch.no_grad():                   # evaluation needs no gradients
        for x, y in test_dl:                # x: inputs, y: targets
            x, y = x.to(device), y.to(device)
            logits = model(x)
            running_loss += loss_fn(logits, y).item()
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
# Train for a number of epochs (one epoch = one full pass over the data).
# NOTE(review): the original comment said 50 epochs, but the call below
# trains for 20.
def fit(epochs, train_dl, test_dl, model, loss_fn, opt):
    """Train ``model`` for ``epochs`` epochs, printing per-epoch metrics.

    Delegates each epoch to the module-level ``train``/``test`` helpers
    and returns four parallel lists, one entry per epoch:
    ``(train_loss, train_acc, test_loss, test_acc)``.
    """
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    for epoch in range(epochs):
        epoch_acc, epoch_loss = train(train_dl, model, loss_fn, opt)
        epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
        train_acc.append(epoch_acc)
        train_loss.append(epoch_loss)
        test_acc.append(epoch_test_acc)
        # BUG FIX: the original appended the list itself
        # (test_loss.append(test_loss)); append the scalar epoch loss.
        test_loss.append(epoch_test_loss)
        template = ('epoch:{:2d},train_loss:{:.5f},train_acc:{:.1f},test_loss:{:.5f},test_acc:{:.1f},')
        print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
    print('Done')
    return train_loss, train_acc, test_loss, test_acc

(train_loss, train_acc, test_loss, test_acc) = fit(20, train_dl, test_dl, model, loss_fn, opt)