数据、代码等相关资料来源于b站日月光华老师视频,此博客作为学习记录。
未经softmax激活的输出一般称做logits,它也能返回正确结果。softmax本质就是个归一化,把概率值进行归一化,让所有的结果加起来等于1。比如三类:没softmax之前可能是[3,7,11],归一化以后就变成了[0.0003,0.0179,0.982]。
argmax方法:返回概率最大的值的索引。调用方法:torch.argmax。
一、各部分代码
首先导入包,并对数据集进行封装:
import torch
from torch import nn
import torch.utils.data
import torchvision
from torchvision.transforms import ToTensor
import numpy as np
# MNIST datasets: ToTensor() converts each PIL image to a float tensor in [0, 1];
# download=True fetches the data into ./data on the first run.
train_ds = torchvision.datasets.MNIST('data', train=True, transform=ToTensor(), download=True)
test_ds = torchvision.datasets.MNIST('data', train=False, transform=ToTensor(), download=True)
# Wrap the datasets in loaders: batches of 64; shuffle only the training data.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=64, shuffle=False)
编写网络代码进行实例化:
class Model(nn.Module):
    """Three-layer fully connected classifier for 28x28 MNIST digits.

    Architecture: 784 -> 120 -> 84 -> 10, with one output logit per
    digit class. The final layer is left unactivated: CrossEntropyLoss
    applies log-softmax itself, and argmax over logits still yields the
    predicted class.
    """

    def __init__(self):
        super().__init__()
        # A flattened 28*28 image feeds 120 hidden units.
        self.linear_1 = nn.Linear(28 * 28, 120)
        # Middle layer: 120 inputs down to 84 units.
        self.linear_2 = nn.Linear(120, 84)
        # Output layer: 10 units, one per digit 0-9 (the class count).
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, input):
        # Flatten each sample; -1 lets the batch dimension adapt.
        flat = input.view(-1, 28 * 28)
        hidden = torch.relu(self.linear_1(flat))
        hidden = torch.relu(self.linear_2(hidden))
        # Raw logits, no activation on the output layer.
        return self.linear_3(hidden)
# Define the loss: CrossEntropyLoss expects raw logits and applies
# log-softmax + NLL internally.
loss_fn = torch.nn.CrossEntropyLoss()
# Optimization: adjust model parameters to lower the computed loss.
# Instantiate the model on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Model().to(device)
opt = torch.optim.SGD(model.parameters(), lr=0.001)
编写训练循环:
# Training loop for a single epoch.
def train(train_dl, model, loss_fn, optimizer):
    """Run one full pass over ``train_dl`` and update ``model``.

    Returns ``(accuracy, mean_batch_loss)`` for the epoch. Uses the
    module-level ``device`` to place each batch.
    """
    total_samples = len(train_dl.dataset)   # samples in the whole dataset
    batch_count = len(train_dl)             # number of batches per epoch
    running_loss, hits = 0, 0               # accumulated loss / correct count
    for x, y in train_dl:                   # x: inputs, y: targets
        x, y = x.to(device), y.to(device)
        logits = model(x)
        batch_loss = loss_fn(logits, y)
        # Standard step: clear stale grads, backprop, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        with torch.no_grad():
            # A prediction is correct when the argmax logit matches the
            # label; the boolean mask is cast to 0/1 floats and summed.
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
            running_loss += batch_loss.item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
相似的,编写测试循环:
# Evaluation loop, mirroring train() but without parameter updates.
def test(test_dl, model, loss_fn):
    """Evaluate ``model`` on ``test_dl``.

    Returns ``(accuracy, mean_batch_loss)``. Uses the module-level
    ``device`` to place each batch; no gradients are tracked.
    """
    total_samples = len(test_dl.dataset)    # samples in the whole dataset
    batch_count = len(test_dl)              # number of batches
    running_loss, hits = 0, 0               # accumulated loss / correct count
    with torch.no_grad():                   # evaluation needs no gradients
        for x, y in test_dl:                # x: inputs, y: targets
            x, y = x.to(device), y.to(device)
            logits = model(x)
            running_loss += loss_fn(logits, y).item()
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
训练50个epoch,每一个epoch代表将全部数据集训练一遍;建立一个模板,按模板把每个epoch的loss和准确率打印出来:
# Train for 50 epochs (one epoch = one full pass over the training set),
# recording per-epoch metrics so they can be plotted afterwards.
# BUG FIX: the original loop used `epochs` and the four metric lists
# without ever defining them (NameError); they are initialized here.
epochs = 50
train_loss = []
train_acc = []
test_loss = []
test_acc = []
for epoch in range(epochs):
    epoch_acc, epoch_loss = train(train_dl, model, loss_fn, opt)
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
    train_acc.append(epoch_acc)
    train_loss.append(epoch_loss)
    test_acc.append(epoch_test_acc)
    # BUG FIX: the original appended the list itself
    # (test_loss.append(test_loss)); append the scalar epoch loss.
    test_loss.append(epoch_test_loss)
    template = ('epoch:{:2d},train_loss:{:.5f},train_acc:{:.1f},test_loss:{:.5f},test_acc:{:.1f},')
    print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
print('Done')
运行代码可见:
把loss和acc做成曲线图进行展示:
# Plot loss curves for train vs. test.
# NOTE(review): `plt` requires `import matplotlib.pyplot as plt`, which this
# section's import list omits (the complete code in section three has it).
plt.plot(range(epochs), train_loss, label='train_loss')
plt.plot(range(epochs), test_loss, label='test_loss')
plt.legend()
plt.show()
# Plot accuracy curves for train vs. test.
plt.plot(range(epochs), train_acc, label='train_acc')
plt.plot(range(epochs), test_acc, label='test_acc')
plt.legend()
plt.show()
二、通用fit函数进行改进
定义了一个fit函数,将训练过程进行了封装。以后再训练的时候只需要调用fit函数就可以了。
def fit(epochs, train_dl, test_dl, model, loss_fn, opt):
    """Train ``model`` for ``epochs`` epochs, printing per-epoch metrics.

    Delegates each epoch to the module-level ``train``/``test`` helpers
    and returns four parallel lists, one entry per epoch:
    ``(train_loss, train_acc, test_loss, test_acc)``.
    """
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    for epoch in range(epochs):
        epoch_acc, epoch_loss = train(train_dl, model, loss_fn, opt)
        epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
        train_acc.append(epoch_acc)
        train_loss.append(epoch_loss)
        test_acc.append(epoch_test_acc)
        # BUG FIX: the original appended the list itself
        # (test_loss.append(test_loss)); append the scalar epoch loss.
        test_loss.append(epoch_test_loss)
        template = ('epoch:{:2d},train_loss:{:.5f},train_acc:{:.1f},test_loss:{:.5f},test_acc:{:.1f},')
        print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
    print('Done')
    return train_loss, train_acc, test_loss, test_acc

(train_loss, train_acc, test_loss, test_acc) = fit(20, train_dl, test_dl, model, loss_fn, opt)
三、完整代码
import torch
from torch import nn
import torch.utils.data
import torchvision
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
# MNIST datasets: ToTensor() converts each PIL image to a float tensor in [0, 1];
# download=True fetches the data into ./data on the first run.
train_ds = torchvision.datasets.MNIST('data', train=True, transform=ToTensor(), download=True)
test_ds = torchvision.datasets.MNIST('data', train=False, transform=ToTensor(), download=True)
# Wrap the datasets in loaders: batches of 64; shuffle only the training data.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=64, shuffle=False)
class Model(nn.Module):
    """Three-layer fully connected classifier for 28x28 MNIST digits.

    Architecture: 784 -> 120 -> 84 -> 10, with one output logit per
    digit class. The final layer is left unactivated: CrossEntropyLoss
    applies log-softmax itself, and argmax over logits still yields the
    predicted class.
    """

    def __init__(self):
        super().__init__()
        # A flattened 28*28 image feeds 120 hidden units.
        self.linear_1 = nn.Linear(28 * 28, 120)
        # Middle layer: 120 inputs down to 84 units.
        self.linear_2 = nn.Linear(120, 84)
        # Output layer: 10 units, one per digit 0-9 (the class count).
        self.linear_3 = nn.Linear(84, 10)

    def forward(self, input):
        # Flatten each sample; -1 lets the batch dimension adapt.
        flat = input.view(-1, 28 * 28)
        hidden = torch.relu(self.linear_1(flat))
        hidden = torch.relu(self.linear_2(hidden))
        # Raw logits, no activation on the output layer.
        return self.linear_3(hidden)
# Define the loss: CrossEntropyLoss expects raw logits and applies
# log-softmax + NLL internally.
loss_fn = torch.nn.CrossEntropyLoss()
# Optimization: adjust model parameters to lower the computed loss.
# Instantiate the model on GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Model().to(device)
opt = torch.optim.SGD(model.parameters(), lr=0.001)
# Training loop for a single epoch.
def train(train_dl, model, loss_fn, optimizer):
    """Run one full pass over ``train_dl`` and update ``model``.

    Returns ``(accuracy, mean_batch_loss)`` for the epoch. Uses the
    module-level ``device`` to place each batch.
    """
    total_samples = len(train_dl.dataset)   # samples in the whole dataset
    batch_count = len(train_dl)             # number of batches per epoch
    running_loss, hits = 0, 0               # accumulated loss / correct count
    for x, y in train_dl:                   # x: inputs, y: targets
        x, y = x.to(device), y.to(device)
        logits = model(x)
        batch_loss = loss_fn(logits, y)
        # Standard step: clear stale grads, backprop, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        with torch.no_grad():
            # A prediction is correct when the argmax logit matches the
            # label; the boolean mask is cast to 0/1 floats and summed.
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
            running_loss += batch_loss.item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
# Evaluation loop, mirroring train() but without parameter updates.
def test(test_dl, model, loss_fn):
    """Evaluate ``model`` on ``test_dl``.

    Returns ``(accuracy, mean_batch_loss)``. Uses the module-level
    ``device`` to place each batch; no gradients are tracked.
    """
    total_samples = len(test_dl.dataset)    # samples in the whole dataset
    batch_count = len(test_dl)              # number of batches
    running_loss, hits = 0, 0               # accumulated loss / correct count
    with torch.no_grad():                   # evaluation needs no gradients
        for x, y in test_dl:                # x: inputs, y: targets
            x, y = x.to(device), y.to(device)
            logits = model(x)
            running_loss += loss_fn(logits, y).item()
            hits += (logits.argmax(1) == y).type(torch.float).sum().item()
    # correct / total = accuracy; summed loss / batches = mean loss.
    return hits / total_samples, running_loss / batch_count
# Train for a number of epochs (one epoch = one full pass over the data).
# NOTE(review): the original comment said 50 epochs, but the call below
# trains for 20.
def fit(epochs, train_dl, test_dl, model, loss_fn, opt):
    """Train ``model`` for ``epochs`` epochs, printing per-epoch metrics.

    Delegates each epoch to the module-level ``train``/``test`` helpers
    and returns four parallel lists, one entry per epoch:
    ``(train_loss, train_acc, test_loss, test_acc)``.
    """
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    for epoch in range(epochs):
        epoch_acc, epoch_loss = train(train_dl, model, loss_fn, opt)
        epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
        train_acc.append(epoch_acc)
        train_loss.append(epoch_loss)
        test_acc.append(epoch_test_acc)
        # BUG FIX: the original appended the list itself
        # (test_loss.append(test_loss)); append the scalar epoch loss.
        test_loss.append(epoch_test_loss)
        template = ('epoch:{:2d},train_loss:{:.5f},train_acc:{:.1f},test_loss:{:.5f},test_acc:{:.1f},')
        print(template.format(epoch, epoch_loss, epoch_acc*100, epoch_test_loss, epoch_test_acc*100))
    print('Done')
    return train_loss, train_acc, test_loss, test_acc

(train_loss, train_acc, test_loss, test_acc) = fit(20, train_dl, test_dl, model, loss_fn, opt)