逐句讲解:在PyTorch框架下使用gpu训练MNIST手写数据集(附代码)

from os.path import join
from tqdm import tqdm
import pandas as pd

import torchvision
import torch
import torch.nn as nn

#  设置device,使得网络和数据能够在gpu上运行
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


#  定义卷积+激活函数类,定义方法要继承nn.Module
class Conv2dTanh(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=(1, 1), padding=(0, 0), bias=True):
        super(Conv2dTanh, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.bias = bias

        self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                              stride=stride, padding=padding, bias=bias)

    def forward(self, x):
        """
        Args:
            x: [N,C,H,W]
        """
        o1 = self.conv(x)
        o2 = torch.tanh(o1)
        return o2


#  定义特征提取类
class Features(nn.Module):
    def __init__(self, padding):
        super(Features, self).__init__()

        self.padding = padding
        self.conv2dtanh1 = Conv2dTanh(in_channels=1, out_channels=6, kernel_size=5, padding=padding)
        self.avgpool1 = nn.AvgPool2d(kernel_size=2)
        self.conv2dtanh2 = Conv2dTanh(in_channels=6, out_channels=16, kernel_size=5)
        self.avgpool2 = nn.AvgPool2d(kernel_size=2)
        self.conv2dtanh3 = Conv2dTanh(in_channels=16, out_channels=120, kernel_size=5)

    def forward(self, x):
        """
        Args:
            x: [N,1,H,W]
            x: [Batchnum, 1, Height, Weight]
            x: [200, 1, 28, 28]
            o1: [200, 6, 28, 28]
            o2: [200, 6, 14, 14]
            o3: [200, 16, 10, 10]
            o4: [200, 16, 5, 5]
            o5: [200, 120, 1, 1]
        """
        o1 = self.conv2dtanh1(x)
        o2 = self.avgpool1(o1)
        o3 = self.conv2dtanh2(o2)
        o4 = self.avgpool2(o3)
        o5 = self.conv2dtanh3(o4)
        return o5


#  定义线性分类层,o3输出后没有定义激活函数,需要后定义softmax
class Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Classifier, self).__init__()

        self.num_classes = num_classes

        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """
        Args:
            x: [N,120]
        """
        o1 = self.fc1(x)
        o2 = torch.tanh(o1)
        o3 = self.fc2(o2)
        return o3


#  定义网络模型,调用上面方法类
class LeNet5(nn.Module):
    def __init__(self, num_classes=10, padding=0):
        super(LeNet5, self).__init__()

        self.num_classes = num_classes
        self.padding = padding

        self.features = Features(padding=padding)
        self.flatten = nn.Flatten()
        self.classifier = Classifier(num_classes=num_classes)

    def forward(self, x):
        """
        Args:
            x: [N,1,H,W]
            flatten: [Batchnum, channels, 1, 1]-> [Batchnum, channels]
        """
        o1 = self.features(x)
        o2 = self.flatten(o1)
        o3 = self.classifier(o2)
        #  softmax激活函数, dim=-1意味着dim=input.dim()
        o4 = torch.log_softmax(o3, dim=-1)
        return o4


#  定义数据集的读取方式dataloader,下载方式,方法已经封装好了
class Datasets:
    def __init__(self, dataset_path, batch_size):
        self.train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST(dataset_path, train=True, download=True,
                                       transform=torchvision.transforms.ToTensor()),
            batch_size=batch_size, shuffle=True)

        self.test_loader = torch.utils.data.DataLoader(
            torchvision.datasets.MNIST(dataset_path, train=False, download=True,
                                       transform=torchvision.transforms.ToTensor()),
            batch_size=batch_size * 2, shuffle=True)


#  训练方法类,包含训练,测试
class Trainer:
    def __init__(self, datasets, model, optimizer, loss_fn, results_path='results'):
        self.datasets = datasets
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.results_path = results_path
        self.train_df = None

    def train_epoch(self, msg_format):
        self.model.train()

        losses = []
        bar = tqdm(self.datasets.train_loader)
        for data, target in bar:
            #  将数据放入device显卡上
            data, target = data.to(device), target.to(device)
            #  把梯度.grad置零,如果不将梯度清零的话,梯度会与上一个batch的数据相关
            self.optimizer.zero_grad()

            output = self.model(data)
            loss = self.loss_fn(output, target)

            #  梯度自动计算,累加到.grad属性上(Batch)
            loss.backward()
            #  通过梯度下降法更新网络参数权重
            self.optimizer.step()
            #  设置loss值,val值等信息的显示格式
            bar.set_description(msg_format.format(loss.item()))
            #  记录loss信息
            losses.append(loss.item())
        return losses

    def test(self):
        #  防止网络更改参数,否则的话,有输入数据,即使不训练,它也会改变权值。这是model中含有batch normalization层所带来的的性质。
        self.model.eval()

        count = len(self.datasets.test_loader.dataset)
        test_loss = 0
        correct = 0
        #  被with torch.no_grad()包住的代码,不用跟踪反向梯度计算,即梯度函数状态不会更新
        with torch.no_grad():
            for data, target in self.datasets.test_loader:
                data, target = data.to(device), target.to(device)
                output = self.model(data)
                test_loss += self.loss_fn(output, target).item() * len(data)
                pred = output.data.max(1, keepdim=True)[1]
                correct += pred.eq(target.data.view_as(pred)).sum().item()

        return test_loss / count, correct / count

    def train(self, num_epoch):
        val_loss, accuracy = self.test()
        all_losses = [[None, val_loss, accuracy]]

        for epoch in range(num_epoch):
            # train
            train_losses = self.train_epoch(
                f'train {epoch}/{num_epoch} -- loss: {{:3.2f}}, val_loss: {val_loss:3.2f}, accuracy: {accuracy:.1%}')

            # test
            val_loss, accuracy = self.test()
            all_losses.extend([
                [train_loss, None, None]
                for train_loss in train_losses
            ])
            all_losses.append([None, val_loss, accuracy])

        self.save_model()
        #  定义pandas数据分析
        self.train_df = pd.DataFrame(data=all_losses, columns=["train_loss", "val_loss", "accuracy"])
        self.train_df.to_csv(join(self.results_path, "train.csv"), index=False)

    #  定义保存模型,这里在windows下可能无法生产results文件夹,自行创建
    def save_model(self):
        torch.save(self.model.state_dict(), join(self.results_path, 'model.pth'))

    def plot(self):
        import matplotlib.pyplot as plt
        self.train_df[["train_loss", "val_loss"]].ffill().plot(grid=True, logy=True)
        self.train_df[["accuracy"]].dropna().plot(grid=True)
        plt.show()


def train():
    torch.manual_seed(0)
    #  将网络模型放在device显卡上
    model = LeNet5(num_classes=10, padding=2).to(device)
    #  NLL:负对数似然
    loss_fn = torch.nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
    trainer = Trainer(Datasets("datasets", 100), model=model, optimizer=optimizer,
                      loss_fn=loss_fn, results_path="results")

    trainer.train(num_epoch=3)
    trainer.plot()

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是用PyTorch实现LeNet模型并训练Mnist数据集的示例代码: ```python import torch import torch.nn as nn import torch.optim as optim import torchvision.datasets as dsets import torchvision.transforms as transforms # 定义LeNet模型 class LeNet(nn.Module): def __init__(self): super(LeNet, self).__init__() self.conv1 = nn.Conv2d(1, 6, 5) self.pool1 = nn.MaxPool2d(kernel_size=2) self.conv2 = nn.Conv2d(6, 16, 5) self.pool2 = nn.MaxPool2d(kernel_size=2) self.fc1 = nn.Linear(16*4*4, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) def forward(self, x): x = self.pool1(torch.relu(self.conv1(x))) x = self.pool2(torch.relu(self.conv2(x))) x = x.view(-1, 16*4*4) x = torch.relu(self.fc1(x)) x = torch.relu(self.fc2(x)) x = self.fc3(x) return x # 设置超参数 batch_size = 100 learning_rate = 0.001 num_epochs = 5 # 加载Mnist数据集 train_dataset = dsets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True) test_dataset = dsets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True) # 创建数据加载器 train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # 创建LeNet模型和优化器 model = LeNet() optimizer = optim.Adam(model.parameters(), lr=learning_rate) # 训练模型 total_step = len(train_loader) for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = images labels = labels # 前向传播 outputs = model(images) loss = nn.CrossEntropyLoss()(outputs, labels) # 反向传播和优化 optimizer.zero_grad() loss.backward() optimizer.step() # 输出训练状态 if (i+1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' .format(epoch+1, num_epochs, i+1, total_step, loss.item())) # 测试模型 model.eval() with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images labels = labels outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) ``` 这个代码可以在Mnist数据集训练LeNet模型,并在测试集上计算模型的准确率。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值