Handwritten digit recognition (fully connected / CNN), with CPU, CUDA, and DirectML versions; all scripts run as-is

pip install numpy torchvision matplotlib
  1. The MNIST dataset is downloaded automatically the first time you run training (a pre-download snippet follows this list);
  2. Fixed the bug where the models folder did not exist;
  3. Added a CUDA version;
  4. Added DirectML (dml) test code for AMD GPUs;
  5. Added a CNN implementation.
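
If you want to fetch the MNIST data before the first training run (instead of letting it download automatically), here is a minimal pre-download sketch; the empty-string root matches the scripts below, so the files land next to the script:

from torchvision.datasets import MNIST

# download both the training and the test split into the current directory
MNIST("", train=True, download=True)
MNIST("", train=False, download=True)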

1. Fully connected network on flattened 2D input

CPU version:

pip install torch

main_cpu.py :

import torch
import torch.nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x

def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=15, shuffle=True)

def evaluate(test_data, net):
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            outputs = net.forward(x.view(-1, 28 * 28))
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total

def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path

def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)

def load_model(net):
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path))

def train(net, train_data, test_data, epochs):
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            net.zero_grad()
            output = net.forward(x.view(-1, 28*28))
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)

def infer(net, test_data, start, end):
    net.eval()
    load_model(net)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            predict = torch.argmax(net.forward(x[0].view(-1, 28*28)))
            plot.figure(n)
            plot.imshow(x[0].view(28, 28))
            plot.title("prediction: " + str(int(predict)))
            plot.show()

def main():
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = Net()

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end)

if __name__ == "__main__":
    main()

CUDA version:

Reinstall the CUDA build of PyTorch (Windows):
CUDA 11.8:

Install from the console:
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

CUDA 12.1 (12.3 also works):

Install from the console:
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
Direct download link:
https://download.pytorch.org/whl/cu121/torch-2.1.2%2Bcu121-cp39-cp39-win_amd64.whl
	pip install <path to the downloaded .whl file>

PyTorch official site: https://pytorch.org/get-started/locally/
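
After installing, a quick sanity check to confirm the CUDA build was picked up; it prints the torch version, whether CUDA is available, and the CUDA version torch was built against:

python -c "import torch; print(torch.__version__, torch.cuda.is_available(), torch.version.cuda)"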

main_cuda.py :

import torch
import torch.nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x


def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=500, shuffle=True)  # changed: batch_size -> 500; bigger is not always better


def evaluate(test_data, net, device):  # changed: takes the device
    net.to(device)  # changed: move the model to the device
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            outputs = net.forward(x.view(-1, 28 * 28))
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total


def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path


def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)


def load_model(net, device):  # changed: takes the device
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path, map_location=device))


def train(net, train_data, test_data, epochs, device):  # changed: takes the device
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            net.zero_grad()
            output = net(x.view(-1, 28 * 28))
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net, device)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)


def infer(net, test_data, start, end, device):
    net.eval()
    net.to(device)
    load_model(net, device)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            x = x.to(device)  # changed: move the batch to the device
            predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
            plot.figure(n)
            plot.imshow(x[0].cpu().view(28, 28))  # changed: copy back to CPU for plotting
            plot.title("prediction: " + str(int(predict)))
            plot.show()


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = Net().to(device)   # changed: move the model to the device

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs, device)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end, device)


if __name__ == "__main__":
    main()

While training, open Task Manager and switch one of the GPU graphs from "Video Encode" to "Cuda"; if that graph shows activity and dedicated GPU memory increases, the GPU is being used for acceleration (roughly 2x faster here).
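
Besides Task Manager, you can also check from inside the script; a minimal sketch that prints the active GPU and the memory currently allocated by PyTorch (you could drop it, for example, at the end of each epoch in train):

if torch.cuda.is_available():
    # name of the GPU in use and memory held by PyTorch tensors, in MB
    print(torch.cuda.get_device_name(0))
    print("allocated MB:", torch.cuda.memory_allocated(0) / 1024 ** 2)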

DirectML version:

Install the DirectML build of torch (Windows): see other tutorials for the details, then verify the installed packages with:
conda list
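
For reference, the DirectML backend is normally a separate package installed on top of the regular torch build; to the best of my knowledge the install and a quick import check look like this:

pip install torch-directml
python -c "import torch_directml; print(torch_directml.device(0))"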

Changes in detail:

Add at the top of the file:

import torch_directml
dml = torch_directml.device(0)

Change in main:

    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = dml
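
If the same script should also run on machines without torch_directml installed, a hedged fallback that drops back to CPU (a sketch, not part of the original code):

try:
    import torch_directml
    device = torch_directml.device(0)
except ImportError:
    device = torch.device("cpu")
print("Using device:", device)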

Full version, main_dml.py:

import torch
import torch.nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os
import torch_directml

dml = torch_directml.device(0)

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x


def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=500, shuffle=True)  # changed: batch_size -> 500; bigger is not always better


def evaluate(test_data, net, device):  # changed: takes the device
    net.to(device)  # changed: move the model to the device
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            outputs = net.forward(x.view(-1, 28 * 28))
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total


def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path


def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)


def load_model(net, device):  # changed: takes the device
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path, map_location=device))


def train(net, train_data, test_data, epochs, device):  # changed: takes the device
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            net.zero_grad()
            output = net(x.view(-1, 28 * 28))
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net, device)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)


def infer(net, test_data, start, end, device):
    net.eval()
    net.to(device)
    load_model(net, device)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            x = x.to(device)  # changed: move the batch to the device
            predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
            plot.figure(n)
            plot.imshow(x[0].cpu().view(28, 28))  # changed: copy back to CPU for plotting
            plot.title("prediction: " + str(int(predict)))
            plot.show()


def main():
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = dml
    print("Using device:", device)
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = Net().to(device)   # changed: move the model to the device

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs, device)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end, device)


if __name__ == "__main__":
    main()

Test result: 1% -> 52% (screenshot omitted).

2. CNN approach

Two convolutional layers and two max-pooling layers are added; each convolutional layer is followed by a ReLU activation and a pooling layer. Before the fully connected layers, the feature maps are flattened into a 1-D vector.
The fully connected part has the same structure as the original network.

Changes in detail:

1. Create a CNN class and use it in place of the original Net:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 1 input channel (grayscale), 32 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # max pooling layer
        # fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # 64 channels; the 28x28 image halved twice gives 7x7
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution and pooling
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        # flatten
        x = x.view(-1, 64 * 7 * 7)
        # fully connected layers
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.log_softmax(self.fc3(x), dim=1)
        return x
# net = Net().to(device)  
net = CNN().to(device)  
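
To see where the 64 * 7 * 7 input size of fc1 comes from, a quick shape check with a dummy batch (a small sketch run on CPU, not part of the original script):

x = torch.randn(1, 1, 28, 28)           # one grayscale 28x28 image
net = CNN()
x = net.pool(torch.relu(net.conv1(x)))  # -> (1, 32, 14, 14)
x = net.pool(torch.relu(net.conv2(x)))  # -> (1, 64, 7, 7)
print(x.shape)                          # torch.Size([1, 64, 7, 7])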

2. The train, evaluate, and infer functions:

Change the forward calls, respectively:

# output = net(x.view(-1, 28 * 28))
output = net(x)
# outputs = net.forward(x.view(-1, 28 * 28))    # flatten
outputs = net(x)  # use x directly, no flattening
# predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
predict = torch.argmax(net(x[0].unsqueeze(0)))  # add a batch dimension to the single image

Full version:

main_dml_cnn.py :

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os
import torch_directml

# select the DirectML device (if applicable)
dml = torch_directml.device(0)


os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 1 input channel (grayscale), 32 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # max pooling layer
        # fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # 64 channels; the 28x28 image halved twice gives 7x7
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution and pooling
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        # flatten
        x = x.view(-1, 64 * 7 * 7)
        # fully connected layers
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.log_softmax(self.fc3(x), dim=1)
        return x


def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=500, shuffle=True)  # changed: batch_size -> 500; bigger is not always better


def evaluate(test_data, net, device):  # changed: takes the device
    net.to(device)  # changed: move the model to the device
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            # outputs = net.forward(x.view(-1, 28 * 28))    # flatten
            outputs = net(x)  # use x directly, no flattening
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total


def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path


def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)


def load_model(net, device):  # changed: takes the device
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path, map_location=device))


def train(net, train_data, test_data, epochs, device):  # changed: takes the device
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            net.zero_grad()
            # output = net(x.view(-1, 28 * 28))
            output = net(x)
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net, device)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)


def infer(net, test_data, start, end, device):
    net.eval()
    net.to(device)
    load_model(net, device)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            x = x.to(device)  # changed: move the batch to the device
            # predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
            predict = torch.argmax(net(x[0].unsqueeze(0)))  # add a batch dimension to the single image
            plot.figure(n)
            plot.imshow(x[0].cpu().view(28, 28))  # changed: copy back to CPU for plotting
            plot.title("prediction: " + str(int(predict)))
            plot.show()


def main():
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = dml
    print("Using device:", device)
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = CNN().to(device)   # changed: use the CNN model

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs, device)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end, device)


if __name__ == "__main__":
    main()

The CUDA and CPU versions are modified in the same way.

With the CNN, accuracy improves substantially for the same amount of training time.
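
For a rough sense of model size, a small sketch that compares the trainable parameter counts of the two networks (the counts in the comments are my own estimates):

def count_params(model):
    # total number of trainable parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Net:", count_params(Net()))  # roughly 59k parameters
print("CNN:", count_params(CNN()))  # roughly 224k parameters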
