Handwritten digit recognition (fully connected / CNN), with CPU, CUDA, and DirectML versions; all scripts run as-is

pip install numpy torchvision matplotlib
  1. The MNIST dataset is downloaded automatically the first time you run training (a pre-download snippet follows this list);
  2. Fixed the bug where the models folder did not exist;
  3. Added a CUDA version;
  4. Added DirectML (dml) test code for AMD GPUs;
  5. Added a CNN implementation.
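
If you want to fetch the MNIST data before the first training run (instead of letting it download automatically), here is a minimal pre-download sketch; the empty-string root matches the scripts below, so the files land next to the script:

from torchvision.datasets import MNIST

# download both the training and the test split into the current directory
MNIST("", train=True, download=True)
MNIST("", train=False, download=True)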

1. Fully connected network on flattened 2D input

CPU version:

pip install torch

main_cpu.py :

import torch
import torch.nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x

def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=15, shuffle=True)

def evaluate(test_data, net):
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            outputs = net.forward(x.view(-1, 28 * 28))
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total

def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path

def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)

def load_model(net):
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path))

def train(net, train_data, test_data, epochs):
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            net.zero_grad()
            output = net.forward(x.view(-1, 28*28))
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)

def infer(net, test_data, start, end):
    net.eval()
    load_model(net)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            predict = torch.argmax(net.forward(x[0].view(-1, 28*28)))
            plot.figure(n)
            plot.imshow(x[0].view(28, 28))
            plot.title("prediction: " + str(int(predict)))
            plot.show()

def main():
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = Net()

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end)

if __name__ == "__main__":
    main()

CUDA version:

Reinstall the CUDA build of PyTorch (Windows):
CUDA 11.8:

Install from the console:
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

CUDA 12.1 (12.3 also works):

Install from the console:
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
Direct download link:
https://download.pytorch.org/whl/cu121/torch-2.1.2%2Bcu121-cp39-cp39-win_amd64.whl
	pip install <path to the downloaded .whl file>

PyTorch official site: https://pytorch.org/get-started/locally/
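
After installing, a quick sanity check to confirm the CUDA build was picked up; it prints the torch version, whether CUDA is available, and the CUDA version torch was built against:

python -c "import torch; print(torch.__version__, torch.cuda.is_available(), torch.version.cuda)"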

main_cuda.py :

import torch
import torch.nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x


def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=500, shuffle=True)  # changed: batch_size -> 500; bigger is not always better


def evaluate(test_data, net, device):  # changed: takes the device
    net.to(device)  # changed: move the model to the device
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            outputs = net.forward(x.view(-1, 28 * 28))
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total


def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path


def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)


def load_model(net, device):  # changed: takes the device
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path, map_location=device))


def train(net, train_data, test_data, epochs, device):  # changed: takes the device
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            net.zero_grad()
            output = net(x.view(-1, 28 * 28))
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net, device)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)


def infer(net, test_data, start, end, device):
    net.eval()
    net.to(device)
    load_model(net, device)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            x = x.to(device)  # changed: move the batch to the device
            predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
            plot.figure(n)
            plot.imshow(x[0].cpu().view(28, 28))  # changed: copy back to CPU for plotting
            plot.title("prediction: " + str(int(predict)))
            plot.show()


def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = Net().to(device)   # changed: move the model to the device

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs, device)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end, device)


if __name__ == "__main__":
    main()

While training, open Task Manager and switch one of the GPU graphs from "Video Encode" to "Cuda"; if that graph shows activity and dedicated GPU memory increases, the GPU is being used for acceleration (roughly 2x faster here).
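
Besides Task Manager, you can also check from inside the script; a minimal sketch that prints the active GPU and the memory currently allocated by PyTorch (you could drop it, for example, at the end of each epoch in train):

if torch.cuda.is_available():
    # name of the GPU in use and memory held by PyTorch tensors, in MB
    print(torch.cuda.get_device_name(0))
    print("allocated MB:", torch.cuda.memory_allocated(0) / 1024 ** 2)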

DirectML version:

Install the DirectML build of torch (Windows): see other tutorials for the details, then verify the installed packages with:
conda list
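
For reference, the DirectML backend is normally a separate package installed on top of the regular torch build; to the best of my knowledge the install and a quick import check look like this:

pip install torch-directml
python -c "import torch_directml; print(torch_directml.device(0))"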

Changes in detail:

Add at the top of the file:

import torch_directml
dml = torch_directml.device(0)

Change in main:

    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = dml
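
If the same script should also run on machines without torch_directml installed, a hedged fallback that drops back to CPU (a sketch, not part of the original code):

try:
    import torch_directml
    device = torch_directml.device(0)
except ImportError:
    device = torch.device("cpu")
print("Using device:", device)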

Full version, main_dml.py:

import torch
import torch.nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os
import torch_directml

dml = torch_directml.device(0)

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x


def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=500, shuffle=True)  # changed: batch_size -> 500; bigger is not always better


def evaluate(test_data, net, device):  # changed: takes the device
    net.to(device)  # changed: move the model to the device
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            outputs = net.forward(x.view(-1, 28 * 28))
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total


def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path


def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)


def load_model(net, device):  # changed: takes the device
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path, map_location=device))


def train(net, train_data, test_data, epochs, device):  # changed: takes the device
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            net.zero_grad()
            output = net(x.view(-1, 28 * 28))
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net, device)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)


def infer(net, test_data, start, end, device):
    net.eval()
    net.to(device)
    load_model(net, device)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            x = x.to(device)  # changed: move the batch to the device
            predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
            plot.figure(n)
            plot.imshow(x[0].cpu().view(28, 28))  # changed: copy back to CPU for plotting
            plot.title("prediction: " + str(int(predict)))
            plot.show()


def main():
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = dml
    print("Using device:", device)
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = Net().to(device)   # changed: move the model to the device

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs, device)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end, device)


if __name__ == "__main__":
    main()

Test result: 1% -> 52% (screenshot omitted).

2. CNN approach

Two convolutional layers and two max-pooling layers are added; each convolutional layer is followed by a ReLU activation and a pooling layer. Before the fully connected layers, the feature maps are flattened into a 1-D vector.
The fully connected part has the same structure as the original network.

Changes in detail:

1. Create a CNN class and use it in place of the original Net:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 1 input channel (grayscale), 32 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # max pooling layer
        # fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # 64 channels; the 28x28 image halved twice gives 7x7
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution and pooling
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        # flatten
        x = x.view(-1, 64 * 7 * 7)
        # fully connected layers
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.log_softmax(self.fc3(x), dim=1)
        return x
# net = Net().to(device)  
net = CNN().to(device)  
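
To see where the 64 * 7 * 7 input size of fc1 comes from, a quick shape check with a dummy batch (a small sketch run on CPU, not part of the original script):

x = torch.randn(1, 1, 28, 28)           # one grayscale 28x28 image
net = CNN()
x = net.pool(torch.relu(net.conv1(x)))  # -> (1, 32, 14, 14)
x = net.pool(torch.relu(net.conv2(x)))  # -> (1, 64, 7, 7)
print(x.shape)                          # torch.Size([1, 64, 7, 7])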

2. The train, evaluate, and infer functions:

Change the forward calls, respectively:

# output = net(x.view(-1, 28 * 28))
output = net(x)
# outputs = net.forward(x.view(-1, 28 * 28))    # flatten
outputs = net(x)  # use x directly, no flattening
# predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
predict = torch.argmax(net(x[0].unsqueeze(0)))  # add a batch dimension to the single image

Full version:

main_dml_cnn.py :

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
import matplotlib.pyplot as plot
import os
import torch_directml

# select the DirectML device (if applicable)
dml = torch_directml.device(0)


os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28 * 28, 64)
        self.fc2 = torch.nn.Linear(64, 64)
        self.fc3 = torch.nn.Linear(64, 64)
        self.fc4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.relu(self.fc3(x))
        x = torch.nn.functional.log_softmax(self.fc4(x), dim=1)
        return x


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 1 input channel (grayscale), 32 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # max pooling layer
        # fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # 64 channels; the 28x28 image halved twice gives 7x7
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # convolution and pooling
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        # flatten
        x = x.view(-1, 64 * 7 * 7)
        # fully connected layers
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = torch.nn.functional.log_softmax(self.fc3(x), dim=1)
        return x


def get_data_loader(is_train):
    to_tensor = transforms.Compose([transforms.ToTensor()])  # build the tensor transform
    data_set = MNIST("", is_train, transform=to_tensor, download=True)
    return DataLoader(data_set, batch_size=500, shuffle=True)  # changed: batch_size -> 500; bigger is not always better


def evaluate(test_data, net, device):  # changed: takes the device
    net.to(device)  # changed: move the model to the device
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for (x, y) in test_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            # outputs = net.forward(x.view(-1, 28 * 28))    # flatten
            outputs = net(x)  # use x directly, no flattening
            for i, output in enumerate(outputs):
                if torch.argmax(output) == y[i]:
                    n_correct += 1
                n_total += 1
    return n_correct / n_total


def save_model(net, epoch):
    model_path = f'models/model_epoch_{epoch}.pth'
    model_dir = os.path.dirname(model_path)
    # create the models folder if it does not exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(net.state_dict(), model_path)
    return model_path


def update_config(model_path):
    with open('model_config.txt', 'w') as file:
        file.write(model_path)


def load_model(net, device):  # changed: takes the device
    with open('model_config.txt', 'r') as file:
        model_path = file.readline().strip()
    net.load_state_dict(torch.load(model_path, map_location=device))


def train(net, train_data, test_data, epochs, device):  # changed: takes the device
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    for epoch in range(epochs):
        for (x, y) in train_data:
            x, y = x.to(device), y.to(device)  # added: move the batch to the device
            net.zero_grad()
            # output = net(x.view(-1, 28 * 28))
            output = net(x)
            loss = torch.nn.functional.nll_loss(output, y)
            loss.backward()
            optimizer.step()
        accuracy = evaluate(test_data, net, device)
        print("epoch", epoch, "accuracy;", accuracy)
        model_path = save_model(net, epoch)
        update_config(model_path)


def infer(net, test_data, start, end, device):
    net.eval()
    net.to(device)
    load_model(net, device)
    with torch.no_grad():
        for n, (x, _) in enumerate(test_data):
            if n < start or n >= end:
                continue
            x = x.to(device)  # changed: move the batch to the device
            # predict = torch.argmax(net.forward(x[0].view(-1, 28 * 28)))
            predict = torch.argmax(net(x[0].unsqueeze(0)))  # add a batch dimension to the single image
            plot.figure(n)
            plot.imshow(x[0].cpu().view(28, 28))  # changed: copy back to CPU for plotting
            plot.title("prediction: " + str(int(predict)))
            plot.show()


def main():
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = dml
    print("Using device:", device)
    mode = input("Enter mode (train = 0 / infer = 1): ")
    net = CNN().to(device)   # changed: use the CNN model

    if mode == "train" or int(mode) == 0:
        epochs = int(input("Enter number of epochs: "))
        train_data = get_data_loader(is_train=True)
        test_data = get_data_loader(is_train=False)
        train(net, train_data, test_data, epochs, device)
    elif mode == "infer" or int(mode) == 1:
        test_data = get_data_loader(is_train=False)
        start = int(input("Enter start index for inference(for example 2): "))
        end = int(input("Enter end index for inference(for example 5): "))
        infer(net, test_data, start, end, device)


if __name__ == "__main__":
    main()

The CUDA and CPU versions are modified in the same way.

With the CNN, accuracy improves substantially for the same amount of training time.
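
For a rough sense of model size, a small sketch that compares the trainable parameter counts of the two networks (the counts in the comments are my own estimates):

def count_params(model):
    # total number of trainable parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Net:", count_params(Net()))  # roughly 59k parameters
print("CNN:", count_params(CNN()))  # roughly 224k parameters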
