手写中文数字识别PyTorch实现(全连接&卷积神经网络)

尝试一下手写汉字的数字识别,分别采用全连接神经网络和卷积神经网络

这次准备的数据集有15000张图片,每张图片大小为64*64

image-20221126171337274

image-20221126232935453

训练集10500张图片,测试集4500张图片

全连接神经网络

我们先用上次手写数字识别的全连接神经网络尝试一下

Dataset代码:

from torch.utils.data import Dataset
import torch
import cv2

class CN_MNIST(Dataset):
    """Dataset of handwritten Chinese numeral images listed in an index table.

    Args:
        index_csv: Table-like object supporting ``.iloc[index]`` row access and
            ``len()`` (the scripts pass a pandas DataFrame). Each row must
            provide the fields ``character``, ``suite_id``, ``sample_id`` and
            ``code``.

    Each item is a ``(image, label)`` pair: ``image`` is the grayscale picture
    as a float32 tensor scaled by 1/255, ``label`` is a 0-dim int64 tensor
    holding the class index looked up in ``self.dictionary``.
    """

    def __init__(self, index_csv):
        self.index_csv = index_csv
        # Maps the character stored in the index table to its class index (15 classes).
        self.dictionary = {'零': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '十': 10, '百': 11, '千': 12, '万': 13, '亿': 14}

    def __getitem__(self, index):
        """Load the image referenced by row ``index`` and return ``(image, label)``.

        Raises:
            KeyError: If the row's character is not in ``self.dictionary``.
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        sample = self.index_csv.iloc[index]
        label = self.dictionary[str(sample['character'])]
        suite_id = sample['suite_id']
        sample_id = sample['sample_id']
        code = sample['code']
        file_name = f'images/input_{suite_id}_{sample_id}_{code}.jpg'

        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'cannot read image file: {file_name}')

        # Convert BGR to single-channel gray and divide by 255.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) / 255
        return (
            torch.tensor(image, dtype=torch.float32),
            torch.tensor(label, dtype=torch.long),
        )

    def __len__(self):
        """Return the number of rows in the index table."""
        return len(self.index_csv)

模型以及训练测试代码:

import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from CMINISTdataset import CN_MNIST
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
import torch.nn.functional as F
import pandas as pd


# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# 1 Prepare the datasets
batch_size = 512

# sample(frac=1) returns every row of the index table in random order.
train_index = pd.read_csv('train_set.csv').sample(frac=1)
train_dataset = CN_MNIST(train_index)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

test_index = pd.read_csv('test_set.csv').sample(frac=1)
test_dataset = CN_MNIST(test_index)

# The loader itself does not reshuffle the test set between passes.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)


# 2 设计模型
class Net(torch.nn.Module):
    """Fully connected classifier mapping 4096 input features to 15 class logits.

    The input is flattened to rows of 4096 values, passed through five hidden
    linear layers (two of them with an additive skip connection) and projected
    to 15 raw logits suitable for ``CrossEntropyLoss``.
    """

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(4096, 2048)
        self.l2 = torch.nn.Linear(2048, 2048)
        self.l3 = torch.nn.Linear(2048, 1024)
        self.l4 = torch.nn.Linear(1024, 1024)
        self.l5 = torch.nn.Linear(1024, 128)
        self.l6 = torch.nn.Linear(128, 15)
        self.dropout = torch.nn.Dropout(p=0.5)
        self.norm = torch.nn.BatchNorm1d(128)

    def forward(self, x):
        """Return the logits of shape ``(rows, 15)`` for input ``x``.

        ``x`` may have any shape whose element count is a multiple of 4096;
        it is reshaped to ``(-1, 4096)`` first.
        """
        x = x.reshape(-1, 4096)  # flatten each image; -1 infers the row count
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x)) + x  # skip connection (2048 -> 2048)
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x)) + x  # skip connection (1024 -> 1024)
        x = F.relu(self.norm(self.l5(x)))
        # Dropout regularises the last hidden representation. Applying it to
        # the logits instead would zero random class scores and double the
        # others during training, corrupting the loss. No activation follows
        # the final layer.
        return self.l6(self.dropout(x))


# Build the network and move its parameters to the selected device.
model = Net().to(device)

# 3 Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Optional cosine learning-rate schedule, currently disabled:
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-9)

# 4 训练
# Training accuracy (percent) of every epoch, appended by train().
correct_list2 = []


def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Prints the sum of the batch losses and the training accuracy, and appends
    that accuracy (in percent) to ``correct_list2``.

    Args:
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
    """
    seen = 0
    hits = 0
    loss_sum = 0
    for batch_inputs, batch_target in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_target = batch_target.to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_target)
        batch_loss.backward()
        optimizer.step()
        # scheduler.step()

        loss_sum += batch_loss.item()

        # Predicted class = index of the largest logit in each row.
        guesses = logits.detach().argmax(dim=1)
        seen += batch_target.size(0)
        hits += (guesses == batch_target).sum().item()

    print('[%d] loss:%.3f' % (epoch + 1, loss_sum))
    rate = 100 * hits / seen
    print('训练集的准确率为: {:.2f}'.format(rate))
    correct_list2.append(rate)

# Test accuracy (percent) of every evaluation, appended by test().
correct_list = []


# 5 Evaluation
def test():
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Prints the test accuracy and appends it (in percent) to ``correct_list``.
    """
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            # The index of the row-wise maximum is the predicted class.
            predicted = scores.argmax(dim=1)
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    rate = 100 * hits / seen
    correct_list.append(rate)
    print('测试集的准确率为: {:.2f}'.format(rate))
    print('-------------------------')



if __name__ == '__main__':
    # Single source of truth for the epoch count (previously repeated in the
    # loop and in both plot calls).
    num_epochs = 50
    for epoch in range(num_epochs):
        model.train()
        train(epoch)
        model.eval()
        test()

    # Plot the epoch-accuracy curves. This lives under the guard so that
    # importing the module does not try to plot the still-empty accuracy
    # lists; the x axis is derived from the recorded values so it always
    # matches their length.
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('accuracy%')
    plt.plot(np.arange(len(correct_list)), np.array(correct_list), label='test')
    plt.plot(np.arange(len(correct_list2)), np.array(correct_list2), label='train')
    plt.legend()
    plt.show()

运行结果:

image-20221126172054346

image-20221126172102374

蓝线是测试集准确率,红线是训练集准确率;测试集准确率在56%左右波动,训练集准确率在87%左右波动

卷积神经网络

import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.nn as nn
from CMINISTdataset import CN_MNIST
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
import torch.nn.functional as F
import pandas as pd

# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# 1 Prepare the datasets
batch_size = 512
loader_options = dict(batch_size=batch_size, num_workers=0)

# Index tables are read from CSV and fully row-shuffled via sample(frac=1).
train_dataset = CN_MNIST(pd.read_csv('train_set.csv').sample(frac=1))
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)

test_dataset = CN_MNIST(pd.read_csv('test_set.csv').sample(frac=1))
# No per-pass shuffling is requested from the test loader.
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)


# 2 设计模型
class Net(torch.nn.Module):
    """Two-layer convolutional classifier for single-channel square images.

    Args:
        image_size: Side length of the square input images. Defaults to 64,
            the image size used by this project; 28 reproduces the classic
            320-feature MNIST head.
        num_classes: Number of output logits. Defaults to 15.

    Raises:
        ValueError: If ``image_size`` is too small to survive both
            convolution + pooling stages.
    """

    def __init__(self, image_size=64, num_classes=15):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = nn.MaxPool2d(2)

        # Each unpadded 5x5 convolution trims 4 pixels from the side and each
        # 2x2 max-pool halves it (rounding down).
        side = ((image_size - 4) // 2 - 4) // 2
        if side < 1:
            raise ValueError(f'image_size={image_size} is too small for this network')
        # The head must match the flattened conv output (20 * 13 * 13 = 3380
        # for 64x64 inputs); a fixed 320 only fits 28x28 images.
        self.fc = nn.Linear(20 * side * side, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``.

        ``x`` may be ``(batch, 1, H, W)`` or ``(batch, H, W)``; in the latter
        case the missing channel axis is inserted.
        """
        if x.dim() == 3:
            # Batches without a channel axis would otherwise be misread by
            # Conv2d as one unbatched image with `batch` channels.
            x = x.unsqueeze(1)
        size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        x = x.reshape(size, -1)
        return self.fc(x)


# Instantiate the network on the selected device.
model = Net().to(device)

# 3 Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Optional cosine learning-rate schedule, currently disabled:
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-9)

# 4 训练
# One training-accuracy value (percent) per epoch, appended by train().
correct_list2 = []


def train(epoch):
    """Train ``model`` for one pass over ``train_loader``.

    The summed batch loss and the training accuracy are printed; the accuracy
    (in percent) is also appended to ``correct_list2``.

    Args:
        epoch: Zero-based epoch index; shown as ``epoch + 1`` in the output.
    """
    n_samples = 0
    n_correct = 0
    epoch_loss = 0
    for features, target in train_loader:
        features, target = features.to(device), target.to(device)

        optimizer.zero_grad()
        outputs = model(features)
        step_loss = criterion(outputs, target)
        step_loss.backward()
        optimizer.step()
        # scheduler.step()
        epoch_loss += step_loss.item()

        # Row-wise index of the largest logit is the predicted class.
        predicted = outputs.detach().argmax(dim=1)
        n_samples += target.size(0)
        n_correct += (predicted == target).sum().item()

    print('[%d] loss:%.3f' % (epoch + 1, epoch_loss))
    rate = 100 * n_correct / n_samples
    print('训练集的准确率为: {:.2f}'.format(rate))
    correct_list2.append(rate)


# One test-accuracy value (percent) per evaluation, appended by test().
correct_list = []


# 5 Evaluation
def test():
    """Measure the accuracy of ``model`` on ``test_loader`` with gradients disabled.

    The accuracy (in percent) is printed and appended to ``correct_list``.
    """
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # torch.max over dim 1 yields (values, indices); only the indices
            # (the predicted classes) are needed.
            predicted = torch.max(outputs, dim=1)[1]
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    rate = 100 * n_correct / n_samples
    correct_list.append(rate)
    print('测试集的准确率为: {:.2f}'.format(rate))
    print('-------------------------')


if __name__ == '__main__':
    # Single source of truth for the epoch count (previously repeated in the
    # loop and in both plot calls).
    num_epochs = 200
    for epoch in range(num_epochs):
        model.train()
        train(epoch)
        model.eval()
        test()

    # Plot the epoch-accuracy curves. This lives under the guard so that
    # importing the module does not try to plot the still-empty accuracy
    # lists; the x axis is derived from the recorded values so it always
    # matches their length.
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('accuracy%')
    plt.plot(np.arange(len(correct_list)), np.array(correct_list), label='test')
    plt.plot(np.arange(len(correct_list2)), np.array(correct_list2), label='train')
    plt.legend()
    plt.show()

运行结果如下:蓝线是测试集准确率,红线是训练集准确率;测试集准确率在95%左右波动,训练集准确率在90%左右波动

image-20221126192355232

image-20221126192405436

  • 0
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
以下是一个使用PyTorch实现手写数字识别卷积神经网络,其中包括多个卷积层和全连接层。 ``` import torch import torch.nn as nn import torch.optim as optim from torch.autograd import Variable from torchvision import datasets, transforms # 定义卷积神经网络模型 class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 6, 5) # 输入通道数为1,输出通道数为6,卷积核大小为5*5 self.conv2 = nn.Conv2d(6, 16, 5) # 输入通道数为6,输出通道数为16,卷积核大小为5*5 self.fc1 = nn.Linear(16 * 4 * 4, 120) # 全连接层,输入大小为16*4*4,输出大小为120 self.fc2 = nn.Linear(120, 84) # 全连接层,输入大小为120,输出大小为84 self.fc3 = nn.Linear(84, 10) # 全连接层,输入大小为84,输出大小为10 def forward(self, x): x = nn.functional.relu(self.conv1(x)) # 卷积层1,使用relu激活函数 x = nn.functional.max_pool2d(x, 2) # 最大池化层,池化核大小为2*2 x = nn.functional.relu(self.conv2(x)) # 卷积层2,使用relu激活函数 x = nn.functional.max_pool2d(x, 2) # 最大池化层,池化核大小为2*2 x = x.view(-1, 16 * 4 * 4) # 展开成一维向量 x = nn.functional.relu(self.fc1(x)) # 全连接层1,使用relu激活函数 x = nn.functional.relu(self.fc2(x)) # 全连接层2,使用relu激活函数 x = self.fc3(x) # 输出层,不使用激活函数 return x # 加载数据 train_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=64, shuffle=True) test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=64, shuffle=True) # 定义模型和优化器 model = Net() optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) # 训练模型 def train(epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): data, target = Variable(data), Variable(target) optimizer.zero_grad() output = model(data) loss = nn.functional.cross_entropy(output, target) loss.backward() optimizer.step() if batch_idx % 100 == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( epoch, batch_idx * len(data), len(train_loader.dataset), 100. 
* batch_idx / len(train_loader), loss.data.item())) # 测试模型 def test(): model.eval() test_loss = 0 correct = 0 for data, target in test_loader: data, target = Variable(data, volatile=True), Variable(target) output = model(data) test_loss += nn.functional.cross_entropy(output, target, size_average=False).data.item() pred = output.data.max(1, keepdim=True)[1] correct += pred.eq(target.data.view_as(pred)).cpu().sum() test_loss /= len(test_loader.dataset) print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset))) # 运行训练和测试 for epoch in range(1, 11): train(epoch) test() ``` 在这个模型中,我们使用了两个卷积层和三个全连接层。在每个卷积层中,我们使用了多个卷积核,以增加模型的表达能力。在训练阶段,我们使用随机梯度下降(SGD)优化器,以最小化交叉熵损失函数。在测试阶段,我们计算了模型在测试集上的准确率。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值