1-手写数字识别-pytorch版本(从csv文件读取数据)

1 介绍

本文使用 PyTorch 实现手写数字识别(图像分类):数据从 CSV 文件读取,模型为自行搭建的 CNN。

2 导入包

import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

3 导入数据集

# Load the training CSV, parsing every column as float32.
data = pd.read_csv("train.csv", dtype=np.float32)

4 划分特征和标签,并且数据归一化

# Every column except "label" is a feature; dividing by 255 normalizes the
# values (presumably 0-255 pixel intensities -- confirm against the CSV).
feature_columns = data.columns != "label"
x = data.loc[:, feature_columns].to_numpy() / 255
# The "label" column holds the target for each row.
y = data["label"].to_numpy()

5 划分训练集-测试集

# Shuffle, then keep 80% of the rows for training and the rest for testing.
# The fixed random_state makes the split reproducible.
split = train_test_split(x, y, train_size=0.8, random_state=42, shuffle=True)
X_train, X_test, y_train, y_test = split

6 图像展示

# Preview the first 20 training samples in a 4x5 grid, each titled with its label.
for slot in range(1, 21):
    sample = X_train[slot - 1]
    label = y_train[slot - 1]
    plt.subplot(4, 5, slot)
    # Each row is a flat vector; reshape it to 28x28 for display.
    plt.imshow(sample.reshape(28, 28))
    plt.axis("off")
    plt.title(str(int(label)))
plt.show()

在这里插入图片描述

7 数据类型转换,使用 from_numpy转换为tensor

# Wrap the NumPy feature arrays as tensors for the train and test sets.
X_train, X_test = torch.from_numpy(X_train), torch.from_numpy(X_test)

# Labels are converted to long (int64) tensors.
y_train = torch.from_numpy(y_train).long()
y_test = torch.from_numpy(y_test).long()

8 设置参数,构建DataLoader

设置 batch_size、epoch 数和迭代次数(iteration):

# Training schedule: the epoch count is derived from a target number of
# iterations (optimizer steps) and the number of batches per epoch.
batch_size = 100
n_iters = 4000
batches_per_epoch = len(X_train) / batch_size
num_epochs = int(n_iters / batches_per_epoch)
print(num_epochs)

# Pair features with labels so the loaders yield (images, labels) batches.
train = torch.utils.data.TensorDataset(X_train, y_train)
test = torch.utils.data.TensorDataset(X_test, y_test)

# Reshuffle the training samples at every epoch so SGD does not see the same
# batches in the same order each time. Evaluation order does not matter, so
# the test loader stays unshuffled.
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

9 搭建CNN模型

class CNNModel(nn.Module):
    """Two conv/ReLU/max-pool stages followed by one linear readout.

    For a 1x28x28 input the spatial size shrinks as
    28 -> conv(3x3, stride 2) -> 13 -> pool(2) -> 6
       -> conv(3x3, stride 2) -> 2  -> pool(2) -> 1,
    so the flattened feature vector has 32 * 1 * 1 = 32 values, which is the
    input width of ``fc1``. The output is 10 raw class scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 input channel -> 16 feature maps
        # (3x3 kernel, stride 2, no padding), then ReLU and 2x2 max pooling.
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 feature maps with the same kernel/stride/padding,
        # then ReLU and 2x2 max pooling.
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Readout: 32 flattened features -> 10 class scores.
        self.fc1 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (N, 1, H, W)."""
        stage1 = self.maxpool1(self.relu1(self.cnn1(x)))
        stage2 = self.maxpool2(self.relu2(self.cnn2(stage1)))

        # Flatten everything except the batch dimension before the linear layer.
        flat = stage2.view(stage2.size(0), -1)
        return self.fc1(flat)

10 打印模型参数

# Instantiate the network and print torchsummary's report for a
# single-channel 28x28 input.
model = CNNModel()
import torchsummary
torchsummary.summary(model, (1, 28, 28))

在这里插入图片描述

11 模型训练和验证

# Cross Entropy Loss
error = nn.CrossEntropyLoss()

# SGD Optimizer
learning_rate = 0.1# 学习率:0.0001,0.001,0.01,0.1,0.5
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# CNN model training
count = 0
loss_list = []
iteration_list = []
accuracy_list = []
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):

        train = Variable(images.view(batch_size, 1, 28, 28))
        labels = Variable(labels)

        # Clear gradients
        optimizer.zero_grad()

        # Forward propagation
        outputs = model(train)

        # Calculate softmax and ross entropy loss
        loss = error(outputs, labels)

        # Calculating gradients
        loss.backward()

        # Update parameters
        optimizer.step()

        count += 1

        if count % 50 == 0:
            # Calculate Accuracy
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                test = Variable(images.view(batch_size, 1, 28, 28))

                # Forward propagation
                outputs = model(test)

                # Get predictions from the maximum value
                predicted = torch.max(outputs.data, 1)[1]

                # Total number of labels
                total += len(labels)

                correct += (predicted == labels).sum()

            accuracy = 100 * correct / float(total)

            # store loss and iteration
            loss_list.append(loss.data)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
        if count % 500 == 0:
            # Print Loss
            print('Iteration: {}  Loss: {}  Accuracy: {} %'.format(count, loss.data, accuracy))

在这里插入图片描述

12 loss可视化

# Plot the recorded training loss against the iteration count.
loss_axes = plt.gca()
loss_axes.plot(iteration_list, loss_list)
loss_axes.set_xlabel("Number of iteration")
loss_axes.set_ylabel("Loss")
loss_axes.set_title("CNN: Loss vs Number of iteration")
plt.show()

在这里插入图片描述

13 精度可视化

# visualization accuracy
# Plot the recorded test accuracy against the iteration count, in red.
accuracy_axes = plt.gca()
accuracy_axes.plot(iteration_list, accuracy_list, color="red")
accuracy_axes.set_xlabel("Number of iteration")
accuracy_axes.set_ylabel("Accuracy")
accuracy_axes.set_title("CNN: Accuracy vs Number of iteration")
plt.show()

在这里插入图片描述

  • 0
    点赞
  • 17
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
你可以按照以下步骤在PyTorch中实现CNN手写数字识别,包括使用CSV文件进行数据读取、保存和加载模型: 1. 导入所需的库和模块: ```python import torch import torch.nn as nn import torch.optim as optim import pandas as pd from torch.utils.data import DataLoader, Dataset ``` 2. 创建一个自定义的数据集类,用于读取CSV文件中的数据: ```python class DigitDataset(Dataset): def __init__(self, csv_file): self.data = pd.read_csv(csv_file) def __len__(self): return len(self.data) def __getitem__(self, idx): image = self.data.iloc[idx, 1:].values.reshape(28, 28).astype('float32') / 255.0 label = self.data.iloc[idx, 0] return image, label ``` 3. 定义CNN模型: ```python class CNN(nn.Module): def __init__(self): super(CNN, self).__init__() self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1) self.relu1 = nn.ReLU() self.pool1 = nn.MaxPool2d(kernel_size=2) self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1) self.relu2 = nn.ReLU() self.pool2 = nn.MaxPool2d(kernel_size=2) self.fc = nn.Linear(7*7*32, 10) def forward(self, x): x = self.conv1(x) x = self.relu1(x) x = self.pool1(x) x = self.conv2(x) x = self.relu2(x) x = self.pool2(x) x = x.view(x.size(0), -1) x = self.fc(x) return x ``` 4. 定义训练函数和测试函数: ```python def train(model, train_loader, criterion, optimizer): model.train() for images, labels in train_loader: optimizer.zero_grad() outputs = model(images.unsqueeze(1)) loss = criterion(outputs, labels) loss.backward() optimizer.step() def test(model, test_loader): model.eval() correct = 0 with torch.no_grad(): for images, labels in test_loader: outputs = model(images.unsqueeze(1)) _, predicted = torch.max(outputs.data, 1) correct += (predicted == labels).sum().item() accuracy = correct / len(test_loader.dataset) return accuracy ``` 5. 加载数据集并创建数据加载器: ```python train_dataset = DigitDataset('train.csv') test_dataset = DigitDataset('test.csv') train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False) ``` 6. 
创建CNN模型实例、损失函数和优化器: ```python model = CNN() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) ``` 7. 进行训练和测试: ```python num_epochs = 10 for epoch in range(num_epochs): train(model, train_loader, criterion, optimizer) accuracy = test(model, test_loader) print(f'Epoch {epoch+1}, Test Accuracy: {accuracy}') torch.save(model.state_dict(), 'digit_model.pt') ``` 8. 加载保存的模型并进行预测: ```python model = CNN() model.load_state_dict(torch.load('digit_model.pt')) # 假设有一个名为image的张量用于预测 output = model(image.unsqueeze(0).unsqueeze(0)) _, predicted = torch.max(output.data, 1) print(f'Predicted digit: {predicted.item()}') ``` 这就是使用CSV文件进行手写数字识别的基本步骤。你可以根据自己的需求进行修改和优化。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

王小葱鸭

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值