比赛地址:Digit Recognizer | Kaggle
数据集下载地址:链接: https://pan.baidu.com/s/1sOhknWv0bP-loqczapih4w 提取码: urad
数据集KaggleAPI:kaggle competitions download -c digit-recognizer
数据集中包含三个文件:
1.train.csv
文件包含42000行和785列,其中第一列为标签列,其余784列为像素值,实际为28*28的图像。
2.test.csv
文件包含28000行和784列,相较于train.csv少了第一列标签列,其余784列同样为28*28的像素。
3.sample_submission.csv
本文件提供一个参考提交格式, 第二列标签列值全为0。提交时输出的结果应和本文件格式一致,第一列为ImageId,第二列为预测的Label。
了解了比赛的数据集后,接下来开始对数据进行处理
首先导入需要使用的模块
import torch
import torch.nn as nn
from torch.nn import functional as F
from resnest.torch import resnest50
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import transforms
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.model_selection import KFold
import cv2
import os
import matplotlib.pyplot as plt
import torchvision.models as models
from tqdm import tqdm
接下来正式开始数据分析与处理步骤
1.数据集的读取与格式处理
# 1. Read the raw CSV files and reshape the flat pixel rows into images.
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')
# Pop the label column off the training frame so the remaining 784
# columns are pure pixel values; keep the labels for supervision.
labels = np.array(train_df.pop('label'))
# Each row of 784 pixels becomes a 1x28x28 single-channel image.
# Use -1 for the batch dimension instead of hard-coding 42000/28000 so
# the same code works on any subset of the data.
train_imgs = np.array(train_df).reshape((-1, 1, 28, 28))
test_imgs = np.array(test_df).reshape((-1, 1, 28, 28))
2.重写Dataset
#读取训练数据
class TrainData(Dataset):
    """Dataset pairing training images with their integer labels."""

    def __init__(self, img, label):
        self.img = img
        self.label = label
        # Cache the length once; one label per image.
        self.len = len(label)

    def __getitem__(self, index):
        # Return the (image, label) pair at the requested position.
        return self.img[index], self.label[index]

    def __len__(self):
        return self.len

train_data = TrainData(train_imgs, labels)
#读取测试数据
class TestData(Dataset):
    """Dataset over unlabeled test images (no target column)."""

    def __init__(self, img):
        self.img = img
        self.len = len(img)

    def __getitem__(self, index):
        # Only an image is returned — the test set has no labels.
        return self.img[index]

    def __len__(self):
        return self.len

test_data = TestData(test_imgs)
3.设置训练模型
from resnest.torch import resnest50
def set_parameter_requires_grad(model, feature_extracting):
    """Optionally freeze all parameters of ``model``.

    When ``feature_extracting`` is True the model is used as a fixed
    feature extractor, so gradients are disabled for every parameter.
    When False the model is left fully trainable (fine-tuning).
    """
    # The original code contained a no-op ``model = model`` line here;
    # it has been removed.
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
def resnest_model(num_classes, feature_extract = False):
    """Build a ResNeSt-50 classifier adapted to 1-channel 28x28 input.

    num_classes: size of the final classification layer.
    feature_extract: if True, freeze the pretrained backbone so only
        the newly created layers (stem conv and fc head) are trained.
    """
    model_ft = resnest50(pretrained=True)
    # Freeze the pretrained weights FIRST. The original code replaced
    # conv1[0] before freezing, which froze the brand-new 1-channel stem
    # conv as well — it would never have trained with feature_extract=True.
    set_parameter_requires_grad(model_ft, feature_extract)
    # ResNeSt's deep stem expects 3 input channels, but MNIST images are
    # single channel, so swap in a fresh 1-channel first conv.
    model_ft.conv1[0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes))
    return model_ft
4.设置超参数
# 4. Hyper-parameters and reproducibility settings.
k_folds = 5
num_epochs = 30
learning_rate = 1e-4
weight_decay = 1e-3
train_loss_function = nn.CrossEntropyLoss()
valid_loss_function = nn.CrossEntropyLoss()
results = {}  # fold index -> final validation accuracy
# Seed torch so weight init and shuffling are reproducible.
torch.manual_seed(42)
# KFold must also be given a fixed random_state: with shuffle=True and no
# seed, the fold assignment changes on every run even though torch itself
# is seeded, defeating the reproducibility intended above.
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
5.开始训练
# 5. K-fold training: train one model per fold, checkpoint its weights,
#    and record the fold's validation accuracy for the final summary.
# `device` was never defined in the original script (NameError); define it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for fold, (train_ids, valid_ids) in enumerate(kfold.split(train_data)):
    save_path = f'./model-fold-{fold}.pth'
    # Samplers restrict each DataLoader to the indices of this fold.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, sampler=train_subsampler, num_workers=0)
    valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_ids)
    validloader = torch.utils.data.DataLoader(train_data, batch_size=32, sampler=valid_subsampler, num_workers=0)
    model = resnest_model(10)
    model = model.to(device)
    model.device = device
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    for epoch in range(num_epochs):
        model.train()
        print(f'Starting epoch {epoch+1}')
        train_losses = []
        for batch in tqdm(trainloader):
            # Name the batch targets `targets` — the original used `labels`,
            # shadowing the module-level labels array.
            imgs, targets = batch
            imgs = imgs.to(device, dtype=torch.float32)
            targets = targets.to(device)
            logits = model(imgs)
            # CrossEntropyLoss requires integer class indices.
            loss = train_loss_function(logits, targets.long())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        print("第%d个epoch的学习率:%f" % (epoch + 1, optimizer.param_groups[0]['lr']))
        # Step the cosine schedule once per epoch.
        scheduler.step()
        train_loss = np.mean(train_losses)
        print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}")
        # Checkpoint after every epoch so the latest weights are on disk.
        torch.save(model.state_dict(), save_path)

        # ---- Validation for this epoch ----
        model.eval()
        valid_losses = []
        valid_accs = []
        with torch.no_grad():
            for batch in tqdm(validloader):
                imgs, targets = batch
                logits = model(imgs.to(device, dtype=torch.float32))
                targets = targets.to(device)
                loss = valid_loss_function(logits, targets.long())
                acc = (logits.argmax(dim=-1) == targets).float().mean()
                valid_losses.append(loss.item())
                valid_accs.append(acc.item())
        valid_loss = np.mean(valid_losses)
        valid_acc = np.mean(valid_accs)
        print(f"[ Valid | {epoch + 1:03d}/{num_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
    # %f, not %d: accuracy is a float in [0, 1]; the original %d always
    # printed 0.
    print('Accuracy for fold %d: %f' % (fold, valid_acc))
    print('--------------------------------------')
    results[fold] = valid_acc
# Print fold results
# Summarize cross-validation accuracy across all trained folds.
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
print('--------------------------------')
for key, value in results.items():
    print(f'Fold {key}: {value} ')
# Mean accuracy over every fold recorded in `results`.
print(f'Average: {sum(results.values()) / len(results)} ')
5次交叉验证训练后所得结果:
可见训练精度大致在99.3%左右
6.进行测试集的测试
# 6. Inference: run each fold's saved model over the test set and write
#    one submission file per fold (merged by majority vote later).
# `device` was never defined in the original script (NameError); define it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
testloader = torch.utils.data.DataLoader(
    test_data,
    batch_size=32, num_workers=0)
model = resnest_model(10)
model = model.to(device)
for test_fold in range(k_folds):
    model_path = f'./model-fold-{test_fold}.pth'
    saveFileName = f'./submission-fold-{test_fold}.csv'
    model.load_state_dict(torch.load(model_path))
    model.eval()
    # Collected class predictions for the whole test set, in order.
    predictions = []
    for batch in tqdm(testloader):
        imgs = batch
        with torch.no_grad():
            logits = model(imgs.to(device, dtype=torch.float32))
        # Greatest logit wins as the predicted class.
        predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())
    # Use a distinct name for the submission frame: the original rebound
    # `test_data` here, clobbering the Dataset created earlier in the file.
    # The redundant predictions -> preds copy loop is also gone.
    submission_df = pd.read_csv('./sample_submission.csv')
    submission_df['Label'] = pd.Series(predictions)
    submission = pd.concat([submission_df['ImageId'], submission_df['Label']], axis=1)
    submission.to_csv(saveFileName, index=False)
    print("Finish current work!")
7.进行五次交叉验证所得结果的融合工作
# 7. Collect the per-fold predictions side by side for majority voting.
df_all = pd.read_csv('./sample_submission.csv')
# Keep only the ImageId column as the skeleton for the final submission.
df_id = df_all.drop(['Label'], axis=1)
# Empty frame sharing df_id's index; it gains one column per fold below.
# (The original did a df_id.copy() here that was immediately overwritten.)
df_s = df_id.drop(['ImageId'], axis=1)
for i in range(0, k_folds):
    df = pd.read_csv(f'./submission-fold-{i}.csv')
    df_s[i] = df['Label']
df_s.head()
所得的表格:
对每一行的结果求众数:
# Majority vote: the row-wise mode picks the label most folds agree on;
# ties are broken by taking the first (smallest) mode column.
result = df_s.mode(axis=1).iloc[:, 0]
df_s['result'] = result.astype('uint8')
df_id['Label'] = df_s['result']
df_id.head()
所得结果:
8.结果保存
# 8. Write the ensembled predictions to disk. DataFrame.to_csv returns
# None when given a path, so the original `submission = ...` assignment
# was meaningless and has been dropped.
df_id.to_csv('./result.csv', index=False)
print("Finish all the works!")
至此,我们完成了对于MNIST数据集的全部工作,此时将result.csv文件提交至kaggle竞赛即可完成本次竞赛。
9.竞赛结果
可以看到,在使用ResNest进行迁移学习后,我们取得了0.99546的预测精度。
10.后续精度提高
对于本次竞赛,可以考虑以下几种措施进一步提升预测精度:
(1):使用多个神经网络进行预测,取多个模型预测的众数
(2):更换超参数,如增大或减小初始学习率,Adam优化器参数调整
(3):使用不同的resnest随机种子,选取预测精度高的种子。