Hands-On: MNIST Classification
There is an existing article online about MNIST classification: https://blog.csdn.net/out_of_memory_error/article/details/81414986
However, it uses the MNIST dataset that ships with PyTorch, while I want to practice on a Kaggle dataset in CSV format. This post walks through MNIST classification from CSV files.
The CSV files: the Kaggle Digit Recognizer data comes as train.csv (a label column plus 784 pixel columns, one per pixel of the 28×28 image), test.csv (pixel columns only), and sample_submission.csv.
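To get a feel for the layout, here is a quick peek with pandas (a minimal sketch; the shapes in the comments are what the Kaggle Digit Recognizer data should give, assuming the files sit under data/ as in the code later in this post):

import pandas as pd

train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')
print(train.shape)         # (42000, 785): 'label' plus pixel0..pixel783
print(test.shape)          # (28000, 784): pixel columns only
print(train['label'][:3])  # the digit labels, 0-9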
The neural network
The network part is actually fairly easy; let's start with a few simple networks:
import torch.nn as nn

class FCNet(nn.Module):
    def __init__(self, input_shape, out_dim):
        super(FCNet, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(input_shape, 1024), nn.ReLU(True))  # hidden layer 1
        self.layer2 = nn.Sequential(nn.Linear(1024, 512), nn.ReLU(True))  # hidden layer 2
        self.layer3 = nn.Sequential(nn.Linear(512, out_dim))  # output layer

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
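Before wiring up the data, it's worth a quick sanity check that the shapes line up (a minimal sketch; the dummy batch of 4 is arbitrary):

import torch

net = FCNet(784, 10)         # 28*28 = 784 flattened pixels, 10 digit classes
dummy = torch.randn(4, 784)  # a fake batch of 4 flattened images
out = net(dummy)
print(out.shape)             # expected: torch.Size([4, 10])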
Of course, the blog post referenced above also defines a few other simple networks:
import torch.nn as nn

# A three-layer fully connected network; every layer is purely linear
class simpleNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(simpleNet, self).__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

# Add activation functions to make the network nonlinear
class Activation_Net(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Activation_Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.ReLU(True))
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

# Add batch normalization, a technique that speeds up convergence
class Batch_Net(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Batch_Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.BatchNorm1d(n_hidden_1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.BatchNorm1d(n_hidden_2), nn.ReLU(True))
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x
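A caveat on Batch_Net: BatchNorm1d behaves differently at training and inference time, so the module has to be switched between modes explicitly. Forgetting net.eval() before prediction is a common reason a batch-norm network "doesn't work" (my guess at the trouble mentioned later in this post, not a diagnosis from the original). A minimal sketch, with arbitrary hidden sizes and some_test_tensor standing in for real data:

import torch

net = Batch_Net(784, 300, 100, 10)  # the hidden sizes 300/100 are arbitrary

net.train()  # training mode: BatchNorm normalizes with per-batch statistics
# ... run the training loop here ...

net.eval()   # eval mode: BatchNorm uses its accumulated running statistics
with torch.no_grad():
    some_test_tensor = torch.randn(5, 784)  # placeholder for real test data
    predictions = net(some_test_tensor)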
Data processing
import pandas as pd
import numpy as np
from models import FCNet, simpleNet, Activation_Net
from torch.autograd import Variable
from torch import nn, optim
import torch

train = pd.read_csv('data/train.csv')
# Training on the full MNIST set is a bit slow, so you could keep only a subset here
X = train
y = X['label']
X = X.drop(labels=['label'], axis=1)
X = X / 255.0  # scale pixel values to [0, 1]
print(X.shape)
# Convert X and y to tensors
X, y = np.array(X), np.array(y)
X = X.astype(np.float32)
X, y = torch.from_numpy(X), torch.from_numpy(y)

# A generator that yields mini-batches during training
def dataG(dataX, dataY, batch):
    for i in range(0, len(dataX), batch):
        yield dataX[i:i+batch], dataY[i:i+batch]
Here we wrote a small generator of our own, which lets training proceed in mini-batches of batch_size.
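One thing this generator does not do is shuffle, so every epoch sees the samples in the same order. A shuffling variant is a small change (this version is my addition, not from the original post):

def dataG_shuffled(dataX, dataY, batch):
    idx = torch.randperm(len(dataX))       # a fresh random order each call
    dataX, dataY = dataX[idx], dataY[idx]
    for i in range(0, len(dataX), batch):
        yield dataX[i:i+batch], dataY[i:i+batch]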
Training
net = FCNet(784, 10)  # the batch-norm network still isn't working, so stick with a plain network for now
if torch.cuda.is_available():
    net = net.cuda()
optimizer = optim.SGD(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
EPOCH = 32
BATCH = 32
# Start training
for i in range(EPOCH):
    print('EPOCH:', i, '/', EPOCH)
    for count, (miniX, miniY) in enumerate(dataG(X, y, BATCH)):
        miniX = Variable(miniX)
        miniY = Variable(miniY)
        if torch.cuda.is_available():
            miniX, miniY = miniX.cuda(), miniY.cuda()
        predictions = net(miniX)  # forward pass
        loss = criterion(predictions, miniY.long())  # compute the loss
        optimizer.zero_grad()
        loss.backward()  # backpropagate
        optimizer.step()
        if count % 50 == 0:  # print the loss every 50 mini-batches
            print('loss: {}'.format(loss.item()))
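Note that none of the networks above put a softmax on the output layer, and that is fine: nn.CrossEntropyLoss applies log-softmax internally, so it expects raw logits plus integer class labels (hence the miniY.long() cast). A tiny self-contained check:

import torch
from torch import nn

criterion = nn.CrossEntropyLoss()
logits = torch.tensor([[2.0, 0.5, -1.0]])  # raw scores for one sample, 3 classes
target = torch.tensor([0])                 # the true class index, as a long tensor
print(criterion(logits, target))           # small loss: class 0 already has the largest logit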
[Screenshot of the training log]
Test set
test = pd.read_csv('data/test.csv')
sample = pd.read_csv('data/sample_submission.csv')
X_test = np.array(test)
# Test set: apply the same preprocessing as the training data
X_test = X_test.astype(np.float32)
X_test = X_test / 255.0  # scale to [0, 1] to match the training data
X_test = torch.from_numpy(X_test)  # convert to a tensor
if torch.cuda.is_available():
    X_test = X_test.cuda()
X_test = Variable(X_test)  # wrap in a Variable
predic = net(X_test)
print(predic[:5])
predic = predic.data.cpu().numpy()  # the network outputs a Variable; convert it to numpy
result = np.argmax(predic, axis=1)  # the predicted digit is the index of the largest output
sub = {'ImageId': sample['ImageId'],
       'Label': result}
sub = pd.DataFrame(sub)
sub.to_csv('data/submission.csv', index=False)
Result: Kaggle file uploads have not been working for me lately, so I don't know the actual test-set score. When I find the time, I'll split off part of the training set and evaluate on that.
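As a stopgap for that missing score, here is a minimal validation sketch (the 10% holdout and the variable names are my own, not from the original post; X, y, and net are the objects defined above):

n_val = len(X) // 10                      # hold out the last 10% of the training data
X_train, X_val = X[:-n_val], X[-n_val:]
y_train, y_val = y[:-n_val], y[-n_val:]

# ... train on (X_train, y_train) exactly as above, then:
net.eval()
with torch.no_grad():
    val_out = net(X_val.cuda() if torch.cuda.is_available() else X_val)
    val_pred = val_out.argmax(dim=1).cpu()
print('validation accuracy:', (val_pred == y_val.long()).float().mean().item())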
Complete code
import pandas as pd
import numpy as np
from models import FCNet, simpleNet, Activation_Net
from torch.autograd import Variable
from torch import nn, optim
import torch

train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')
sample = pd.read_csv('data/sample_submission.csv')
X_test = np.array(test)
X = train
y = X['label']
X = X.drop(labels=['label'], axis=1)
X = X / 255.0  # scale pixel values to [0, 1]
print(X.shape)
# Convert to tensors
X, y = np.array(X), np.array(y)
X = X.astype(np.float32)
X, y = torch.from_numpy(X), torch.from_numpy(y)

# A generator that yields mini-batches during training
def dataG(dataX, dataY, batch):
    for i in range(0, len(dataX), batch):
        yield dataX[i:i+batch], dataY[i:i+batch]

# The networks
net1 = Activation_Net(28*28, 100, 100, 10)
net2 = FCNet(784, 10)  # the batch-norm network still isn't working, so use a plain network for now
if torch.cuda.is_available():
    net1 = net1.cuda()
    net2 = net2.cuda()
optimizer = optim.SGD(net2.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
EPOCH = 32
BATCH = 32
# Start training
for i in range(EPOCH):
    print('EPOCH:', i, '/', EPOCH)
    for count, (miniX, miniY) in enumerate(dataG(X, y, BATCH)):
        miniX = Variable(miniX)
        miniY = Variable(miniY)
        # print(miniX.shape)
        # print(miniY)
        if torch.cuda.is_available():
            miniX, miniY = miniX.cuda(), miniY.cuda()
        predictions = net2(miniX)  # forward pass
        # print(predictions.shape)
        loss = criterion(predictions, miniY.long())  # compute the loss
        optimizer.zero_grad()
        loss.backward()  # backpropagate
        optimizer.step()
        if count % 1000 == 0:  # print the loss every 1000 mini-batches
            print('loss: {}'.format(loss.item()))
# Test set
X_test = X_test.astype(np.float32)
X_test = X_test / 255.0  # scale to [0, 1] to match the training preprocessing
X_test = torch.from_numpy(X_test)
if torch.cuda.is_available():
    X_test = X_test.cuda()
X_test = Variable(X_test)
predic = net2(X_test)
print(predic[:5])
predic = predic.data.cpu().numpy()  # the network outputs a Variable; convert it to numpy
result = np.argmax(predic, axis=1)
# Save as a CSV file
sub = {'ImageId': sample['ImageId'],
       'Label': result}
sub = pd.DataFrame(sub)
sub.to_csv('data/submission.csv', index=False)