利用最基础的卷积神经网络训练手写识别
本文利用pytorch搭建了一个最基础的卷积神经网络来训练手写识别
其卷积网络如下
第一次经过channel为10 卷积核大小为5×5的卷积操作
第二次经过一次卷积核为2×2的maxpool池化操作
第三次经过channel为20 卷积核大小为5×5的卷积操作
第四次经过一次卷积核为2×2的maxpool池化操作
最后将该map打平,其特征有320个经过一个全连接层线性输出10个输出值
具体代码如下
导入本地MNIST数据集并转化为numpy形式
import torch
import torch.nn as nn
import numpy as np
from visdom import Visdom
import os
import gzip
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
def load_data(data_folder):
    """Load the four gzipped MNIST IDX files found in *data_folder*.

    Returns ((x_train, y_train), (x_test, y_test)) as numpy uint8 arrays;
    image arrays are shaped (N, 28, 28), label arrays are flat vectors.
    """
    # Fixed MNIST file names: each label file precedes its image file.
    file_names = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz',
    ]
    paths = [os.path.join(data_folder, name) for name in file_names]

    def read_labels(path):
        # IDX label format: 8-byte header, then one uint8 per label.
        with gzip.open(path, 'rb') as fh:
            return np.frombuffer(fh.read(), np.uint8, offset=8)

    def read_images(path, count):
        # IDX image format: 16-byte header, then 28x28 uint8 pixels per image.
        with gzip.open(path, 'rb') as fh:
            raw = np.frombuffer(fh.read(), np.uint8, offset=16)
        return raw.reshape(count, 28, 28)

    y_train = read_labels(paths[0])
    x_train = read_images(paths[1], len(y_train))
    y_test = read_labels(paths[2])
    x_test = read_images(paths[3], len(y_test))
    return (x_train, y_train), (x_test, y_test)
(train_images, train_labels), (test_images, test_labels) = load_data('C:\\Users\\Lenovo\\Desktop\\pythonLearn\\machine_learning\\ex3\\data')
定义构建dataset与dataloader
class get_Data(Dataset):
    """Minimal map-style dataset wrapping pre-loaded feature/label arrays.

    Indexing returns the (feature, label) pair at that position; length
    is the number of labels.
    """

    def __init__(self, x, y):
        self.inputs, self.labels = x, y
        self.length = len(y)

    def __getitem__(self, item):
        # One sample/label pair, consumed by DataLoader's default collate.
        return self.inputs[item], self.labels[item]

    def __len__(self):
        return self.length
def get_data(train_images, train_labels, test_images, test_labels):
    """Wrap the raw arrays in datasets and return shuffling DataLoaders.

    Both loaders use batch_size=32 and shuffle=True, matching what the
    training loop expects.
    """
    loaders = []
    for images, labels in ((train_images, train_labels),
                           (test_images, test_labels)):
        dataset = get_Data(images, labels)
        loaders.append(DataLoader(dataset, batch_size=32, shuffle=True))
    return loaders[0], loaders[1]
卷积神经网络的定义
class Net(nn.Module):
    """Small LeNet-style CNN for 28x28 MNIST digits.

    Architecture: conv(1->10, 5x5) -> maxpool(2) -> relu ->
    conv(10->20, 5x5) -> maxpool(2) -> relu -> flatten(320) -> linear(320->10).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.maxpool = nn.MaxPool2d(2)
        self.linear = nn.Linear(320, 10)

    def forward(self, x):
        """Return raw class logits (batch, 10) for input (batch, 1, 28, 28)."""
        pred = self.conv1(x)            # -> (batch, 10, 24, 24)
        pred = self.maxpool(pred)       # -> (batch, 10, 12, 12)
        pred = F.relu(pred, inplace=True)
        pred = self.conv2(pred)         # -> (batch, 20, 8, 8)
        pred = self.maxpool(pred)       # -> (batch, 20, 4, 4)
        pred = F.relu(pred, inplace=True)
        pred = pred.view(len(pred), -1)  # flatten to (batch, 320)
        return self.linear(pred)

    # NOTE(review): this method shadows nn.Module.train(mode); do NOT call
    # self.eval() or self.train(True) inside this class — both would invoke
    # this method instead of toggling training mode.
    def train(self, num, learning_rate, train_data, device, test_loader=None):
        """Train for `num` epochs with Adam, plotting to a running Visdom server.

        test_loader: validation DataLoader evaluated every 10 epochs. Defaults
        to the module-level global `test_data` for backward compatibility with
        the original script (which read that global directly).
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        criteon = nn.CrossEntropyLoss().to(device)
        vis = Visdom()
        cnt = 0
        self.device = device  # remembered for test_curancy
        for epoch in range(num):
            total_loss = 0
            for inputs, labels in train_data:
                # Cast to float64 because the net is created with .double().
                inputs = inputs.double().to(device)
                labels = labels.to(device)
                inputs = inputs.unsqueeze(1)  # add channel dim: (B, 1, 28, 28)
                pred = self(inputs)
                loss = criteon(pred, labels.long())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            print("第{}代训练,误差:{}".format(epoch + 1, total_loss))
            vis.line([total_loss], [epoch + 1], update='append', win="loss_train")
            if epoch % 10 == 0:
                # Bug fix: the original read the global `test_data` directly;
                # prefer the explicit parameter, falling back to that global.
                eval_loader = test_loader if test_loader is not None else test_data
                cur = self.test_curancy(eval_loader)
                cnt = cnt + 1
                vis.line([cur], [cnt], update='append', win='验证精度')
                print("第{}次验证,精度为:{}".format(cnt, cur))

    def test_curancy(self, test_data):
        """Return classification accuracy over `test_data` (a DataLoader)."""
        count = 0
        total = 0
        # Bug fix: evaluation does not need gradients; no_grad saves memory
        # and avoids building a graph over the whole validation set.
        with torch.no_grad():
            for inputs, labels in test_data:
                inputs = inputs.double().unsqueeze(1).to(self.device)
                labels = labels.to(self.device)
                pred_index = torch.argmax(self(inputs), dim=1)
                count += torch.sum(torch.eq(pred_index, labels)).item()
                total += labels.size(0)
        # Bug fix: divide by the actual sample count rather than the
        # hard-coded 10000, so accuracy is correct for any loader size.
        return count / total if total else 0.0
生成神经网络并调用
# Select the first GPU; this fails if CUDA is not available.
device = torch.device("cuda:0")
# Build the network on the GPU in float64 to match the .double() casts in training.
net = Net().to(device).double()
# Wrap the raw MNIST arrays in shuffling DataLoaders (batch size 32).
train_data,test_data = get_data(train_images,train_labels,test_images,test_labels)
# Train for 100 epochs at lr=0.001; note Net.train shadows nn.Module.train here.
net.train(100,0.001,train_data,device)