Convolutional Neural Networks (CNN)
This article is just my personal study notes (I will take it down if it infringes on anything); if you spot any mistakes, corrections are very welcome.
The exercise comes from https://github.com/nndl/nndl.github.io
Problem description:
Use a convolutional neural network to classify the MNIST dataset.
Dataset:
MNIST contains 60,000 training images and 10,000 test images. The number of samples is large enough to train fairly complex models (e.g., deep CNNs), so it is often used as a test case for new pattern-recognition models, and it is a convenient dataset for students and researchers to experiment with. It is also relatively small, so it can be run directly on a laptop CPU.
Requirements:
TensorFlow version: implement the conv2d() and max_pool_2x2() functions, then fill in the 8 blanks of the two convolutional layers;
PyTorch version: fill in the parameters of nn.Conv2d( ) in self.conv1 and in self.conv2( ), and also complete the argument of x = x.view( ).
Both versions should reach a training accuracy above 96%.
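For reference, with "same" padding the output size follows out = (M - K + 2P)/S + 1. For a 28x28 input with stride 1, a 7x7 kernel therefore needs P = (7 - 1)/2 = 3 and a 5x5 kernel needs P = (5 - 1)/2 = 2, which are exactly the padding values used in the solution below.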
Solution
PyTorch
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import torch.nn.functional as F
import numpy as np
learning_rate = 1e-4
keep_prob_rate = 0.7   # note: nn.Dropout(p) takes the *drop* probability, so p = 0.7 zeroes 70% of the units
max_epoch = 3
BATCH_SIZE = 50
DOWNLOAD_MNIST = False
if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # no mnist dir, or the mnist dir is empty
    DOWNLOAD_MNIST = True
train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, transform=torchvision.transforms.ToTensor(), download=DOWNLOAD_MNIST)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
# take the first 500 test images, add a channel dimension, and scale the pixels to [0, 1]
test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1), volatile=True).type(torch.FloatTensor)[:500] / 255.
test_y = test_data.test_labels[:500].numpy()
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist/MNIST\raw\train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist/MNIST\raw\train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist/MNIST\raw\t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist/MNIST\raw\t10k-labels-idx1-ubyte.gz
(Each archive is extracted into ./mnist/MNIST\raw. torchvision also emits a few UserWarnings here: test_data/test_labels have been renamed to data/targets, and volatile has been removed in favor of with torch.no_grad():.)
test_x.shape,test_y.shape
(torch.Size([500, 1, 28, 28]), (500,))
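As the warnings above note, Variable(..., volatile=True) and the test_data/test_labels attributes are deprecated in recent PyTorch/torchvision releases. A roughly equivalent, non-deprecated way to prepare the test tensors (just a sketch, assuming torchvision's current data/targets attributes) is:

# equivalent test-set preparation without the deprecated APIs (sketch)
with torch.no_grad():
    test_x = test_data.data.unsqueeze(1).type(torch.FloatTensor)[:500] / 255.   # shape [500, 1, 28, 28], pixels scaled to [0, 1]
    test_y = test_data.targets[:500].numpy()                                    # shape (500,)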
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                # patch 7*7; 1 in channel; 32 out channels; stride 1
                # padding style is "same" (the convolution's input and output have the same spatial size)
                in_channels=1,    # 1 input channel (grayscale image)
                out_channels=32,  # 32 output channels
                kernel_size=7,    # 7*7 kernel
                stride=1,         # stride 1
                padding=3,        # number of zero-padding pixels = 3
            ),  # formula (p.113 of the book): out = (M - K + 2P)/S + 1 => 28 = (28 - 7 + 2P)/1 + 1 => P = 3, with M = out = 28 since the images are 28*28
            nn.ReLU(),            # activation function
            nn.MaxPool2d(2),      # pooling operation, output shape: [32, 14, 14]
        )
        self.conv2 = nn.Sequential(
            # line 1: convolution, patch 5*5, 32 in channels, 64 out channels, "same" padding, stride 1
            # line 2: activation function
            # line 3: pooling operation
            nn.Conv2d(
                in_channels=32,   # 32 input channels
                out_channels=64,  # 64 output channels
                kernel_size=5,    # 5*5 kernel
                stride=1,         # stride 1
                padding=2,        # number of zero-padding pixels = 2
            ),
            nn.ReLU(),            # activation function
            nn.MaxPool2d(2),      # pooling operation, output shape: [64, 7, 7]
        )
        self.out1 = nn.Linear(7 * 7 * 64, 1024, bias=True)  # first fully connected layer
        self.dropout = nn.Dropout(keep_prob_rate)            # note: nn.Dropout takes the drop probability, so 70% of the units are zeroed here
        self.out2 = nn.Linear(1024, 10, bias=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(-1, 64 * 7 * 7)       # flatten the output of conv2 to (batch_size, 64 * 7 * 7)
        out1 = self.out1(x)
        out1 = F.relu(out1)
        out1 = self.dropout(out1)
        out2 = self.out2(out1)
        output = F.softmax(out2, dim=1)  # note: nn.CrossEntropyLoss already applies log-softmax internally, so returning the raw logits out2 would be more standard
        return output
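As a quick sanity check of the shapes noted in the comments, one can push a dummy 28*28 image through the two convolutional blocks (a minimal sketch; the names cnn_demo and dummy are only illustrative):

# sanity-check the feature-map shapes with a dummy input (sketch)
cnn_demo = CNN()
dummy = torch.zeros(1, 1, 28, 28)
h1 = cnn_demo.conv1(dummy)   # expected: torch.Size([1, 32, 14, 14])
h2 = cnn_demo.conv2(h1)      # expected: torch.Size([1, 64, 7, 7])
print(h1.shape, h2.shape)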
def test(cnn):
    global prediction
    y_pre = cnn(test_x)
    _, pre_index = torch.max(y_pre, 1)
    pre_index = pre_index.view(-1)
    prediction = pre_index.data.numpy()
    correct = np.sum(prediction == test_y)
    return correct / 500.0
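Note that test() evaluates the network in its default training mode, so dropout stays active and gradients are tracked during evaluation. A slightly more careful variant (a sketch, not part of the original exercise) would switch to eval mode and disable gradient tracking:

# variant of test() that disables dropout and gradient tracking (sketch)
def evaluate(cnn):
    cnn.eval()                        # put dropout into inference mode
    with torch.no_grad():             # no gradients needed for evaluation
        y_pre = cnn(test_x)
        pre_index = torch.max(y_pre, 1)[1].view(-1)
        acc = np.sum(pre_index.numpy() == test_y) / 500.0
    cnn.train()                       # restore training mode
    return acc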
def train(cnn):
    optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    for epoch in range(max_epoch):
        for step, (x_, y_) in enumerate(train_loader):
            x, y = Variable(x_), Variable(y_)
            output = cnn(x)
            loss = loss_func(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step !=