Convolutional neural networks (CNNs) are used widely in deep learning, above all in computer vision. Below is a simple convolutional network for recognizing handwritten digits.
The network is the stack "Convolution (convolution layer) - ReLU (activation) - Pooling (max pooling) - Affine (fully connected) - ReLU (activation) - Affine (fully connected) - Softmax (output layer, combined with the cross-entropy loss during training)". We implement it as a class named SimpleConvNet.
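Before reading the code, it helps to verify the tensor shapes flowing through this stack. A minimal sketch of the arithmetic (the numbers match the default hyperparameters used below):

# Conv output: (28 - 5 + 2*0)/1 + 1 = 24, so Conv1 emits 30 feature maps of 24x24.
# 2x2 pooling with stride 2 halves each side to 12, and 30*12*12 = 4320
# is the flattened input size of the first Affine layer.
input_size, filter_size, pad, stride, filter_num = 28, 5, 0, 1, 30
conv_out = (input_size - filter_size + 2*pad) // stride + 1   # 24
pool_out = conv_out // 2                                      # 12
print(conv_out, pool_out, filter_num * pool_out * pool_out)   # 24 12 4320

With the shapes in hand, here is the class itself: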
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # make the parent directory (common/, dataset/) importable
import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *
from common.gradient import numerical_gradient
class SimpleConvNet:
    """A simple convolutional neural network.

    conv - relu - pool - affine - relu - affine - softmax

    Parameters
    ----------
    input_dim : input shape (channels, height, width), e.g. (1, 28, 28)
    conv_param : convolution hyperparameters
        (filter_num, filter_size, pad, stride)
    hidden_size : number of neurons in the hidden fully connected layer
    output_size : output size (10 classes for MNIST)
    weight_init_std : standard deviation of the initial weights (e.g. 0.01)
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        # Initialize weights and biases
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Build the layers in forward order
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1: t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i*batch_size:(i+1)*batch_size]
            tt = t[i*batch_size:(i+1)*batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]
    # Numerical gradient (finite differences) -- slow, useful only for gradient checking
    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)
        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
        return grads
    # Gradient by backpropagation -- fast
    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients stored in each layer
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val
        # Re-bind the loaded weights to the layers that use them
        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i+1)]
            self.layers[key].b = self.params['b' + str(i+1)]
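With the class defined, a quick gradient check confirms that backpropagation and the numerical gradient agree. This is a hypothetical usage sketch; the small 1x10x10 input keeps the slow finite-difference pass tractable:

network = SimpleConvNet(input_dim=(1, 10, 10),
                        conv_param={'filter_num': 10, 'filter_size': 3, 'pad': 0, 'stride': 1},
                        hidden_size=10, output_size=10, weight_init_std=0.01)
x = np.random.rand(2, 1, 10, 10)   # two random fake images
t = np.array([1, 7])               # fake labels
grad_num = network.numerical_gradient(x, t)   # slow finite differences
grad_bp = network.gradient(x, t)              # fast backpropagation
for key in grad_num:
    diff = np.average(np.abs(grad_bp[key] - grad_num[key]))
    print(key, diff)   # a tiny value (e.g. around 1e-8) means the backward pass is correct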
Implementation of the convolution layer:
class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # Intermediate data kept for backward
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients of the weights and biases
        self.dW = None
        self.db = None

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)

        # Unfold the input into rows and flatten the filters into columns,
        # so the convolution becomes a single matrix product
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W
        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        # Fold the column gradients back into image shape
        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
        return dx
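The layer above leans on the im2col and col2im helpers from common.util. For reference, here is a minimal sketch of the two, consistent with how they are called here (a straightforward version with standard stride/padding handling):

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    # Unfold (N, C, H, W) into a 2D matrix with one row per filter position
    N, C, H, W = input_data.shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1
    img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
    return col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)

def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    # Inverse of im2col: accumulate the columns back into image positions
    N, C, H, W = input_shape
    out_h = (H + 2*pad - filter_h)//stride + 1
    out_w = (W + 2*pad - filter_w)//stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)
    img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]
    return img[:, :, pad:H + pad, pad:W + pad]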
Implementation of the pooling layer:
class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        self.x = None
        self.arg_max = None

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)

        # Unfold each pooling window into a row and take the max per row
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)

        # Route each gradient back to the position that produced the max
        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        return dx
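A tiny concrete check makes the pooling behavior easy to see (a hypothetical example; it assumes im2col is in scope, e.g. from the sketch above or common.util):

pool = Pooling(pool_h=2, pool_w=2, stride=2)
x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)  # one 4x4 feature map, values 0..15
out = pool.forward(x)
print(out.reshape(2, 2))
# [[ 5.  7.]
#  [13. 15.]]  -- the max of each non-overlapping 2x2 window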
Finally, we run a mini-batch training loop (via the Trainer class) to measure the recognition accuracy on handwritten digits:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from simple_convnet import SimpleConvNet
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

# The full dataset is large, so we train on a subset to save time
x_train, t_train = x_train[:5000], t_train[:5000]
x_test, t_test = x_test[:1000], t_test[:1000]
max_epochs = 20

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

# Save the trained parameters
network.save_params("params.pkl")
print("Saved Network Parameters!")

# Plot train/test accuracy per epoch
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker=markers['train'], label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker=markers['test'], label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
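If common.trainer is unavailable, the same training can be written as an explicit loop. A minimal sketch (it assumes the Adam class from common.optimizer, the same optimizer Trainer selects via optimizer='Adam'):

from common.optimizer import Adam

optimizer = Adam(lr=0.001)
batch_size = 100
iters_per_epoch = max(x_train.shape[0] // batch_size, 1)
for epoch in range(max_epochs):
    for _ in range(iters_per_epoch):
        batch_mask = np.random.choice(x_train.shape[0], batch_size)
        grads = network.gradient(x_train[batch_mask], t_train[batch_mask])
        optimizer.update(network.params, grads)   # updates the parameter arrays in place
    print("epoch %d  train acc %.3f  test acc %.3f" %
          (epoch + 1, network.accuracy(x_train, t_train),
           network.accuracy(x_test, t_test)))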
The training run prints the following log, and the accuracy plot is shown below:
train loss:0.014791653412
train loss:0.0209553373016
train loss:0.0219426514735
train loss:0.0131076389416
train loss:0.0131761959004
train loss:0.0129010800558
train loss:0.0254068149707
train loss:0.0119920964359
train loss:0.0127169736407
train loss:0.0141627506876
train loss:0.00552698383338
=============== Final Test Accuracy ===============
test acc:0.958
Saved Network Parameters!
The plot shows the accuracy on the training data and on the test data. The two curves track each other closely, with the training accuracy (99.2%) higher than the test accuracy (95.8%). Because we trained on only a small subset, the test accuracy has not peaked; with the full dataset it can exceed 98.9%, as shown next.
Training on the full dataset takes a few hours and yields the following train and test accuracy:
=== epoch:20, train acc:0.998, test acc:0.985 ===
=============== Final Test Accuracy ===============
test acc:0.9893
The final test accuracy reaches 98.93%.