Numpy + CNN: Handwritten Digit Recognition
Package
import math
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
Load MNIST Dataset
def one_hot_label(y):
# y.shape = (60000,)
if y.ndim == 1:
one_hot_label = np.zeros((y.shape[0], 10))
y = y.reshape(y.shape[0])
one_hot_label[range(y.shape[0]), y] = 1
# y.shape = (60000,10)
else:
one_hot_label = np.zeros(y.shape)
index = np.argmax(y, axis=1)
one_hot_label[range(y.shape[0]), index] = 1
return one_hot_label
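As a quick check (illustrative values, not from the dataset), one_hot_label turns integer labels into one-hot rows:
# illustrative sanity check of one_hot_label
demo_y = np.array([3, 0, 7])
print(one_hot_label(demo_y))  # 3x10 array with a single 1 per row, in columns 3, 0 and 7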
(x_train_origin, t_train_origin), (x_test_origin,t_test_origin) = tf.keras.datasets.mnist.load_data()
X_train = x_train_origin / 255.0
X_test = x_test_origin / 255.0
m, h, w = x_train_origin.shape
X_train = X_train.reshape((m, 1, h, w))
y_train = one_hot_label(t_train_origin)
m, h, w = x_test_origin.shape
X_test = X_test.reshape((m, 1, h, w))
y_test = one_hot_label(t_test_origin)
print("shape of x_train is :" + repr(X_train.shape))
print("shape of t_train is :" + repr(y_train.shape))
print("shape of x_test is :" + repr(X_test.shape))
print("shape of t_test is :" + repr(y_test.shape))
shape of x_train is :(60000, 1, 28, 28)
shape of t_train is :(60000, 10)
shape of x_test is :(10000, 1, 28, 28)
shape of t_test is :(10000, 10)
Plot One Sample
index = 0
plt.imshow(X_train[index].reshape((28, 28)), cmap=plt.cm.gray)
plt.show()
print("y is:" + str(np.argmax(y_train[index])))
y is:5
Auxiliary Functions
Implement relu activation function
def relu(input_X):
"""
Arguments:
input_X -- a numpy array
Return :
A -- a numpy array
"""
A = np.maximum(0, input_X)
return A
Implement Softmax function
Hint:
Softmax([x_1,x_2,...,x_n])=[\frac{exp(x_1)}{\sum_{i=1}^n{exp(x_i)}},\frac{exp(x_2)}{\sum_{i=1}^n{exp(x_i)}},...,\frac{exp(x_n)}{\sum_{i=1}^n{exp(x_i)}}]
def softmax(input_X):
"""
Arguments:
input_X -- a numpy array
Return :
A -- a numpy array with the same shape as input_X
"""
total = np.sum(np.exp(input_X), axis=1, keepdims=True)
A = np.exp(input_X) / total
return A
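np.exp can overflow for large inputs; a numerically stable variant (an addition to the original, shown only as a sketch) subtracts the row-wise maximum before exponentiating, which leaves the result unchanged:
def softmax_stable(input_X):
    # subtracting the per-row max does not change the softmax output but avoids overflow
    shifted = input_X - np.max(input_X, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)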
Implement zero pad function
def zero_pad(X, pad):
"""
Argument:
X -- python numpy array of shape (m, n_C, n_H, n_W) representing a batch of m input samples
pad -- integer, amount of padding around each image on vertical and horizontal dimensions
Returns:
X_pad -- padded input of shape (m, n_C, n_H + 2*pad, n_W + 2*pad)
"""
X_pad = np.pad(X, ((0, 0), (0, 0), (pad, pad), (pad, pad)), "constant")
return X_pad
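For example (illustrative shapes), padding by 2 adds two rows/columns on each side of the spatial dimensions only:
demo_X = np.zeros((2, 1, 28, 28))
print(zero_pad(demo_X, 2).shape)  # (2, 1, 32, 32)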
Loss Function
Implement cross-entropy loss function
H(p, q)=\sum_{i=1}^{n} p\left(x_{i}\right) \log \frac{1}{q\left(x_{i}\right)}=-\sum_{i=1}^{n} p\left(x_{i}\right) \log q\left(x_{i}\right)
def cross_entropy_error(labels, logits):
"""
labels -- true label (one_hot), shape:(m,n_classes)
logits -- predicted probability, shape:(m,n_classes)
cost -- cross entropy loss
"""
cost = -np.mean(np.sum(labels * np.log(logits + 1e-7), axis=1))  # small epsilon guards against log(0)
return cost
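A small worked example (illustrative values): a confident correct prediction gives a low cost, an unsure one a higher cost:
demo_labels = np.array([[0, 1, 0], [1, 0, 0]])
demo_probs = np.array([[0.1, 0.8, 0.1], [0.3, 0.4, 0.3]])
print(cross_entropy_error(demo_labels, demo_probs))  # -(log 0.8 + log 0.3)/2 ≈ 0.71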
Convolutional Layer
class Convolution:
def __init__(self, W, fb, stride=1, pad=0):
"""
W -- weight parameters for FN filters, shape (FN, n_C, FH, FW):
FN is the number of filters
n_C is the number of channels in the input_X
FH, FW are the height and width of each filter.
fb -- bias parameters, one per filter, shape (1, FN)
stride -- step size
pad -- integer, amount of padding around each image on vertical and horizontal dimensions
self.dW -- gradient of W
self.db -- gradient of fb
"""
self.W = W
self.fb = fb
self.stride = stride
self.pad = pad
self.X = None
self.dW = None
self.db = None
self.out_shape = None
def forward(self, input_X):
"""
input_X-- the shape is (m,n_C,Height,Width)
Return:
Z -- conv output, numpy array of shape (m, FN, n_H, n_W)
"""
self.X = input_X
FN, n_C, FH, FW = self.W.shape
m, n_C, input_h, input_w = self.X.shape
pad = self.pad
stride = self.stride
W = self.W
fb = self.fb
# Compute the dimensions of the CONV output volume
n_H = int((input_h - FH + 2 * pad) / stride) + 1
n_W = int((input_w - FW + 2 * pad) / stride) + 1
# Initialize the output volume Z with zeros.
Z = np.zeros((m, FN, n_H, n_W))
# Create X_pad by padding input_X
# X_pad -- padded input of shape (m, n_C, input_h + 2*pad, input_w + 2*pad)
X_pad = zero_pad(input_X, pad)
for i in range(m): # loop over the batch of training examples
x_pad = X_pad[i] # Select ith training example's padded activation
###implement convolutional operation on each sample and update Z
for h in range(n_H):
for w in range(n_W):
for c in range(FN):
h_start = h * stride
h_end = h_start + FH
w_start = w * stride
w_end = w_start + FW
reception = x_pad[:, h_start:h_end, w_start:w_end]
val = np.sum(reception * W[c]) + fb[0, c]  # add the bias once per output element
Z[i, c, h, w] = val
# Making sure your output shape is correct
assert Z.shape == (m, FN, n_H, n_W)
self.out_shape = Z.shape
return Z
def backward(self, dZ, learning_rate):
"""
dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, FN, n_H, n_W)
dX -- gradient of the input_X, the shape is the same with input_X's shape
"""
# print("==== Conv backbward ==== ")
assert dZ.shape == self.out_shape
###=====start your code=====#####
# Retrieve dimensions from input_X's shape
m, n_C, input_h, input_w = self.X.shape
# Retrieve dimensions from W's shape
FN, n_C, FH, FW = self.W.shape
# Retrieve dimensions from dZ's shape
m, FN, n_H, n_W = dZ.shape
# Initialize dX, dW, db with zeros
dX = np.zeros(self.X.shape)
self.dW = np.zeros(self.W.shape)
self.db = np.zeros(self.fb.shape)
# Pad Input_X and dX
X_pad = zero_pad(self.X, self.pad)
dX_pad = zero_pad(dX, self.pad)
assert dX.shape == self.X.shape
for i in range(m): # loop over the training examples
# select ith training example from X_pad and dX_pad
x_pad = X_pad[i]
dx_pad = dX_pad[i]
### implement backward propagation on each example, updating dX_pad, self.dW and self.db
for h in range(n_H):
for w in range(n_W):
for c in range(FN): # loop over the number of the filters
h_start = h * self.stride
h_end = h_start + FH
w_start = w * self.stride
w_end = w_start + FW
reception = x_pad[:, h_start:h_end, w_start:w_end]
dx_pad[:, h_start:h_end, w_start:w_end] += (self.W[c] * dZ[i, c, h, w])
self.dW[c] += (reception * dZ[i, c, h, w])
self.db[0, c] += dZ[i, c, h, w]  # accumulate the bias gradient for filter c only
# Remove the padding from dx_pad to recover dX[i]
if self.pad != 0:
dX[i] = dx_pad[:, self.pad:-self.pad, self.pad:-self.pad]
else:
dX[i] = dx_pad
assert dX.shape == self.X.shape
###update parametes by using gradient descent method
self.W -= learning_rate * self.dW
self.fb -= learning_rate * self.db
return dX
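A minimal forward-shape check for this layer (illustrative filter sizes and batch, not part of the original model):
demo_W = np.random.randn(2, 1, 3, 3)   # 2 filters, 1 input channel, 3x3
demo_fb = np.zeros((1, 2))
demo_conv = Convolution(demo_W, demo_fb, stride=1, pad=1)
print(demo_conv.forward(np.random.randn(4, 1, 28, 28)).shape)  # (4, 2, 28, 28): a 3x3 filter with pad=1 keeps H and W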
Pooling Layer
# To do add pad operation
class Pooling:
def __init__(self, pool_h, pool_w, stride=1, pad=0):
"""
pool_h -- height of the filter
pool_w -- width of the filter
"""
self.pool_h = pool_h
self.pool_w = pool_w
self.stride = stride
self.pad = pad
self.X = None
def forward(self, input_X):
"""
input_X -- shape (m, n_C, Height, Width), a batch of m conv outputs
M -- max pooling output
"""
self.X = input_X
m, input_nc, input_h, input_w = self.X.shape
n_C = input_nc
pool_h = self.pool_h
pool_w = self.pool_w
stride = self.stride
pad = self.pad
# Compute the dimensions of the Pool output volume
n_H = int((input_h - pool_h + 2 * pad) / stride) + 1
n_W = int((input_w - pool_w + 2 * pad) / stride) + 1
### Initialize the output volume M with zeros.
M = np.zeros((m, n_C, n_H, n_W))
# padding
X_pad = zero_pad(input_X, pad)
for i in range(m): # loop over the training examples
x_pad = X_pad[i]
### implement max pooling on each example and update M
for h in range(n_H):
for w in range(n_W):
for c in range(n_C):
h_start = h * stride
h_end = h_start + pool_h
w_start = w * stride
w_end = w_start + pool_w
reception = x_pad[c, h_start:h_end, w_start:w_end]
val = np.max(reception)
M[i, c, h, w] = val
# Making sure your output shape is correct
assert M.shape == (m, n_C, n_H, n_W)
return M
def backward(self, dM):
"""
Arguments:
dM-- the gradient of the max pooling output.
Return:
dX -- the gradient of the input_X
"""
m, input_nc, input_h, input_w = self.X.shape
m, n_C, n_H, n_W = dM.shape
# Initialize dX with zeros
dX = np.zeros(self.X.shape)
# Pad Input_X and dX
X_pad = zero_pad(self.X, self.pad)
dX_pad = zero_pad(dX, self.pad)
assert dX.shape == self.X.shape
for i in range(m): # loop over the training examples
x_pad = X_pad[i]
dx_pad = dX_pad[i]
### Implement backward propagation on each sample and update dX
for h in range(n_H):
for w in range(n_W):
for c in range(n_C):
h_start = h * self.stride
h_end = h_start + self.pool_h
w_start = w * self.stride
w_end = w_start + self.pool_w
reception = x_pad[c, h_start:h_end, w_start:w_end]
mask = reception == np.max(reception)
dx_pad[c, h_start:h_end, w_start:w_end] += mask * dM[i, c, h, w]
# Remove the padding from dx_pad to recover dX[i]
if self.pad != 0:
dX[i] = dx_pad[:, self.pad:-self.pad, self.pad:-self.pad]
else:
dX[i] = dx_pad
assert dX.shape == self.X.shape
return dX
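A quick shape check (illustrative): a 2x2 window with stride 2 halves the spatial dimensions:
demo_pool = Pooling(pool_h=2, pool_w=2, stride=2)
print(demo_pool.forward(np.random.randn(4, 3, 28, 28)).shape)  # (4, 3, 14, 14)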
FC Layer
class FC:
def __init__(self, W, b):
self.W = W # shape is (n_x,n_unit)
self.b = b # shape is(1,n_unit)
self.X = None
self.origin_x_shape = None
self.dW = None
self.db = None
self.out_shape = None
def forward(self, X):
self.origin_x_shape = X.shape
self.X = X.reshape(X.shape[0], -1)
# Linear
out = np.dot(self.X, self.W) + self.b
self.out_shape = out.shape
return out
def backward(self, dZ, learning_rate):
"""
dZ-- gradient of the Linear output
dX -- gradient of the X
"""
assert dZ.shape == self.out_shape
m = self.X.shape[0]
###compute dW,db
self.dW = np.dot(self.X.T, dZ)
self.db = np.sum(dZ, 0, keepdims=True)
assert self.dW.shape == self.W.shape
assert self.db.shape == self.b.shape
###Compute dX
dX = np.dot(dZ, self.W.T)
assert dX.shape == self.X.shape
##reshape
dX = dX.reshape(self.origin_x_shape)
###update the parametes w and b
self.W -= learning_rate * self.dW
self.b -= learning_rate * self.db
return dX
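The FC layer flattens whatever it receives to (m, n_x) before the linear map; a quick check with illustrative sizes:
demo_fc = FC(W=np.random.randn(4 * 21 * 21, 32) * 0.01, b=np.zeros((1, 32)))
print(demo_fc.forward(np.random.randn(5, 4, 21, 21)).shape)  # (5, 32)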
Relu Layer
class Relu:
def __init__(self):
self.mask = None ###mask matrix for relu
def forward(self, X):
### implement relu function
self.mask = X <= 0
out = (1 - self.mask) * X
return out
def backward(self, dZ):
###compute the gradient of relu
dx = (1 - self.mask) * dZ
return dx
SoftMax Layer
a_{i}=\frac{e^{z_{i}}}{\sum_{k} e^{z_{k}}}
J=-\sum_{i} y_{i} \ln a_{i}
(1) When i = j:
\frac{\partial a_{j}}{\partial z_{i}}=\frac{e^{z_{i}} \sum_{k} e^{z_{k}}-\left(e^{z_{i}}\right)^{2}}{\left[\sum_{k} e^{z_{k}}\right]^{2}}=a_{i}-a_{i}^{2}
(2) When i \neq j:
\frac{\partial a_{j}}{\partial z_{i}}=\frac{-\left(e^{z_{j}}\right)\left(e^{z_{i}}\right)}{\left[\sum_{k} e^{z_{k}}\right]^{2}}=-a_{j} \cdot a_{i}
Derivation
\begin{array}{c} \frac{\partial J}{\partial z_{i}} \\ =\sum_{j}\left(\frac{\partial J_{j}}{\partial a_{j}} \frac{\partial a_{j}}{\partial z_{i}}\right) \\ =\sum_{i=j}\left[\left(\frac{-y_{j}}{a_{j}}\right)\left(a_{i}-a_{i}^{2}\right)\right]+\sum_{i \neq j}\left[\left(\frac{-y_{j}}{a_{j}}\right)\left(-a_{j} a_{i}\right)\right] \\ =-y_{i}\left(1-a_{i}\right)+\sum_{i \neq j}\left[y_{j} a_{i}\right] \\ =-y_{i}+a_{i} \sum_{j} y_{j} \\ \because \sum_{j} y_{j}=1 \\ \therefore \frac{\partial J}{\partial z_{i}}=a_{i}-y_{i} \end{array}
class SoftMax:
def __init__(self):
self.y_hat = None # Output of softmax Layer
def forward(self, X):
# Implement softmax function
self.y_hat = softmax(X)
return self.y_hat
def backward(self, labels):
# compute the gradient of softmax
dx = self.y_hat - labels
return dx
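As a sanity check of the derivation above (illustrative values), the analytic gradient y_hat - labels can be compared against a central-difference estimate of the cross-entropy cost:
demo_z = np.array([[1.0, 2.0, 0.5]])
demo_labels = np.array([[0.0, 1.0, 0.0]])
sm = SoftMax()
sm.forward(demo_z)
analytic = sm.backward(demo_labels)
eps = 1e-6
numeric = np.zeros_like(demo_z)
for k in range(demo_z.shape[1]):
    zp, zm = demo_z.copy(), demo_z.copy()
    zp[0, k] += eps
    zm[0, k] -= eps
    numeric[0, k] = (cross_entropy_error(demo_labels, softmax(zp))
                     - cross_entropy_error(demo_labels, softmax(zm))) / (2 * eps)
print(np.max(np.abs(analytic - numeric)))  # very small: the two gradients agree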
Compute Cost
def compute_cost(logits,label):
return cross_entropy_error(label,logits)
A Simple CNN
class SimpleCNN:
def __init__(self):
self.X = None
self.Y = None
self.layers = []
def add_conv_layer(self, n_filter, n_c, f, stride=1, pad=0):
"""
Arguments:
n_c -- the number of channels in the input
n_filter -- the number of filters
f -- the height/width of each filter
Return :
Conv -- a convolutional layer
"""
# initialize parameters of filters: W and fb
W = np.random.randn(n_filter, n_c, f, f)
fb = np.zeros((1, n_filter))
# initialize a convolutional layer
Conv = Convolution(W, fb, stride=stride, pad=pad)
return Conv
def add_maxpool_layer(self, pool_shape, stride=1, pad=0):
"""
Arguments:
pool_shape -- height and width of each filter
Return :
Pool -- a max pooling layer
"""
pool_h, pool_w = pool_shape
# initialize a pooling layer
Pool = Pooling(pool_h, pool_w, stride=stride, pad=pad)
return Pool
def add_FC(self, n_x, n_units):
"""
Arguments:
n_x -- dimensions of input
n_units -- the number of units
Return :
fc_layer -- a FC layer
"""
# initialize parameters of FC: W and b
# He
W = np.random.randn(n_x, n_units) * np.sqrt(2 / n_x)
b = np.zeros((1, n_units))
# initialize a FC layer
fc_layer = FC(W, b)
return fc_layer
def add_relu(self):
relu_layer = Relu()
return relu_layer
def add_softmax(self):
softmax_layer = SoftMax()
return softmax_layer
# Compute the height and width for output of Conv layer or Pool layer
def cacl_out_hw(self, HW, f, stride=1, pad=0):
"""
HW -- height/width of input
f --height/width of filter
Return:
hw -- height/width of the output
"""
hw = int((HW - f + 2 * pad) / stride) + 1
return hw
def init_model(self, train_X, n_classes):
"""
Initialize a simple CNN
"""
N, C, H, W = train_X.shape
# Conv
n_filter = 4  # number of filters
f = 7  # height/width of each filter
conv_layer = self.add_conv_layer(n_filter=n_filter, n_c=C, f=f, stride=1)
out_h = self.cacl_out_hw(H, f)
out_w = self.cacl_out_hw(W, f)
out_ch = n_filter
self.layers.append(conv_layer)
# Relu
relu_layer = self.add_relu()
self.layers.append(relu_layer)
# Pool
f = 2
pool_layer = self.add_maxpool_layer(pool_shape=(f, f), stride=1)
out_h = self.cacl_out_hw(out_h, f)
out_w = self.cacl_out_hw(out_w, f)
self.layers.append(pool_layer)
# FC
n_x = out_ch * out_h * out_w
n_units = 32
fc_layer = self.add_FC(n_x=n_x, n_units=n_units)
self.layers.append(fc_layer)
# Relu
relu_layer = self.add_relu()
self.layers.append(relu_layer)
# FC
fc_layer = self.add_FC(n_x=n_units, n_units=10)
self.layers.append(fc_layer)
# SoftMax
softmax_layer = self.add_softmax()
self.layers.append(softmax_layer)
def forward_progation(self, train_X, print_out=False):
"""
Arguments:
train_X -- train data, shape is (m,n_C,Height,Width)
Return :
A -- Softmax output
"""
N, C, H, W = train_X.shape
index = 0
# Conv
conv_layer = self.layers[index]
index = index + 1
X = conv_layer.forward(train_X)
if print_out:
print(f"after Conv({index}):" + str(X.shape))
# Relu
relu_layer = self.layers[index]
index = index + 1
X = relu_layer.forward(X)
if print_out:
print(f"after Relu({index}):" + str(X.shape))
# Pool
pool_layer = self.layers[index]
index = index + 1
X = pool_layer.forward(X)
if print_out:
print(f"after MaxPooling({index}):" + str(X.shape))
# FC
fc_layer = self.layers[index]
index = index + 1
X = fc_layer.forward(X)
if print_out:
print(f"after FC({index}):" + str(X.shape))
# Relu
relu_layer = self.layers[index]
index = index + 1
X = relu_layer.forward(X)
if print_out:
print(f"after Relu({index}):" + str(X.shape))
# FC
fc_layer = self.layers[index]
index = index + 1
X = fc_layer.forward(X)
if print_out:
print(f"after FC({index}):" + str(X.shape))
# SoftMax Layer
softmax_layer = self.layers[index]
index = index + 1
A = softmax_layer.forward(X)
if print_out:
print(f"after SoftMax({index}):" + str(X.shape))
return A
def back_progation(self, train_y, learning_rate):
# SoftMax
index = len(self.layers) - 1
softmax_layer = self.layers[index]
index -= 1
dZ = softmax_layer.backward(train_y)
# FC
fc_layer = self.layers[index]
dZ = fc_layer.backward(dZ, learning_rate)
index -= 1
# Relu
relu_layer = self.layers[index]
dZ = relu_layer.backward(dZ)
index -= 1
# FC
fc_layer = self.layers[index]
dZ = fc_layer.backward(dZ, learning_rate)
index -= 1
# Pooling
pool_layer = self.layers[index]
dZ = pool_layer.backward(dZ)
index -= 1
# Relu
relu_layer = self.layers[index]
dZ = relu_layer.backward(dZ)
index -= 1
# Conv
conv_layer = self.layers[index]
dZ = conv_layer.backward(dZ, learning_rate)
index -= 1
def get_minibatch(self, batch_data, minibatch_size, num):
m_examples = batch_data.shape[0]
minibatches = math.ceil(m_examples / minibatch_size)
if num < minibatches:
return batch_data[num * minibatch_size : (num + 1) * minibatch_size]
else:
return batch_data[num * minibatch_size : m_examples]
def optimize(
self, train_X, train_y, minibatch_size, learning_rate=0.05, num_iters=500
):
m = train_X.shape[0]
num_batches = math.ceil(m / minibatch_size)
costs = []
tiny_costs = []
for iteration in range(num_iters):
iter_cost = 0
for batch_num in range(num_batches):
minibatch_X = self.get_minibatch(train_X, minibatch_size, batch_num)
minibatch_y = self.get_minibatch(train_y, minibatch_size, batch_num)
# forward
A = self.forward_progation(minibatch_X)
# calculate cost
cost = compute_cost(A, minibatch_y)
# backward
self.back_progation(minibatch_y, learning_rate)
if iteration % 100 == 0:
iter_cost += cost / num_batches
# append cost
tiny_costs.append(cost)
# print cost
if iteration % 100 == 0:
print("After %d iters ,cost is :%g" % (iteration, iter_cost))
costs.append(iter_cost)
# plot the cost
plt.plot(costs)
plt.xlabel("iterations/hundreds")
plt.ylabel("costs")
plt.show()
# plot the tiny cost
plt.plot(tiny_costs)
plt.xlabel("iterations")
plt.ylabel("tiny_costs")
plt.show()
def predict(self, train_X):
"""
train_X -- train data
one_hot -- one_hot vector, represents the predicted label
"""
# implement forward propagation
logits = self.forward_progation(train_X)
# convert logits into one_hot vectors
one_hot = one_hot_label(logits)
return one_hot
def fit(self, train_X, train_y):
"""
training
"""
self.X = train_X
self.Y = train_y
n_y = train_y.shape[1]
n = train_X.shape[0]
self.init_model(train_X, n_classes=n_y)
self.optimize(
train_X, train_y, minibatch_size=10, learning_rate=0.05, num_iters=1000
)
logits = self.predict(train_X)
accuracy = np.sum(np.argmax(logits, axis=1) == np.argmax(train_y, axis=1)) / n
print("train accuracy:%g" % (accuracy))
Training
convNet = SimpleCNN()
sample_n = 100
# For speed, train on only the first sample_n examples of the original dataset
train_X = X_train[0:sample_n]
train_y = y_train[0:sample_n]
convNet.fit(train_X, train_y)
After 0 iters ,cost is :4.87611
After 100 iters ,cost is :2.28106
After 200 iters ,cost is :2.28106
After 300 iters ,cost is :2.28106
After 400 iters ,cost is :2.28106
After 500 iters ,cost is :2.28106
After 600 iters ,cost is :2.28106
After 700 iters ,cost is :2.28106
After 800 iters ,cost is :2.28106
After 900 iters ,cost is :2.28106
train accuracy:0.14
Predict
logits = convNet.predict(X_test[:sample_n])
# calculate accuracy
accuracy = np.sum(np.argmax(logits, axis=1) == np.argmax(y_test[:sample_n], axis=1)) / sample_n
print("test accuracy:%g" % (accuracy))
test accuracy:0.14