0. CNN computation principles; reference: https://zhuanlan.zhihu.com/p/29119239
For an explanation of np.pad(), see: https://blog.csdn.net/qq_37535492/article/details/88766347
Here np.pad is applied to randomly generated image batches of shape (m, n_h, n_w, n_c).
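A minimal np.pad sketch (per-dimension pad widths with constant fill; the 1-D array is an assumed toy input):
# import numpy as np
# v = np.array([1, 2, 3])
# np.pad(v, (2, 1), 'constant', constant_values=0)  # -> [0 0 1 2 3 0]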
1. Hand-writing the CNN forward pass in NumPy
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import numpy as np
import matplotlib.pyplot as plt
# 1. Define the zero-padding helper
def zero_pad(X, pad):
    X_padded = np.pad(X, ((0, 0),      # dim 0 (batch): no padding
                          (pad, pad),  # dim 1 (height): pad on both sides
                          (pad, pad),  # dim 2 (width): pad on both sides
                          (0, 0)),     # dim 3 (channels): no padding
                      'constant', constant_values=0)  # constant fill value 0
    return X_padded
# Test
# X = np.random.randn(4, 3, 3, 2)
# print(X.shape)
# X_padded = zero_pad(X, 2)
# print(X_padded.shape)
#
# # Visualize the result
# fig, axarr = plt.subplots(1, 2)
# axarr[0].set_title('x')
# axarr[0].imshow(X[0, :, :, 0])
# axarr[1].set_title('x_padded')
# axarr[1].imshow(X_padded[0, :, :, 0])
# plt.show()
# 2. Single convolution step: multiply a slice of x elementwise with the filter W, sum, add b, and return the resulting scalar
def conv_single_step(a_slice_prev, W, b):
    s = np.multiply(a_slice_prev, W)  # elementwise product of slice and filter
    Z = np.sum(s) + float(b)          # sum everything, then add the bias once
    return Z
# Test
# a_slice_prev = np.random.randn(4, 3, 3)
# w = np.random.randn(4, 3, 3)
# b = np.random.randn(1)  # or shape (1, 1, 1)
# z = conv_single_step(a_slice_prev, w, b)
# print(z)
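# Sanity check (a sketch with assumed all-ones inputs): the result should be
# exactly f*f*n_c + b = 3*3*2 + 0 = 18
# assert conv_single_step(np.ones((3, 3, 2)), np.ones((3, 3, 2)), np.zeros(1)) == 18.0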
# 3. Convolution layer: forward propagation
def conv_forward(A_prev, W, b, hparameters):
    (m, n_h_prev, n_w_prev, n_c_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape
    # Unpack the hyperparameters
    stride = hparameters['stride']
    pad = hparameters['pad']
    # Compute the output dimensions
    n_h = int((n_h_prev - f + 2 * pad) / stride) + 1
    n_w = int((n_w_prev - f + 2 * pad) / stride) + 1
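    # Worked example (using the assumed values from the commented test below):
    # n_h_prev = 4, f = 2, pad = 2, stride = 1 -> n_h = (4 - 2 + 4)/1 + 1 = 7,
    # so Z will have shape (10, 7, 7, 8)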
Z=np.zeros((m,n_h,n_w,n_C))
    A_prev_padded = zero_pad(A_prev, pad)
    for i in range(m):
        a_prev_pad = A_prev_padded[i]
        for h in range(n_h):
            for w in range(n_w):
                for c in range(n_C):
                    v_start = h * stride  # vertical start, along the n_h axis
                    v_end = v_start + f
                    h_start = w * stride  # horizontal start, along the n_w axis
                    h_end = h_start + f
                    # Take the slice
                    a_slice_prev = a_prev_pad[v_start:v_end, h_start:h_end, :]
                    # Convolve
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[:, :, :, c], b[:, :, :, c])
assert(Z.shape==(m,n_h,n_w,n_C))
    # Cache values for the backward pass
cache=(A_prev,W,b,hparameters)
return (Z,cache)
# Test
# np.random.seed(1)
# A_prev=np.random.randn(10,4,4,3)
# W=np.random.randn(2,2,3,8)
# b=np.random.randn(1,1,1,8)
# hparameters={
# 'pad':2,
# 'stride':1
# }
# Z,cache=conv_forward(A_prev,W,b,hparameters)
# print(Z.shape)
# 4. Pooling layer: forward propagation
def pool_forward(A_prev, hparameters, mode='max'):
    (m, n_h_prev, n_w_prev, n_c_prev) = A_prev.shape
    f = hparameters['f']
    stride = hparameters['stride']
    # Compute the output dimensions (no padding in pooling)
    n_h = int((n_h_prev - f) / stride) + 1
    n_w = int((n_w_prev - f) / stride) + 1
    n_c = n_c_prev  # pooling leaves the number of channels unchanged
    # Initialize the output matrix
    A = np.zeros((m, n_h, n_w, n_c))
    # Loop over the output volume
    for i in range(m):
        for h in range(n_h):
            for w in range(n_w):
                for c in range(n_c):
                    v_start = h * stride  # vertical start, along the n_h axis
                    v_end = v_start + f
                    h_start = w * stride  # horizontal start, along the n_w axis
                    h_end = h_start + f
                    # Slice out the window for channel c only
                    a_slice = A_prev[i, v_start:v_end, h_start:h_end, c]
                    if mode == 'max':
                        A[i, h, w, c] = np.max(a_slice)
                    elif mode == 'mean':
                        A[i, h, w, c] = np.mean(a_slice)
assert (A.shape==(m,n_h,n_w,n_c))
cache=(A_prev,hparameters)
return (A,cache)
# Test
# np.random.seed(1)
# A_prev = np.random.randn(2, 4, 4, 3)
# hparameters = {
#     'f': 4,
#     'stride': 1
# }
# A, cache = pool_forward(A_prev, hparameters, 'max')
# print("max=" + str(A))
# A, cache = pool_forward(A_prev, hparameters, 'mean')
# print("mean=" + str(A))
# print(A.shape)
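# Worked example (with the assumed test values above: 4x4 inputs, f=4, stride=1):
# n_h = (4 - 4)/1 + 1 = 1, so A has shape (2, 1, 1, 3)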
# 5. Convolution layer: backward pass (dA, dW, db) -- for understanding only
def conv_backward(dZ, cache):
    """
    Implements the backward pass for a convolution layer.
    Arguments:
    dZ - gradient of the cost with respect to the output Z of the conv layer, of shape (m, n_H, n_W, n_C)
    cache - values needed for the backward pass, one of the outputs of conv_forward()
    Returns:
    dA_prev - gradient with respect to the input A_prev, of shape (m, n_H_prev, n_W_prev, n_C_prev)
    dW - gradient with respect to the weights, of shape (f, f, n_C_prev, n_C)
    db - gradient with respect to the biases, of shape (1, 1, 1, n_C)
    """
    # Unpack the cache
    (A_prev, W, b, hparameters) = cache
    # Dimensions of A_prev
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # Dimensions of dZ
    (m, n_H, n_W, n_C) = dZ.shape
    # Dimensions of the weights
    (f, f, n_C_prev, n_C) = W.shape
    # Unpack the hyperparameters
    pad = hparameters["pad"]
    stride = hparameters["stride"]
    # Initialize the gradients with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))
    # The forward pass padded A_prev, so pad here too to keep indexing consistent
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    # Process the data
    for i in range(m):
        # Select the i-th padded example (drops the batch dimension)
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    # Locate the slice
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    # Take the slice
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    # Accumulate the gradients for this window
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * dZ[i, h, w, c]
                    dW[:, :, :, c] += a_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]
        # Strip the padding to obtain the final dA_prev for the i-th example
        # (note: the pad:-pad slice assumes pad > 0)
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
    # Check that the output has the expected shape
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    return (dA_prev, dW, db)
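# Test (a sketch reusing the assumed shapes from the conv_forward test above)
# np.random.seed(1)
# A_prev = np.random.randn(10, 4, 4, 3)
# W = np.random.randn(2, 2, 3, 8)
# b = np.random.randn(1, 1, 1, 8)
# hparameters = {'pad': 2, 'stride': 1}
# Z, cache = conv_forward(A_prev, W, b, hparameters)
# dA, dW, db = conv_backward(Z, cache)
# print(dA.shape, dW.shape, db.shape)  # -> (10, 4, 4, 3) (2, 2, 3, 8) (1, 1, 1, 8)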
# 6. Pooling layer: backward pass
# 6.1 Backward pass for max pooling
def create_mask_from_window(x):
    """
    Creates a mask that marks the position of the maximum entry of the input matrix.
    Arguments:
    x - a matrix of shape (f, f)
    Returns:
    mask - a boolean matrix of the same shape, True at the position of the maximum of x
    """
    mask = x == np.max(x)
    return mask
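# Sketch with an assumed 2x2 window:
# create_mask_from_window(np.array([[1, 3], [4, 2]]))
# -> [[False False]
#     [ True False]]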
# 6.2 Backward pass for average pooling
def distribute_value(dz, shape):
    """
    Distributes a value evenly over every position of a matrix of the given size.
    Arguments:
    dz - a scalar
    shape - a tuple (n_H, n_W)
    Returns:
    a - a matrix of shape (n_H, n_W) in which every entry equals dz / (n_H * n_W)
    """
    # Matrix dimensions
    (n_H, n_W) = shape
    # Average value per entry
    average = dz / (n_H * n_W)
    # Fill the matrix
    a = np.ones(shape) * average
    return a
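# Sketch: distributing dz = 8 over an assumed 2x2 window puts 8/4 = 2.0 everywhere
# distribute_value(8, (2, 2))  # -> [[2. 2.] [2. 2.]]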
# Pooling layer: backward pass (both modes)
def pool_backward(dA, cache, mode="max"):
    """
    Implements the backward pass for a pooling layer.
    Arguments:
    dA - gradient with respect to the output of the pooling layer, same shape as that output
    cache - values stored during the forward pass of the pooling layer
    mode - pooling mode, "max" or "average"
    Returns:
    dA_prev - gradient with respect to the input of the pooling layer, same shape as A_prev
    """
    # Unpack the cache
    (A_prev, hparameters) = cache
    # Unpack the hyperparameters
    f = hparameters["f"]
    stride = hparameters["stride"]
    # Dimensions of A_prev and dA
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (m, n_H, n_W, n_C) = dA.shape
    # Initialize the output
    dA_prev = np.zeros_like(A_prev)
    # Process the data
    for i in range(m):
        a_prev = A_prev[i]
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    # Locate the slice
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    # Choose the backward computation by pooling mode
                    if mode == "max":
                        # Take the slice
                        a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                        # Build the mask: only the max entry receives the gradient
                        mask = create_mask_from_window(a_prev_slice)
                        # Accumulate into dA_prev
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += np.multiply(mask, dA[i, h, w, c])
                    elif mode == "average":
                        # Gradient flowing into this window
                        da = dA[i, h, w, c]
                        # Filter size
                        shape = (f, f)
                        # Spread the gradient evenly over the window
                        dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
    # Check that the output has the expected shape
    assert(dA_prev.shape == A_prev.shape)
    return dA_prev
# Test
# np.random.seed(1)
# A_prev = np.random.randn(5, 5, 3, 2)
# hparameters = {"stride" : 1, "f": 2}
# A, cache = pool_forward(A_prev, hparameters)
# dA = np.random.randn(5, 4, 2, 2)
#
# dA_prev = pool_backward(dA, cache, mode = "max")
# print("mode = max")
# print('mean of dA = ', np.mean(dA))
# print('dA_prev[1,1] = ', dA_prev[1,1])
# print()
# dA_prev = pool_backward(dA, cache, mode = "average")
# print("mode = average")
# print('mean of dA = ', np.mean(dA))
# print('dA_prev[1,1] = ', dA_prev[1,1])
2. Implementing the CNN with TensorFlow
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cnn_utils
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
'''
Model: conv -> relu -> maxpool -> conv -> relu -> maxpool -> fc
'''
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = cnn_utils.load_dataset()
# index=6
# plt.imshow(X_train_orig[index])
# plt.show()
# print('y='+str(np.squeeze(Y_train_orig[:,index])))
X_train = X_train_orig/255.
X_test = X_test_orig/255.
Y_train = cnn_utils.convert_to_one_hot(Y_train_orig, 6).T
Y_test = cnn_utils.convert_to_one_hot(Y_test_orig, 6).T
# print ("number of training examples = " + str(X_train.shape[0]))
# print ("number of test examples = " + str(X_test.shape[0]))
# print ("X_train shape: " + str(X_train.shape))
# print ("Y_train shape: " + str(Y_train.shape))
# print ("X_test shape: " + str(X_test.shape))
# print ("Y_test shape: " + str(Y_test.shape))
conv_layers = {}
# 1. Create the placeholders
def create_placeholders(n_h0,n_w0,n_c0,n_y):
    # The number of examples per mini-batch may vary, so use None for the batch dimension
X=tf.placeholder(tf.float32,[None,n_h0,n_w0,n_c0])
Y=tf.placeholder(tf.float32,[None,n_y])
return X,Y
# Test
# X , Y = create_placeholders(64,64,3,6)
# print ("X = " + str(X))
# print ("Y = " + str(Y))
# 2. Initialize the parameters
# Only the conv filters W1 and W2 need explicit initialization; the framework initializes the fully connected layer automatically
def initialize_parameters():
tf.set_random_seed(1)
W1=tf.get_variable('W1',[4,4,3,8],initializer=tf.contrib.layers.xavier_initializer(seed=0))
W2=tf.get_variable('W2',[2,2,8,16],initializer=tf.contrib.layers.xavier_initializer(seed=0))
parameters={
'W1':W1,
'W2':W2
}
return parameters
# Test
# tf.reset_default_graph()
# with tf.Session() as sess_test:
# parameters=initialize_parameters()
# init=tf.global_variables_initializer()
# sess_test.run(init)
# print("w1="+str(parameters['W1'].eval()[1,1,1]))
# print("w2="+str(parameters['W2'].eval()[1,1,1]))
#
# sess_test.close()
# 3. Forward propagation
'''
Model: CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FULLYCONNECTED
Fully connected layer (FC): a dense layer with no nonlinear activation. Do not apply
softmax here; the output layer just produces 6 logits, which are passed to softmax later.
In TensorFlow, softmax and the cost are combined into one function, so a different
function is called when computing the cost.
'''
def forward_propagation(X, parameters):
    W1 = parameters['W1']
    W2 = parameters['W2']
    # Conv2d: stride 1, padding 'SAME'
    Z1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    # ReLU
    A1 = tf.nn.relu(Z1)
    # Max pool: window 8x8, stride 8, padding 'SAME'
    P1 = tf.nn.max_pool(A1, ksize=[1, 8, 8, 1], strides=[1, 8, 8, 1], padding='SAME')
    # Conv2d: stride 1, padding 'SAME'
    Z2 = tf.nn.conv2d(P1, W2, strides=[1, 1, 1, 1], padding='SAME')
    # ReLU
    A2 = tf.nn.relu(Z2)
    # Max pool: window 4x4, stride 4, padding 'SAME'
    P2 = tf.nn.max_pool(A2, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')
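    # Shape trace, assuming 64x64x3 inputs: (m,64,64,3) -conv W1-> (m,64,64,8)
    # -maxpool 8x8/8-> (m,8,8,8) -conv W2-> (m,8,8,16) -maxpool 4x4/4-> (m,2,2,16)
    # -flatten-> (m,64) -FC-> (m,6)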
    # Flatten the previous output
    P = tf.contrib.layers.flatten(P2)
    # Fully connected layer (FC) with no nonlinear activation
    Z3 = tf.contrib.layers.fully_connected(P, 6, activation_fn=None)
return Z3
# Test
# tf.set_random_seed(1)
# tf.reset_default_graph()
# with tf.Session() as sess_test:
# X,Y=create_placeholders(64,64,3,6)
# parameters=initialize_parameters()
# Z3=forward_propagation(X,parameters)
#
# init=tf.global_variables_initializer()
# sess_test.run(init)
# a=sess_test.run(Z3,{X:np.random.randn(2,64,64,3),Y:np.random.randn(2,6)})
# print('z3='+str(a))
# sess_test.close()
# 4. Compute the cost
def compute_cost(Z3,Y):
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3,labels=Y))
return cost
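# Sketch of what the fused op computes (per example i, with one-hot labels Y):
# cost_i = -sum_j Y[i, j] * log(softmax(Z3)[i, j]); tf.reduce_mean then averages over the batch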
# Test
# tf.reset_default_graph()
#
# with tf.Session() as sess_test:
# np.random.seed(1)
# X,Y = create_placeholders(64,64,3,6)
# parameters = initialize_parameters()
# Z3 = forward_propagation(X,parameters)
# cost = compute_cost(Z3,Y)
#
# init = tf.global_variables_initializer()
# sess_test.run(init)
# a = sess_test.run(cost,{X: np.random.randn(4,64,64,3), Y: np.random.randn(4,6)})
# print("cost = " + str(a))
#
# sess_test.close()
# 5. Build the model
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009, num_epochs=100, minibatch_size=64, print_cost=True, isPlot=True):
    ops.reset_default_graph()  # allows re-running the model without overwriting tf variables
    tf.set_random_seed(1)
    seed = 3
    (m, n_h, n_w, n_c) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []
    # Create placeholders with the current dimensions
    X, Y = create_placeholders(n_h, n_w, n_c, n_y)
    # Initialize the parameters
    parameters = initialize_parameters()
    # Forward propagation
    Z3 = forward_propagation(X, parameters)
    # Compute the cost
    cost = compute_cost(Z3, Y)
    # Backpropagation: just choose an optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Initializer for the global variables
    init = tf.global_variables_initializer()
    # Run the graph
    with tf.Session() as sess:
        # Initialize the parameters
        sess.run(init)
        # Loop over the epochs
        for epoch in range(num_epochs):
            minibatch_cost = 0
            num_minibatches = int(m / minibatch_size)  # number of mini-batches
            seed = seed + 1
            minibatches = cnn_utils.random_mini_batches(X_train, Y_train, minibatch_size, seed)
            # Loop over the mini-batches
            for minibatch in minibatches:
                (minibatch_x, minibatch_y) = minibatch
                # Minimize the cost of the current mini-batch
                _, temp_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_x, Y: minibatch_y})
                # Accumulate the cost -- note the division by num_minibatches
                minibatch_cost += temp_cost / num_minibatches
            if print_cost:
                if epoch % 5 == 0:
                    print("Cost after epoch " + str(epoch) + ": " + str(minibatch_cost))
            if epoch % 10 == 0:
                costs.append(minibatch_cost)
        # Training finished; plot the cost curve
        if isPlot:
            plt.plot(np.squeeze(costs))
            plt.xlabel('iterations (per tens)')
            plt.ylabel('cost')
            plt.title('learning_rate: ' + str(learning_rate))
            plt.show()
        # Evaluate the predictions
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        # Accuracy tensor
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        print('accuracy tensor = ' + str(accuracy))
        # Accuracy on the training and test sets
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Training accuracy: " + str(train_accuracy))
        print("Test accuracy: " + str(test_accuracy))
return (train_accuracy, test_accuracy, parameters)
# Run the model
train_accuracy, test_accuracy, parameters=model(X_train,Y_train,X_test,Y_test,num_epochs=150)
# Prediction function
def predict(parameters):
    X = tf.placeholder(tf.float32, [None, 64, 64, 3])
    Z3 = forward_propagation(X, parameters)
    with tf.Session() as sess:
        # Caution: this re-initializes all variables, so the forward pass below runs
        # with fresh random weights, not the trained ones (see the note after this function)
        init = tf.global_variables_initializer()
        sess.run(init)
        a = sess.run(Z3, feed_dict={X: np.random.randn(1, 64, 64, 3)})
        b = np.squeeze(np.argmax(a, axis=1))
    return b
# Test
# print("Prediction: " + str(predict(parameters)))
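Note: as written, predict() re-initializes all variables, so it runs on fresh random
weights rather than the trained ones. A minimal sketch of one common fix, assuming a
hypothetical checkpoint path 'model.ckpt':
# saver = tf.train.Saver()           # create after the graph is built
# saver.save(sess, 'model.ckpt')     # inside model(), after training finishes
# saver.restore(sess, 'model.ckpt')  # inside predict(), instead of running init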
3. The required cnn_utils module
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import math
import h5py
import numpy as np
def load_dataset():
train_dataset = h5py.File('datasets/train_signs.h5', 'r')
train_set_x_orig = np.array(train_dataset["train_set_x"][:])
train_set_y_orig = np.array(train_dataset["train_set_y"][:])
test_dataset = h5py.File('datasets/test_signs.h5', 'r')
test_set_x_orig = np.array(test_dataset["test_set_x"][:])
test_set_y_orig = np.array(test_dataset["test_set_y"][:])
classes = np.array(test_dataset["list_classes"][:])
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def convert_to_one_hot(Y,c):
Y1=np.eye(c)
Y2=Y.reshape(-1)
Y=Y1[Y2].T
return Y
# Y=np.array([0,1,2,1,2])
# Y=convert_to_one_hot(Y,3)
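# Result: shape (3, 5), one column per label:
# [[1. 0. 0. 0. 0.]
#  [0. 1. 0. 1. 0.]
#  [0. 0. 1. 0. 1.]]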
def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
m = X.shape[0]
np.random.seed(seed)
mini_batches = []
    # 1. Shuffle the examples
permutation = list(np.random.permutation(m))
shuffle_X = X[permutation,:,:,: ]
shuffle_Y = Y[permutation,:]
    # 2. Partition into mini-batches
num_complete_minibatches = math.floor(m / mini_batch_size)
for k in range(0, num_complete_minibatches):
mini_batch_X = shuffle_X[k * mini_batch_size: k * mini_batch_size + mini_batch_size,:,:,:]
mini_batch_Y = shuffle_Y[k * mini_batch_size: k * mini_batch_size + mini_batch_size,:]
mini_batches.append((mini_batch_X, mini_batch_Y))
# Handling the end case (last mini-batch < mini_batch_size)
if m % mini_batch_size != 0:
mini_batch_X = shuffle_X[num_complete_minibatches * mini_batch_size: m,:,:,:]
mini_batch_Y = shuffle_Y[num_complete_minibatches * mini_batch_size: m,:]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
return mini_batches
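# Usage sketch (hypothetical numbers: m = 1080 examples, mini_batch_size = 64):
# this yields 16 full batches of 64 plus one final batch of 1080 - 1024 = 56.
# minibatches = random_mini_batches(X_train, Y_train, 64, seed=0)
# print(len(minibatches), minibatches[0][0].shape)  # -> 17 (64, 64, 64, 3)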