Forward propagation: (1) Convolution
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))
def cnnConvolve(filterDim, numFilters, images, W, b, hparameters={'stride': 1, 'pad': 0}):
    numImages = images.shape[0]
    imageDim = images.shape[1]
    stride = hparameters['stride']  # defaults to 1 for simplicity
    pad = hparameters['pad']        # padding is never actually applied below, so pad is assumed to be 0
    convDim = (imageDim - filterDim + 2 * pad) // stride + 1
    convolvedFeatures = np.zeros([convDim, convDim, numFilters, numImages])
    for imageNum in range(numImages):
        for filterNum in range(numFilters):
            convolvedImage = np.zeros([convDim, convDim])
            for i in range(convDim):
                for j in range(convDim):
                    temp_image = images[imageNum, :, :]
                    temp_w = W[filterNum, :, :]
                    temp_b = b[filterNum]
                    v_start = i * stride
                    h_start = j * stride
                    # multiply the filter with the current window, sum, add the bias,
                    # then apply the sigmoid activation
                    temp_conved = sigmoid(np.sum(temp_image[v_start:v_start + filterDim, h_start:h_start + filterDim] * temp_w) + temp_b)
                    convolvedImage[i, j] = temp_conved
            convolvedFeatures[:, :, filterNum, imageNum] = convolvedImage
    return convolvedFeatures
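A minimal smoke test of cnnConvolve on random data (the shapes here are illustrative assumptions, not from the original post):

np.random.seed(0)
images = np.random.rand(2, 28, 28)   # 2 single-channel 28x28 images
W = np.random.randn(4, 9, 9) * 0.01  # 4 filters of size 9x9
b = np.zeros(4)

features = cnnConvolve(9, 4, images, W, b)
print(features.shape)  # (20, 20, 4, 2): (28 - 9) // 1 + 1 = 20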
(2) Pooling
def cnnPool(poolDim, convolvedFeatures, mode="max"):
    numImages = convolvedFeatures.shape[3]
    numFilters = convolvedFeatures.shape[2]
    convolvedDim = convolvedFeatures.shape[1]
    # non-overlapping pooling: stride equals poolDim, so
    # pooled_dim = (convolvedDim + 2*pad - poolDim) / stride + 1 = convolvedDim / poolDim
    pooled_dim = convolvedDim // poolDim
    pooledFeatures = np.zeros([pooled_dim, pooled_dim, numFilters, numImages])
    for imageNum in range(numImages):
        for filterNum in range(numFilters):
            pooledImage = np.zeros([pooled_dim, pooled_dim])
            for i in range(0, convolvedDim, poolDim):
                if i + poolDim > convolvedDim:  # drop an incomplete window at the border
                    break
                for j in range(0, convolvedDim, poolDim):
                    if j + poolDim > convolvedDim:
                        break
                    row = i // poolDim
                    col = j // poolDim
                    temp_image = convolvedFeatures[i:i + poolDim, j:j + poolDim, filterNum, imageNum]
                    if mode == 'max':
                        pooledImage[row, col] = np.max(temp_image)
                    elif mode == 'mean':
                        pooledImage[row, col] = np.mean(temp_image)
            pooledFeatures[:, :, filterNum, imageNum] = pooledImage
    return pooledFeatures
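Continuing the sketch above, pooling the convolved features with a 2x2 window:

pooled = cnnPool(2, features, mode='max')
print(pooled.shape)  # (10, 10, 4, 2): each 20x20 feature map shrinks by a factor of 2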
Backpropagation: with detailed comments and formula explanations (the comments got a bit long).
Formula derivation:
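In summary, writing slice(h, w) for the f×f input window a[h·stride : h·stride + f, w·stride : w·stride + f] (notation introduced here to match the code below, not taken from any textbook), the gradients the convolutional backward pass accumulates are:

$$dW_c = \sum_{h,w} \text{slice}(h, w) \cdot dz_{c,h,w}, \qquad db_c = \sum_{h,w} dz_{c,h,w},$$

$$da\big[\text{slice}(h, w)\big] \mathrel{+}= W_c \cdot dz_{c,h,w} \quad \text{for every } (h, w, c).$$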
Code: (1) Convolutional layer backpropagation:
def conv_backward(dz, cache):
    # a_input: the input to this conv layer (i.e. the previous layer's output);
    # dz: gradient of the loss w.r.t. this layer's pre-activation output z;
    # w: the convolution filters
    a_input, w, b, hparameters = cache
    (m, col, row) = a_input.shape   # m samples, each a single-channel col x row image
    (n_c, f, f) = w.shape           # n_c filters of size f x f
    stride = hparameters['stride']
    pad = hparameters['pad']
    (m, n_c, n_h, n_w) = dz.shape   # one n_h x n_w gradient map per sample and per filter
    da_input = np.zeros((m, col, row))
    dw = np.zeros((n_c, f, f))
    db = np.zeros((n_c, 1, 1))
    for i in range(m):
        a_temp = a_input[i, :, :]
        da_temp = np.zeros((col, row))
        for h in range(n_h):
            for j in range(n_w):
                for c in range(n_c):
                    v_start = h * stride
                    v_end = v_start + f
                    h_start = j * stride
                    h_end = h_start + f
                    a_slice = a_temp[v_start:v_end, h_start:h_end]
                    # Each z is the convolution of w with one slice, and each input a
                    # contributes to several different z's through different slices,
                    # so every slice accumulates w times the dz of the z it produced.
                    da_temp[v_start:v_end, h_start:h_end] += w[c, :, :] * dz[i, c, h, j]
                    # Each filter weight multiplies the matching input a in every slice
                    # it is convolved with, so dw accumulates a_slice times dz over all slices.
                    dw[c, :, :] += a_slice * dz[i, c, h, j]
                    # Same reasoning as dw, except the bias is not multiplied by any input.
                    db[c, :, :] += dz[i, c, h, j]
        da_input[i, :, :] = da_temp
    return da_input, dw, db
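A quick shape check for conv_backward under the assumptions above (single-channel inputs, dz of shape (m, n_c, n_h, n_w); all numbers here are illustrative):

m, dim, f, n_c, stride = 2, 10, 3, 4, 1
a_input = np.random.rand(m, dim, dim)
w = np.random.randn(n_c, f, f)
b = np.zeros((n_c, 1, 1))
hparameters = {'stride': stride, 'pad': 0}
n_out = (dim - f) // stride + 1
dz = np.random.randn(m, n_c, n_out, n_out)
cache = (a_input, w, b, hparameters)
da, dw, db = conv_backward(dz, cache)
print(da.shape, dw.shape, db.shape)  # (2, 10, 10) (4, 3, 3) (4, 1, 1)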
(2) Pooling layer backpropagation:
def create_mask_frommat(x):
    mask = (x == np.max(x))  # 1 at the maximum entry of x, 0 everywhere else
    return mask

def distribute_value(dz, shape):
    # Average pooling weights every window entry by 1/(n_h*n_w),
    # so the gradient dz is spread evenly over the whole window.
    (n_h, n_w) = shape
    average = dz / (n_h * n_w)
    a = average * np.ones(shape)
    return a
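The two helpers in isolation:

x = np.array([[1., 4.], [2., 3.]])
print(create_mask_frommat(x))         # [[False  True] [False False]]
print(distribute_value(8.0, (2, 2)))  # [[2. 2.] [2. 2.]]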
def pool_backward(dz, cache, mode):
    # pooling layers have no w or b, so only the input gradient is returned
    (a_input, hparameters) = cache
    stride = hparameters['stride']
    f = hparameters['f']
    m, col, row = a_input.shape  # m samples
    m, n_h, n_w = dz.shape
    da_input = np.zeros(a_input.shape)
    for i in range(m):
        a_temp = a_input[i, :, :]
        for h in range(n_h):
            for w in range(n_w):
                v_start = h * stride
                v_end = v_start + f
                h_start = w * stride
                h_end = h_start + f
                if mode == 'max':
                    # Only the maximum entry of each window reaches z, so only that
                    # entry receives the gradient dz; the rest get nothing.
                    a_slice = a_temp[v_start:v_end, h_start:h_end]
                    mask = create_mask_frommat(a_slice)
                    da_input[i, v_start:v_end, h_start:h_end] += mask * dz[i, h, w]
                elif mode == 'average':
                    # Each window averages its f*f inputs (each weighted by 1/(f*f))
                    # to produce z, so every input in the window receives dz/(f*f).
                    da_input[i, v_start:v_end, h_start:h_end] += distribute_value(dz[i, h, w], (f, f))
    return da_input
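And a shape check for pool_backward, assuming non-overlapping 2x2 windows (stride = f = 2):

m, dim, f = 2, 8, 2
a_input = np.random.rand(m, dim, dim)
cache = (a_input, {'stride': f, 'f': f})
dz = np.random.randn(m, dim // f, dim // f)
da = pool_backward(dz, cache, mode='max')
print(da.shape)  # (2, 8, 8); each 2x2 window has at most one nonzero gradient entry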
That is as far as this CNN implementation goes for now. Working through the code has given a fairly concrete understanding of convolutional networks; building a complete, working model would also require fully connected layers, which is more involved, so from here on I will switch to a deep learning framework.