# NOTE: This code is incomplete (not finished for lack of time); it exists only
# to illustrate forward and backward propagation in a convolutional neural network.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
class ReluActivator(object):
    """ReLU activation.

    forward() clamps negative inputs to zero; backward() returns the
    derivative expressed in terms of the forward output (1 where the
    output is positive, 0 elsewhere).
    """

    def forward(self, weighted_input):
        # ReLU(x) = x for x > 0, otherwise 0.
        return weighted_input if weighted_input > 0 else 0

    def backward(self, output):
        # Gradient of ReLU: 1 on the active side, 0 on the clipped side.
        if output > 0:
            return 1
        return 0
class Filter(object):
    """One convolution kernel: weights of shape (depth, height, width),
    a scalar bias, and the matching gradient buffers."""

    def __init__(self, width, height, depth):
        shape = (depth, height, width)
        # Tiny uniform init keeps initial activations close to zero.
        self.weights = np.random.uniform(-1e-4, 1e-4, shape)
        self.bias = 0
        self.weights_grad = np.zeros(shape)
        self.bias_grad = 0

    def __repr__(self):
        return 'filter weights:\n%s\nbias:\n%s' % (
            repr(self.weights), repr(self.bias))

    def get_weights(self):
        return self.weights

    def get_bias(self):
        return self.bias

    def update(self, learning_rate):
        # Plain gradient-descent step on both parameter sets.
        self.weights -= learning_rate * self.weights_grad
        self.bias -= learning_rate * self.bias_grad
class ConvLayer(object):
    """Convolutional layer: owns its filters and implements the forward
    pass plus the backward sensitivity/gradient passes."""

    def __init__(self, input_width, input_height,
                 channel_number, filter_width,
                 filter_height, filter_number,
                 zero_padding, stride, activator,
                 learning_rate):
        self.input_width = input_width
        self.input_height = input_height
        self.channel_number = channel_number
        self.filter_width = filter_width
        self.filter_height = filter_height
        self.filter_number = filter_number
        self.zero_padding = zero_padding
        self.stride = stride
        # Spatial size of the output feature maps.
        self.output_width = ConvLayer.calculate_output_size(
            input_width, filter_width, zero_padding, stride)
        self.output_height = ConvLayer.calculate_output_size(
            input_height, filter_height, zero_padding, stride)
        self.output_array = np.zeros(
            (filter_number, self.output_height, self.output_width))
        # One kernel per output map; each kernel spans every input channel.
        self.filters = [
            Filter(filter_width, filter_height, channel_number)
            for _ in range(filter_number)]
        self.activator = activator
        self.learning_rate = learning_rate
def calculate_output_size(input_size, filter_size, zero_padding, stride):
return (input_size - filter_size + 2 * zero_padding) / stride + 1
def forward(self, input_array):
'''
正向传播:计算卷积层的输出
输入保存在self.input_array
输出结果保存在self.output_array
'''
self.input_array = input_array
self.padded_input_array = padding(input_array,self.zero_padding) # 补零扩展
for f in range(self.filter_number): # 执行所有卷积核
filter = self.filters[f] # 当前卷积核
conv(self.padded_input_array, # 卷积操作
filter.get_weights(), self.output_array[f],
self.stride, filter.get_bias())
element_wise_op(self.output_array, # 激活操作
self.activator.forward)
# 对numpy数组进行按元素操作
def element_wise_op(array, op):
    """Apply `op` to every element of a numpy array, writing the result
    back in place."""
    with np.nditer(array, op_flags=['readwrite']) as it:
        for cell in it:
            cell[...] = op(cell)
def conv(input_array, kernel_array, output_array, stride, bias):
    """Cross-correlate `input_array` with `kernel_array`, writing the
    result plus `bias` into the 2D `output_array` in place.

    Adapts to both 2D (H, W) and 3D (D, H, W) inputs/kernels because
    get_patch() and the element-wise product handle either rank.
    Fix: removed the unused local `channel_number` the original computed
    from input_array.ndim but never read.
    """
    output_height, output_width = output_array.shape
    kernel_height, kernel_width = kernel_array.shape[-2:]
    for i in range(output_height):
        for j in range(output_width):
            # Window of the input under the kernel at output cell (i, j).
            patch = get_patch(input_array, i, j, kernel_width,
                              kernel_height, stride)
            output_array[i, j] = (patch * kernel_array).sum() + bias
# 为数组增加Zero padding
def padding(input_array, zp):
    """Zero-pad the spatial (last two) dimensions of a 2D or 3D array
    by `zp` cells on every side.

    Returns the input object itself (no copy) when zp == 0; any other
    rank than 2 or 3 falls through and returns None, as before.
    """
    if zp == 0:
        return input_array
    if input_array.ndim == 3:
        depth, height, width = input_array.shape
        padded = np.zeros(
            (depth, height + 2 * zp, width + 2 * zp))
        padded[:, zp: zp + height, zp: zp + width] = input_array
        return padded
    elif input_array.ndim == 2:
        height, width = input_array.shape
        padded = np.zeros((height + 2 * zp, width + 2 * zp))
        padded[zp: zp + height, zp: zp + width] = input_array
        return padded
# 由当前层激活函数加权输入梯度,计算上一层激活函数的加权输入net 的梯度: 先计算对激活函数输出a 的梯度,再乘上激活函数的梯度
def bp_sensitivity_map(self, sensitivity_array,activator):# 将残差项传递到上一层
'''
计算传递到上一层的sensitivity map
sensitivity_array: 本层的sensitivity map
activator: 上一层的激活函数
'''
# 当前层残差 sensitivity_array -扩展-> expanded_array -补零-> padded_array
# 上一层残差 padded_array + 多个filte -翻转、卷积、累加 -> delta_array
# 处理卷积步长,对原始sensitivity map进行扩展
expanded_array = self.expand_sensitivity_map(sensitivity_array)
# full卷积,对sensitivitiy map进行zero padding
# 虽然原始输入的zero padding单元也会获得残差
# 但这个残差不需要继续向上传递,因此就不计算了
expanded_width = expanded_array.shape[2]
zp = (self.input_width +
self.filter_width - 1 - expanded_width) / 2
padded_array = padding(expanded_array, zp)
# 初始化delta_array,用于保存传递到上一层的
# sensitivity map
self.delta_array = self.create_delta_array()
# 对于具有多个filter的卷积层来说,最终传递到上一层的
# sensitivity map相当于所有的filter的
# sensitivity map之和
for f in range(self.filter_number):
filter = self.filters[f]
# 将filter权重翻转180度
flipped_weights = np.array(map(
lambda i: np.rot90(i, 2),
filter.get_weights()))
# 计算与一个filter对应的delta_array
delta_array = self.create_delta_array()
for d in range(delta_array.shape[0]):
conv(padded_array[f], flipped_weights[d],
delta_array[d], 1, 0)
self.delta_array += delta_array
# 将计算结果与激活函数的偏导数做element-wise乘法操作
derivative_array = np.array(self.input_array)
element_wise_op(derivative_array,
activator.backward)
self.delta_array *= derivative_array # 加上激活函数的梯度
# 将步长为S的sensitivity map『还原』为步长为1的sensitivity map
def expand_sensitivity_map(self, sensitivity_array):
depth = sensitivity_array.shape[0]
# 确定扩展后sensitivity map的大小
# 计算stride为1时sensitivity map的大小
expanded_width = (self.input_width -
self.filter_width + 2 * self.zero_padding + 1)
expanded_height = (self.input_height -
self.filter_height + 2 * self.zero_padding + 1)
# 构建新的sensitivity_map
expand_array = np.zeros((depth, expanded_height,
expanded_width))
# 从原始sensitivity map拷贝误差值
for i in range(self.output_height):
for j in range(self.output_width):
i_pos = i * self.stride
j_pos = j * self.stride
expand_array[:, i_pos, j_pos] = \
sensitivity_array[:, i, j]
return expand_array
# 创建用来保存传递到上一层的sensitivity map的数组
def create_delta_array(self):
return np.zeros((self.channel_number,
self.input_height, self.input_width))
# 权重梯度:上一层激活函数输出 与 当前层激活函数加权输入的梯度 卷积
def bp_gradient(self, sensitivity_array):
# 处理卷积步长,对原始sensitivity map进行扩展
expanded_array = self.expand_sensitivity_map(
sensitivity_array)
for f in range(self.filter_number):
# 计算每个权重的梯度
filter = self.filters[f]
for d in range(filter.weights.shape[0]):
conv(self.padded_input_array[d],
expanded_array[f],
filter.weights_grad[d], 1, 0)
# 计算偏置项的梯度
filter.bias_grad = expanded_array[f].sum()
def update(self):
'''
按照梯度下降,更新权重
'''
for filter in self.filters:
filter.update(self.learning_rate)
class MaxPoolingLayer(object):
    """Max-pooling layer over inputs of shape
    (channel_number, input_height, input_width)."""

    def __init__(self, input_width, input_height,
                 channel_number, filter_width,
                 filter_height, stride):
        self.input_width = input_width
        self.input_height = input_height
        self.channel_number = channel_number
        self.filter_width = filter_width
        self.filter_height = filter_height
        self.stride = stride
        # Fix: floor division.  Under Python 3 the original `/` produced
        # float sizes and np.zeros() below raised a TypeError.
        self.output_width = (input_width -
                             filter_width) // self.stride + 1
        self.output_height = (input_height -
                              filter_height) // self.stride + 1
        self.output_array = np.zeros((self.channel_number,
                                      self.output_height,
                                      self.output_width))

    def forward(self, input_array):
        """Forward pass: each output cell is the max of its pooling
        window.  Result is stored in self.output_array."""
        for d in range(self.channel_number):
            for i in range(self.output_height):
                for j in range(self.output_width):
                    self.output_array[d, i, j] = (
                        get_patch(input_array[d], i, j,
                                  self.filter_width,
                                  self.filter_height,
                                  self.stride).max())

    def backward(self, input_array, sensitivity_array):
        """Backward pass: route each output delta to the position of the
        window maximum; every other position gets zero.  Result is
        stored in self.delta_array."""
        self.delta_array = np.zeros(input_array.shape)
        for d in range(self.channel_number):
            for i in range(self.output_height):
                for j in range(self.output_width):
                    patch_array = get_patch(
                        input_array[d], i, j,
                        self.filter_width,
                        self.filter_height,
                        self.stride)
                    # (k, l) indexes the maximum inside the window.
                    k, l = get_max_index(patch_array)
                    self.delta_array[d,
                                     i * self.stride + k,
                                     j * self.stride + l] = \
                        sensitivity_array[d, i, j]