After more than a week of work, I've finally finished the forward pass and the backpropagation (BP) algorithm for a CNN, with regularization, moving averages, and so on added. (I'm still a beginner, so quite a few things may well be wrong.)
I got a lot out of it, and it also deepened my understanding of the TensorFlow framework.
Compared with TensorFlow, though, it is still much slower...
import numpy as np
import time
import cv2
import random
start = time.perf_counter()  # time.clock() was removed in Python 3.8
batch_size = 20
learning_rate_base = 0.0002
regularization_rate = 0.0001
moving_average_decay = 0.999
train_steps = 2000
save_result_step = 1
label = []
#print('img shape',img.shape)
layer_name_list=['conv1',
'pool1',
'conv2',
'pool2',
'fc1',
'fc2']
pooling_layer_name_list=['pool1',
'pool2']
conv_layer_name_list=['conv1',
'conv2']
fc_layer_name_list=['fc1',
'fc2']
layer_params_size_dict = {'conv1':(3,3,3,3),
'conv2':(3,3,3,4),
'pool1':(2,2,3),
'pool2':(2,2,4),
'fc1':25,
'fc2':1} # number of output nodes
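# (conv entries are (kernel_h, kernel_w, in_channels, out_channels); pool
#  entries are (kernel_h, kernel_w, channels); fc entries are output node counts)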
gradient_name_list=['conv1',
'conv2',
'fc1',
'fc2']
active_layer_name_list=['conv1',
'conv2',
'fc1',
'fc2']
actived_output_dict=dict.fromkeys(active_layer_name_list)
active_function_gradient_dict=dict.fromkeys(active_layer_name_list)
conv_weights_dict=dict.fromkeys(conv_layer_name_list)
conv_biases_dict=dict.fromkeys(conv_layer_name_list)
pooling_layer_input_size_dict=dict.fromkeys(pooling_layer_name_list)
pooling_layer_ouput_size_dict=dict.fromkeys(pooling_layer_name_list)
pooling_layer_input_dict = dict.fromkeys(pooling_layer_name_list)
pooling_layer_output_dict = dict.fromkeys(pooling_layer_name_list)
conv_layer_input_size_dict=dict.fromkeys(conv_layer_name_list)
conv_layer_output_size_dict=dict.fromkeys(conv_layer_name_list)
fc_layer_input_size_dict=dict.fromkeys(fc_layer_name_list)
fc_layer_input_size_dict['fc1'] = 676
fc_layer_input_size_dict['fc2'] = 25
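# (676 = 13 * 13 * 4: pool2's flattened output for the 50x50 input images used below)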
fc_weights_dict=dict.fromkeys(fc_layer_name_list)
fc_biases_dict=dict.fromkeys(fc_layer_name_list)
pooling_layer_max_index_dict=dict.fromkeys(pooling_layer_name_list)
weishts_gradient_dict=dict.fromkeys(gradient_name_list)
biases_gradient_dict=dict.fromkeys(gradient_name_list)
weights_gradient_shadow_dict=dict.fromkeys(gradient_name_list)
biases_gradient_shadow_dict=dict.fromkeys(gradient_name_list)
def generate_conv_weights(_layer_name_, _params_mean_, _params_std_):
conv_weights_dict[_layer_name_] = np.random.normal(_params_mean_, _params_std_, layer_params_size_dict[_layer_name_])
weishts_gradient_dict[_layer_name_] = np.zeros(layer_params_size_dict[_layer_name_])
weights_gradient_shadow_dict[_layer_name_] = np.zeros(layer_params_size_dict[_layer_name_])
# the number of conv-layer biases equals the number of output channels of the conv
def generate_conv_biases(_layer_name_, _params_mean_):
conv_biases_dict[_layer_name_] = np.ones((layer_params_size_dict[_layer_name_][3])) * _params_mean_
biases_gradient_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_][3]))
biases_gradient_shadow_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_][3]))
def generate_fc_weights(_layer_name_, _params_mean_, _params_std_):
fc_weights_dict[_layer_name_] = np.random.normal(_params_mean_, _params_std_ , (fc_layer_input_size_dict[_layer_name_], layer_params_size_dict[_layer_name_]))
weishts_gradient_dict[_layer_name_] = np.zeros((fc_layer_input_size_dict[_layer_name_], layer_params_size_dict[_layer_name_]))
weights_gradient_shadow_dict[_layer_name_] = np.zeros((fc_layer_input_size_dict[_layer_name_], layer_params_size_dict[_layer_name_]))
def generate_fc_biases(_layer_name_, _params_mean_):
fc_biases_dict[_layer_name_] = np.ones((layer_params_size_dict[_layer_name_])) * _params_mean_
biases_gradient_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_]))
biases_gradient_shadow_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_]))
# convolution with ('same') zero padding
# stride defaults to 1
def conv(_input_, _layer_name_, _stride_x = 1, _stride_y = 1):
_input_size_ = _input_.shape
    # kernel size
_filter_size_ = layer_params_size_dict[_layer_name_]
_filter_size_x_ = _filter_size_[1]
_filter_size_y_ = _filter_size_[0]
# _in_channel_ = _filter_size_[2]
_out_channel_ = _filter_size_[3]
    # single-side padding in x and y; only correct when stride = 1
_x_padding_ = int((_filter_size_x_ - 1)/2)
_y_padding_ = int((_filter_size_y_ - 1)/2)
_padded_input_ = np.pad(_input_, ((_y_padding_, _y_padding_), (_x_padding_, _x_padding_), (0,0)),'constant')
    # number of sliding positions in x and y, i.e. the spatial size of the conv output
_x_slip_num_ = int((_input_size_[1]-1)/_stride_x+1)
_y_slip_num_ = int((_input_size_[0]-1)/_stride_y+1)
_conv_weights_matrix_ = conv_weights_dict[_layer_name_]
# _output_ = layer_output_empty_matrix_dict[_layer_name_]
_output_ = np.empty((_y_slip_num_, _x_slip_num_, _out_channel_))
for _z_ in range(0,_out_channel_):
x_ = 0
for _i_ in range(0,_x_slip_num_):
y_ = 0
for _j_ in range(0,_y_slip_num_):
_output_[_j_, _i_, _z_]=np.sum( np.multiply(_conv_weights_matrix_[:,:,:,_z_], _padded_input_[y_:y_+_filter_size_y_, x_:x_+_filter_size_x_,:]))
y_ = y_ + _stride_y
x_ = x_ + _stride_x
conv_layer_input_size_dict[_layer_name_] = _input_size_
conv_layer_output_size_dict[_layer_name_] = [_y_slip_num_, _x_slip_num_, _out_channel_]
return _output_
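# The triple Python loop above is the main reason this is so much slower than
# TensorFlow. As a minimal sketch (my own addition, not used anywhere below),
# the same stride-1 'same'-padding convolution can be done with one matrix
# multiply via im2col; conv_im2col is a hypothetical name:
def conv_im2col(_input_, _layer_name_):
    _fh_, _fw_, _cin_, _cout_ = conv_weights_dict[_layer_name_].shape
    _h_, _w_ = _input_.shape[0], _input_.shape[1]
    _padded_ = np.pad(_input_, (((_fh_-1)//2, (_fh_-1)//2), ((_fw_-1)//2, (_fw_-1)//2), (0, 0)), 'constant')
    # gather every fh x fw x cin patch into one row of a matrix
    _cols_ = np.empty((_h_ * _w_, _fh_ * _fw_ * _cin_))
    for _j_ in range(_h_):
        for _i_ in range(_w_):
            _cols_[_j_ * _w_ + _i_] = _padded_[_j_:_j_+_fh_, _i_:_i_+_fw_, :].ravel()
    # one dot product replaces the per-pixel np.multiply / np.sum
    return np.dot(_cols_, conv_weights_dict[_layer_name_].reshape(-1, _cout_)).reshape(_h_, _w_, _cout_)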
#generate_conv_weights('conv1', 0.0, 0.1)
#print(conv(img,'conv1').shape)
# pooling window stride defaults to 2
# the stride must be <= the kernel size, otherwise x_padding / y_padding can go negative and raise an error
def max_pooling(_input_, _layer_name_, _stride_x_ = 2, _stride_y_ = 2):
_input_size_ = _input_.shape
_filter_size_ = layer_params_size_dict[_layer_name_]
_kernel_size_x_ = _filter_size_[1]
_kernel_size_y_ = _filter_size_[0]
_channel_ = _filter_size_[2]
    # flags for whether pooling needs padding
_x_padding_mark_ = (_input_size_[1] - _kernel_size_x_) % _stride_x_
_y_padding_mark_ = (_input_size_[0] - _kernel_size_y_) % _stride_y_
if _x_padding_mark_ ==0:
_x_padding_ = 0
else:
_x_padding_ = _kernel_size_x_ - _x_padding_mark_
if _y_padding_mark_ == 0:
_y_padding_ = 0
else:
_y_padding_ = _kernel_size_y_ - _y_padding_mark_
_padded_input_ = np.pad(_input_,((0,_y_padding_), (0,_x_padding_), (0,0)),'constant')
    # size after padding
_padded_input_size_ = _padded_input_.shape
    # number of sliding positions in each direction, i.e. the pooled output size
_x_slip_num_ = int((_padded_input_size_[1]-_kernel_size_x_)/_stride_x_+1)
_y_slip_num_ = int((_padded_input_size_[0]-_kernel_size_y_)/_stride_y_+1)
_output_ = np.empty((_y_slip_num_, _x_slip_num_, _channel_))
_max_index_array_ = np.empty((_y_slip_num_, _x_slip_num_, _channel_))
for _z_ in range(_channel_):
x_ = 0
        for _i_ in range(0,_x_slip_num_): # loop over x
            y_ = 0
            for _j_ in range(0,_y_slip_num_): # loop over y
                # to change the pooling type, replace np.max below
_in_=_padded_input_[y_ : y_ + _kernel_size_y_ , x_ : x_ + _kernel_size_x_ , _z_]
_output_[_j_, _i_, _z_]=np.max(_in_)
_max_index_array_[_j_, _i_, _z_]=np.argmax(_in_)
y_ = y_ + _stride_y_
x_ = x_ + _stride_x_
pooling_layer_max_index_dict[_layer_name_] = _max_index_array_
pooling_layer_input_size_dict[_layer_name_] = _input_size_
pooling_layer_ouput_size_dict[_layer_name_] = _output_.shape
pooling_layer_input_dict[_layer_name_] = _input_
pooling_layer_output_dict[_layer_name_] = _output_
return _output_
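# For the common 2x2-window, stride-2 case on an evenly divisible input, max
# pooling needs no Python loops at all. A minimal sketch (my own addition,
# assumes height and width are multiples of 2; not used below):
def max_pooling_2x2_fast(_input_):
    _h_, _w_, _c_ = _input_.shape
    return _input_.reshape(_h_ // 2, 2, _w_ // 2, 2, _c_).max(axis=(1, 3))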
def relu_function(x__ , _threshold_=0):
if x__ < _threshold_:
return 0.0
else:
return x__
# ReLU activation
def apply_relu_active(_input_matrix_, _layer_name_):
    vfunc = np.vectorize(relu_function) # np.vectorize() applies the function elementwise; internally it is still a Python loop, so it gives no speedup
_out_ = vfunc(_input_matrix_)
actived_output_dict[_layer_name_] = _out_
return _out_
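# Since np.vectorize gives no speedup (see the note above), a genuinely
# vectorized ReLU is a one-liner with np.maximum. A minimal sketch (my own
# addition, not used below):
def apply_relu_active_fast(_input_matrix_, _layer_name_):
    _out_ = np.maximum(_input_matrix_, 0.0)
    actived_output_dict[_layer_name_] = _out_
    return _out_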
# generate the parameters and store them in the dicts
def generate_params():
for _name_ in conv_layer_name_list:
generate_conv_weights(_name_, 0.0, 0.1)
generate_conv_biases(_name_, 0.1)
for _name2_ in fc_layer_name_list:
generate_fc_weights(_name2_ , 0.0, 0.1)
generate_fc_biases(_name2_,0.1)
def conv_inference_and_bp(_input_):
conv1 = conv(_input_,'conv1')
# print('conv 1 shape',conv1.shape)
actived_conv1 = apply_relu_active(conv1 + conv_biases_dict['conv1'], 'conv1')
pool1 = max_pooling(actived_conv1,'pool1')
# print('pool 1 shape', pool1.shape)
conv2 = conv(pool1,'conv2')
# print('conv 2 shape',conv2.shape)
    actived_conv2 = apply_relu_active(conv2 + conv_biases_dict['conv2'], 'conv2')
pool2 = max_pooling(actived_conv2,'pool2')
# print('pool 2 shape',pool2.shape)
pool2_size = pool2.shape
_nodes_ = pool2_size[0] * pool2_size[1] * pool2_size[2]
_reshaped_nodes_ = np.reshape(pool2,[1,_nodes_])
fc1 = np.dot(_reshaped_nodes_, fc_weights_dict['fc1']) + fc_biases_dict['fc1']
actived_fc1 = apply_relu_active(fc1, 'fc1')
# print('fc1 shape',fc1.shape)
fc2 = np.dot(actived_fc1, fc_weights_dict['fc2']) +fc_biases_dict['fc2']
return fc2
# derivative of ReLU
def calculate_gradient_function(_x_):
if _x_ > 0:
return 1.0
else:
return 0.0
# elementwise ReLU gradient for backpropagation
def calculate_active_layer_gradient(_input_matrix_, _layer_name_):
vfunc = np.vectorize(calculate_gradient_function)
_out_ = vfunc(_input_matrix_)
active_function_gradient_dict[_layer_name_] = _out_
return _out_
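# The same vectorization note applies here: the elementwise ReLU gradient
# 1[x > 0] can be computed directly, e.g. (_input_matrix_ > 0).astype(np.float64).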
# output-layer error under squared loss
def fc_outlayer_error(_out_,_label_):
return np.asarray( _out_ - _label_ )
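# (For squared loss L = 0.5 * ||y - t||^2, dL/dy = y - t, which is exactly
#  what fc_outlayer_error returns.)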
# _layer_name_ is the fc layer whose weights carry the error backwards (note the 'fc1' activation is hardcoded)
def fc_hidden_layer_error(_layer_name_, _previous_layer_error_):
return np.multiply( np.dot ( _previous_layer_error_ , fc_weights_dict[_layer_name_].T ) , calculate_active_layer_gradient(actived_output_dict['fc1'], _layer_name_) )
# _layer_name_ is the fc layer connected to the last pooling layer
def pool_layer_error_from_fc(_error_from_fc, _layer_name_):
    _fc_weights_ = fc_weights_dict[_layer_name_]
    # size of the last pooling layer (the one feeding the fc layers)
    _shape_ = pooling_layer_ouput_size_dict[pooling_layer_name_list[-1]]
return np.reshape(np.dot( _error_from_fc, _fc_weights_.T), [ _shape_[0], _shape_[1], _shape_[2] ])
# _layer_name_ is the name of a pooling layer
def pool_layer_error_upsample(_error_matrix_, _layer_name_, _stride_x_ = 2, _stride_y_ = 2):
_padded_upsampled_shape_ = pooling_layer_input_size_dict[_layer_name_]
_upsampled_error_ = np.zeros((_padded_upsampled_shape_[0], _padded_upsampled_shape_[1], _padded_upsampled_shape_[2]))
_index_array_ = pooling_layer_max_index_dict[_layer_name_]
_kernel_size_ = layer_params_size_dict[_layer_name_]
_kernel_size_x_ = _kernel_size_[1]
_kernel_size_y_ = _kernel_size_[0]
_xs_=int((_padded_upsampled_shape_[1] - _kernel_size_x_)/_stride_x_+1)
_ys_=int((_padded_upsampled_shape_[0] - _kernel_size_y_)/_stride_y_+1)
channel=_padded_upsampled_shape_[2]
    for z in range(channel):
        _x_ = 0
        for i in range(_xs_): # loop over x
            _y_ = 0
            for j in range(_ys_): # loop over y
                _index_ = _index_array_[j,i,z]
                # np.argmax flattened the window in row-major order, so
                # row = index // window_width and col = index % window_width
                _y_index_ = int(_index_ // _kernel_size_x_)
                _x_index_ = int(_index_ % _kernel_size_x_)
                _upsampled_error_[_y_ : _y_ + _kernel_size_y_ , _x_ : _x_ + _kernel_size_x_ , z][_y_index_, _x_index_] = _error_matrix_[j,i,z]
                _y_ = _y_ + _stride_y_
            _x_ = _x_ + _stride_x_
# print('upsample pool layer err',_upsampled_error_.shape)
return _upsampled_error_
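# Worked 1-channel example: if a 2x2 window's max sat at flat index 3 (bottom
# right) and that output position received error e, upsampling writes
# [[0, 0], [0, e]] back into that window's slot in the input-sized error map.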
# _layer_name_ is the name of a pooling layer
def conv_layer_error_from_pool(_err_maxtrix_, _layer_name_):
# print(_err_maxtrix_.shape)
return np.multiply(pool_layer_error_upsample(_err_maxtrix_,_layer_name_), calculate_active_layer_gradient(pooling_layer_input_dict[_layer_name_], _layer_name_))
# return pool_layer_error_upsample(_err_maxtrix,_layer_)
# note: no channel dimension here
def bp_conv_padding(_img_, _filter_size_x_, _filter_size_y_):
_x_padding_=int((_filter_size_x_-1)/2)
_y_padding_=int((_filter_size_y_-1)/2)
return np.pad(_img_,((_y_padding_,_y_padding_), (_x_padding_,_x_padding_)),'constant')
# propagate the error of a conv layer back to the preceding pool layer, one channel at a time
def bp_conv_one_channel(_err_, _weights_, _shape_, _filter_size_x_, _filter_size_y_, _stride_x_=1, _stride_y_=1):
    _x_num_ = int((_shape_[1]-1)/_stride_x_+1) # number of sliding positions in each direction, i.e. the output size
    _y_num_ = int((_shape_[0]-1)/_stride_y_+1)
    # kernel size
# _filter_size_ = layer_params_size_dict[_layer_name_]
_one_channel_out_err_ = np.empty((_y_num_,_x_num_))
_err_ = bp_conv_padding(_err_,_filter_size_x_,_filter_size_y_)
_x_ = 0
for _i_ in range(0,_x_num_):
_y_ = 0
for _j_ in range(0,_y_num_):
_one_channel_out_err_[_j_, _i_] =np.sum( np.multiply(_err_[_y_:_y_+_filter_size_y_, _x_:_x_+_filter_size_x_], _weights_))
_y_ = _y_ + _stride_y_
_x_ = _x_ + _stride_x_
return _one_channel_out_err_
# compute the weight-update gradients of a conv layer; _layer_name_ is a conv layer
def calculate_conv_weights_update_gradient(_err_, _previous_out_, _layer_name_):
_size_ = layer_params_size_dict[_layer_name_]
    _xs_ = _size_[1] # loop counts derived from the kernel size (one gradient entry per kernel position)
    _ys_ = _size_[0]
_shape_list_ = conv_layer_output_size_dict[_layer_name_]
_shape_x_ = _shape_list_[1]
_shape_y_ = _shape_list_[0]
_gradient_matrix_ = np.zeros((_size_[0],
_size_[1],
_size_[2],
_size_[3]))
for _out_channel_ in range(_size_[3]):
_padded_err_ = bp_conv_padding( _err_[:,:,_out_channel_], _size_[1], _size_[0] )
for _in_channel_ in range(_size_[2]):
_previous_out_one_layer_ = _previous_out_[:,:,_in_channel_]
for _i_ in range(_xs_):
for _j_ in range(_ys_):
_gradient_matrix_[_j_, _i_, _in_channel_, _out_channel_] = np.sum( np.multiply(_padded_err_[_j_:_j_+_shape_y_, _i_:_i_+_shape_x_] ,_previous_out_one_layer_))
return _gradient_matrix_
# _layer_name_ is a conv layer
def pool_layer_error_from_conv(_error_, _layer_name_):
_size_ = layer_params_size_dict[_layer_name_]
    _out_shape_ = _size_[3] # number of output channels of the conv layer
    _in_shape_ = _size_[2] # number of input channels of the conv layer
_out_size_ = conv_layer_input_size_dict[_layer_name_]
_out_error_ = np.zeros((_out_size_[0],_out_size_[1] ,_out_size_[2] ))
# print('size',_size_[2])
_weights_ = conv_weights_dict[_layer_name_]
for _out_channel_ in range(_out_shape_):
for _in_channel_ in range(_in_shape_):
_out_error_[:,:,_in_channel_] = _out_error_[:,:,_in_channel_] + bp_conv_one_channel(_error_[:,:,_out_channel_],
_weights_[:,:,_in_channel_,_out_channel_],
# _weights_list_[_out_channel_][:,:,_in_channel_],
_out_size_,
_size_[1],
_size_[0]
)
# print(_out_error_.shape)
return _out_error_ / _out_shape_
# L2 regularization (returns the L2 norm)
def l2_regularization(_input_):
return ( np.sum( _input_ ** 2 ) ) ** 0.5
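# Note: l2_regularization returns the L2 norm sqrt(sum(x^2)). The textbook L2
# weight penalty instead adds (rate / 2) * ||W||^2 to the loss, so each weight's
# gradient simply gains a rate * W term. A minimal sketch (my own illustration,
# not what error_bp_op below does):
#     weishts_gradient_dict['fc1'] += regularization_rate * fc_weights_dict['fc1']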
# compute each layer's error and accumulate the gradients
def error_bp_op(_img_):
_y_ = conv_inference_and_bp(_img_)
# print('qian xiang chuan bo ',_y_)
_loss_ = fc_outlayer_error(_y_,label)
    # add regularization
_loss_ += regularization_rate * l2_regularization(_loss_)
# print('fc output layer loss ', _loss_.shape)
_fc2_weights_gradient_ = np.dot( actived_output_dict['fc1'].T, _loss_)
_hidden_layer_err_ = fc_hidden_layer_error('fc2', _loss_)
    # add regularization
_hidden_layer_err_ += regularization_rate * l2_regularization(_hidden_layer_err_)
# print('hidden layer error', _hidden_layer_err_.shape)
_fc1_weights_gradient_ = np.dot( np.reshape(pooling_layer_output_dict['pool2'],( [1,676])).T, _hidden_layer_err_)
# print('fc 1 weights gradient',_fc1_weights_gradient_.shape)
_pool_2_err_ = pool_layer_error_from_fc(_hidden_layer_err_, 'fc1')
# print(' pool 2 layer error',_pool_2_err_.shape)
_conv2_err_ = conv_layer_error_from_pool(_pool_2_err_, 'pool2')
# print('conv 2 layer error', _conv2_err_.shape)
_conv2_weights_gradient_ = calculate_conv_weights_update_gradient(_conv2_err_, pooling_layer_output_dict['pool1'], 'conv2')
# print('conv 2 weights gradient ',_conv2_weights_gradient_.shape)
_pool_1_err_ = pool_layer_error_from_conv(_conv2_err_, 'conv2')
# print('pool 1 layer error ',_pool_1_err_.shape)
_conv1_err_ = conv_layer_error_from_pool(_pool_1_err_, 'pool1')
# print('conv 1 layer error ',_conv1_err_.shape)
_conv1_weights_gradient_ = calculate_conv_weights_update_gradient(_conv1_err_, _img_, 'conv1')
# print('conv 1 weights gradient', _conv1_weights_gradient_.shape)
weishts_gradient_dict['conv1'] += _conv1_weights_gradient_
weishts_gradient_dict['conv2'] += _conv2_weights_gradient_
weishts_gradient_dict['fc1'] += _fc1_weights_gradient_
weishts_gradient_dict['fc2'] += _fc2_weights_gradient_
biases_gradient_dict['conv1'] += np.sum( _conv1_err_, axis=(0,1) )
biases_gradient_dict['conv2'] += np.sum( _conv2_err_, axis=(0,1) )
biases_gradient_dict['fc1'] += _hidden_layer_err_
biases_gradient_dict['fc2'] += _loss_
print('inference ',_y_)
# return _conv1_weights_gradient_, _conv2_weights_gradient_, _fc1_weights_gradient_, _fc2_weights_gradient_
return _loss_
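# A cheap way to sanity-check the analytic gradients above is a two-sided
# numerical gradient check on a single weight. A minimal sketch (my own
# addition; assumes the same _img_ and label are reused for both evaluations,
# and ignores the regularization term that error_bp_op mixes into its error):
def numeric_grad_check(_img_, _layer_name_='fc2', _idx_=(0, 0), _eps_=1e-5):
    _w_ = fc_weights_dict[_layer_name_]
    _w_[_idx_] += _eps_
    _loss_plus_ = np.sum(0.5 * (conv_inference_and_bp(_img_) - label) ** 2)
    _w_[_idx_] -= 2 * _eps_
    _loss_minus_ = np.sum(0.5 * (conv_inference_and_bp(_img_) - label) ** 2)
    _w_[_idx_] += _eps_  # restore the original weight
    return (_loss_plus_ - _loss_minus_) / (2 * _eps_)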
positive_num = 0
negative_num = 0
def train_input():
label.clear()
if random.randint(1,10)>5:
_img_ = cv2.imread(r'E:\faces_gray_resized5050\{}.jpg'.format(random.randint(1,150000)))
# positive_num += 1
label.append(1)
else:
_img_ = cv2.imread(r'E:\Img\opencv3negative_faces_data\face\negdata_gray\{}.jpg'.format(random.randint(1,80000)))
# negative_num += 1
label.append(-1)
print('label',label)
    # normalize to [0, 1]
    return _img_ / 255, label
def update_weights_and_params():
for _step_ in range(train_steps):
for _i_ in range(batch_size):
_input_ = train_input()
print('loss',error_bp_op(_input_[0]))
        # average the gradients over the batch
        for _name_ in gradient_name_list:
            weishts_gradient_dict[_name_] /= batch_size
            biases_gradient_dict[_name_] /= batch_size
        # maintain a moving average (shadow) of the gradients
        if _step_ % round(1 / (1 - moving_average_decay)) == 0:
for _name4_ in gradient_name_list:
weights_gradient_shadow_dict[_name4_] = weishts_gradient_dict[_name4_]
biases_gradient_shadow_dict[_name4_] = biases_gradient_dict[_name4_]
else:
for _name5_ in gradient_name_list:
weights_gradient_shadow_dict[_name5_] = weishts_gradient_dict[_name5_] * (1- moving_average_decay) + weights_gradient_shadow_dict[_name5_] * moving_average_decay
biases_gradient_shadow_dict[_name5_] = biases_gradient_dict[_name5_] * (1- moving_average_decay) + biases_gradient_shadow_dict[_name5_] * moving_average_decay
        # update conv-layer parameters
for _name2_ in conv_layer_name_list:
conv_weights_dict[_name2_] = conv_weights_dict[_name2_] - learning_rate_base * weights_gradient_shadow_dict[_name2_]
conv_biases_dict[_name2_] = conv_biases_dict[_name2_] - learning_rate_base * biases_gradient_shadow_dict[_name2_]
            # after the update, zero the gradient accumulators
weishts_gradient_dict[_name2_] = weishts_gradient_dict[_name2_] * 0
biases_gradient_dict[_name2_] = biases_gradient_dict[_name2_] * 0
        # update fully connected layer parameters (using the same gradient shadows as the conv layers)
        for _name3_ in fc_layer_name_list:
            fc_weights_dict[_name3_] = fc_weights_dict[_name3_] - learning_rate_base * weights_gradient_shadow_dict[_name3_]
            fc_biases_dict[_name3_] = fc_biases_dict[_name3_] - learning_rate_base * biases_gradient_shadow_dict[_name3_]
weishts_gradient_dict[_name3_] = weishts_gradient_dict[_name3_] * 0
biases_gradient_dict[_name3_] = biases_gradient_dict[_name3_] * 0
        # if _step_ % save_result_step == 0:
        #     # save results
        #     with open(r'C:\Users\Administrator\Desktop\tensorflow\ccccccc7.txt','a') as f:
        #         f.writelines([str(_step_),'#',str(loss_value),'#',str(recall_rate),'#',str(precision_rate),'\n'])
generate_params()
update_weights_and_params()
end = time.perf_counter()
print("running time is %g s" % (end-start))