Implementing CNN Backpropagation

After more than a week of work, I finally finished the CNN forward pass and the backpropagation (BP) algorithm, and added regularization, moving averages, and so on (I'm still a beginner, so there may well be mistakes in quite a few places).
I learned a lot from this, and my understanding of the TensorFlow framework also got a bit deeper.
Compared with TensorFlow, though, it is still much slower...

import numpy as np
import threading
import time
import cv2
import random

start = time.perf_counter()    # time.clock() was removed in Python 3.8

batch_size = 20

learning_rate_base = 0.0002

regularization_rate = 0.0001

moving_average_decay = 0.999

train_steps = 2000
save_result_step = 1
label = []

#print('img shape',img.shape)
layer_name_list=['conv1',
                 'pool1',
                 'conv2',
                 'pool2',
                 'fc1',
                 'fc2']


pooling_layer_name_list=['pool1',
                         'pool2']

conv_layer_name_list=['conv1',
                      'conv2']

fc_layer_name_list=['fc1',
                    'fc2']

layer_params_size_dict = {'conv1':(3,3,3,3),
                          'conv2':(3,3,3,4),
                          'pool1':(2,2,3),
                          'pool2':(2,2,4),
                          'fc1':25,
                          'fc2':1}         # for the FC layers the value is the number of output nodes
gradient_name_list=['conv1',
                    'conv2',
                    'fc1',
                    'fc2']

active_layer_name_list=['conv1',
                        'conv2',
                        'fc1',
                        'fc2']

actived_output_dict=dict.fromkeys(active_layer_name_list)

active_function_gradient_dict=dict.fromkeys(active_layer_name_list)

conv_weights_dict=dict.fromkeys(conv_layer_name_list)
conv_biases_dict=dict.fromkeys(conv_layer_name_list)

pooling_layer_input_size_dict=dict.fromkeys(pooling_layer_name_list)
pooling_layer_ouput_size_dict=dict.fromkeys(pooling_layer_name_list)
pooling_layer_input_dict = dict.fromkeys(pooling_layer_name_list)
pooling_layer_output_dict = dict.fromkeys(pooling_layer_name_list)

conv_layer_input_size_dict=dict.fromkeys(conv_layer_name_list)
conv_layer_output_size_dict=dict.fromkeys(conv_layer_name_list)

fc_layer_input_size_dict=dict.fromkeys(fc_layer_name_list)

fc_layer_input_size_dict['fc1'] = 676
fc_layer_input_size_dict['fc2'] = 25

fc_weights_dict=dict.fromkeys(fc_layer_name_list)

fc_biases_dict=dict.fromkeys(fc_layer_name_list)



pooling_layer_max_index_dict=dict.fromkeys(pooling_layer_name_list)

weishts_gradient_dict=dict.fromkeys(gradient_name_list)
biases_gradient_dict=dict.fromkeys(gradient_name_list)  

weights_gradient_shadow_dict=dict.fromkeys(gradient_name_list)
biases_gradient_shadow_dict=dict.fromkeys(gradient_name_list)

def generate_conv_weights(_layer_name_, _params_mean_, _params_std_):
    conv_weights_dict[_layer_name_] = np.random.normal(_params_mean_, _params_std_, layer_params_size_dict[_layer_name_])
    weishts_gradient_dict[_layer_name_] = np.zeros(layer_params_size_dict[_layer_name_])
    weights_gradient_shadow_dict[_layer_name_] = np.zeros(layer_params_size_dict[_layer_name_])
    
#the number of conv-layer biases equals the number of output channels of the convolution
def generate_conv_biases(_layer_name_, _params_mean_):
    conv_biases_dict[_layer_name_] = np.ones((layer_params_size_dict[_layer_name_][3])) * _params_mean_
    biases_gradient_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_][3]))
    biases_gradient_shadow_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_][3]))
    
def generate_fc_weights(_layer_name_,  _params_mean_, _params_std_):
    fc_weights_dict[_layer_name_] = np.random.normal(_params_mean_, _params_std_ , (fc_layer_input_size_dict[_layer_name_], layer_params_size_dict[_layer_name_]))
    weishts_gradient_dict[_layer_name_] = np.zeros((fc_layer_input_size_dict[_layer_name_], layer_params_size_dict[_layer_name_]))
    weights_gradient_shadow_dict[_layer_name_] = np.zeros((fc_layer_input_size_dict[_layer_name_], layer_params_size_dict[_layer_name_]))
    
def generate_fc_biases(_layer_name_, _params_mean_):
    fc_biases_dict[_layer_name_] = np.ones((layer_params_size_dict[_layer_name_])) * _params_mean_
    biases_gradient_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_]))
    biases_gradient_shadow_dict[_layer_name_] = np.zeros((1, layer_params_size_dict[_layer_name_]))
    

    


#convolution with padding
#    stride defaults to 1
def conv(_input_, _layer_name_, _stride_x = 1, _stride_y = 1):
    
    _input_size_ = _input_.shape
#size of the convolution kernel
    _filter_size_ = layer_params_size_dict[_layer_name_]
    
    _filter_size_x_ = _filter_size_[1]
    _filter_size_y_ = _filter_size_[0]
    
#    _in_channel_ = _filter_size_[2]
    _out_channel_ = _filter_size_[3]
#single-side padding in the x and y directions; only valid when stride = 1
    _x_padding_ = int((_filter_size_x_ - 1)/2)
    _y_padding_ = int((_filter_size_y_ - 1)/2)
    
    _padded_input_ = np.pad(_input_, ((_y_padding_, _y_padding_), (_x_padding_, _x_padding_), (0,0)),'constant')
#number of sliding steps in the x and y directions, i.e. the spatial size of the conv output
    _x_slip_num_ = int((_input_size_[1]-1)/_stride_x+1)
    _y_slip_num_ = int((_input_size_[0]-1)/_stride_y+1)
    
    _conv_weights_matrix_ = conv_weights_dict[_layer_name_]
    
#    _output_ = layer_output_empty_matrix_dict[_layer_name_]
    _output_ = np.empty((_y_slip_num_, _x_slip_num_, _out_channel_))

    for _z_ in range(0,_out_channel_):
        
        x_ = 0
        for _i_ in range(0,_x_slip_num_):
            
            y_ = 0
            for _j_ in range(0,_y_slip_num_):
                _output_[_j_, _i_, _z_]=np.sum( np.multiply(_conv_weights_matrix_[:,:,:,_z_], _padded_input_[y_:y_+_filter_size_y_, x_:x_+_filter_size_x_,:]))    
                
                y_ = y_ + _stride_y
            x_ = x_ + _stride_x
    
    conv_layer_input_size_dict[_layer_name_] = _input_size_
    conv_layer_output_size_dict[_layer_name_] = [_y_slip_num_, _x_slip_num_, _out_channel_]
    return _output_
 
#generate_conv_weights('conv1', 0.0, 0.1)    
#print(conv(img,'conv1').shape)

#pooling window stride defaults to 2
#the stride must be <= the kernel size, otherwise x_padding / y_padding can become negative and raise an error
def max_pooling(_input_, _layer_name_, _stride_x_ = 2, _stride_y_ = 2,):
    
    _input_size_ = _input_.shape
    
    _filter_size_ = layer_params_size_dict[_layer_name_]
    _kernel_size_x_ = _filter_size_[1]
    _kernel_size_y_ = _filter_size_[0]
    _channel_ = _filter_size_[2]
#flag indicating whether the pooling input needs padding
    _x_padding_mark_ = (_input_size_[1] - _kernel_size_x_) % _stride_x_
    _y_padding_mark_ = (_input_size_[0] - _kernel_size_y_) % _stride_y_
   
    if _x_padding_mark_ ==0:
        _x_padding_ = 0
    else:
        _x_padding_ = _kernel_size_x_ - _x_padding_mark_
    
    if _y_padding_mark_ == 0:
        _y_padding_ = 0
    else:
        _y_padding_ = _kernel_size_y_ - _y_padding_mark_
    
    _padded_input_ = np.pad(_input_,((0,_y_padding_), (0,_x_padding_), (0,0)),'constant')
#size after padding
    _padded_input_size_ = _padded_input_.shape
# number of sliding steps in each direction, i.e. the pooled output size
    _x_slip_num_ = int((_padded_input_size_[1]-_kernel_size_x_)/_stride_x_+1)
    _y_slip_num_ = int((_padded_input_size_[0]-_kernel_size_y_)/_stride_y_+1)
    
    _output_ = np.empty((_y_slip_num_, _x_slip_num_, _channel_))
    
    _max_index_array_ = np.empty((_y_slip_num_, _x_slip_num_, _channel_))
    
    for _z_ in range(_channel_):
        x_ = 0
        for _i_ in range(0,_x_slip_num_):       # loop over the x direction
            
            y_ = 0
            for _j_ in range(0,_y_slip_num_):                             # y 
#                to change the pooling type, replace np.max here
                _in_=_padded_input_[y_ : y_ + _kernel_size_y_ , x_ : x_ + _kernel_size_x_ , _z_]
                _output_[_j_, _i_, _z_]=np.max(_in_)
                
                _max_index_array_[_j_, _i_, _z_]=np.argmax(_in_)

                y_ = y_ + _stride_y_
            x_ = x_ + _stride_x_
            
    pooling_layer_max_index_dict[_layer_name_] = _max_index_array_
    pooling_layer_input_size_dict[_layer_name_] = _input_size_
    pooling_layer_ouput_size_dict[_layer_name_] = _output_.shape
    pooling_layer_input_dict[_layer_name_] = _input_
    pooling_layer_output_dict[_layer_name_] = _output_
    return _output_

  

def relu_function(x__ , _threshold_=0):
    if x__ < _threshold_:
        return 0.0
    else:
        return x__
    
#ReLU activation
def apply_relu_active(_input_matrix_, _layer_name_):
    vfunc=np.vectorize(relu_function)       #np.vectorize() applies the function element-wise; under the hood it is still a Python for loop, so it does not improve efficiency
    _out_ = vfunc(_input_matrix_)

    actived_output_dict[_layer_name_] = _out_      
    return _out_

#generate the parameters and store them in the dicts
def generate_params():
    
    for _name_ in conv_layer_name_list:
        generate_conv_weights(_name_, 0.0, 0.1)
        generate_conv_biases(_name_, 0.1)
#    print('$$$$$$$$$$$')
    for _name2_ in fc_layer_name_list:
        generate_fc_weights(_name2_ , 0.0, 0.1)
        generate_fc_biases(_name2_,0.1)
#        print('#################')
    
    
def conv_inference_and_bp(_input_):
    conv1 = conv(_input_,'conv1')   

#    print('conv 1 shape',conv1.shape)
    actived_conv1 = apply_relu_active(conv1 + conv_biases_dict['conv1'], 'conv1')
    pool1 = max_pooling(actived_conv1,'pool1')
#    print('pool 1 shape', pool1.shape)

    conv2 = conv(pool1,'conv2')
#    print('conv 2 shape',conv2.shape)
    actived_conv2 = apply_relu_active(conv2 + conv_biases_dict['conv2'], 'conv2')
    pool2 = max_pooling(actived_conv2,'pool2')
#    print('pool 2 shape',pool2.shape)
    
    pool2_size = pool2.shape
    
    _nodes_ = pool2_size[0] * pool2_size[1] * pool2_size[2] 
    _reshaped_nodes_ = np.reshape(pool2,[1,_nodes_])
    
    fc1 = np.dot(_reshaped_nodes_, fc_weights_dict['fc1']) + fc_biases_dict['fc1']
    actived_fc1 = apply_relu_active(fc1, 'fc1')
    
#    print('fc1 shape',fc1.shape)
    fc2 = np.dot(actived_fc1, fc_weights_dict['fc2']) +fc_biases_dict['fc2']
    
    return fc2


#def fc_inference_and_bp(_input_):
#    
#    _input_size_ = _input_.shape
#    _nodes_ = _input_size_[0] * _input_size_[1] *_input_size_[2] 
##    print(_nodes_)
##    fc_layer_input_size_dict['fc1'] = _nodes_
#    _reshaped_nodes_ = np.reshape(_input_,[1,_nodes_])
#    fc1 = np.dot(_reshaped_nodes_, fc_weights_dict['fc1']) + fc_biases_dict['fc1']
#    actived_fc1 = apply_relu_active(fc1)
#    print('fc1 shape',fc1.shape)
#    fc2 = np.dot(actived_fc1, fc_weights_dict['fc2']) +fc_biases_dict['fc2']
#    print('fc2 shape',fc2.shape)
#    return fc2

#generate_params()
#_a = conv_inference_and_bp(img)
#fc_inference_and_bp(_a)


#  ReLU derivative used for the gradient computation
def calculate_gradient_function(_x_,):
    if _x_ > 0:
        return 1.0
    else:
        return 0.0
#compute the backpropagated gradient of the ReLU activation
def calculate_active_layer_gradient(_input_matrix_, _layer_name_):
    vfunc = np.vectorize(calculate_gradient_function)
    _out_ = vfunc(_input_matrix_)
    
    active_function_gradient_dict[_layer_name_] = _out_
    return _out_

#output-layer error when using a squared loss
def fc_outlayer_error(_out_,_label_):

    return np.asarray( _out_ - _label_ )    

#_layer_name_ is an FC layer
def fc_hidden_layer_error(_layer_name_, _previous_layer_error_):
    
    return np.multiply( np.dot ( _previous_layer_error_ , fc_weights_dict[_layer_name_].T ) , calculate_active_layer_gradient(actived_output_dict['fc1'], _layer_name_) )      
    
#_layer_name_ is an FC layer
def pool_layer_error_from_fc(_error_from_fc, _layer_name_,):
    
    _fc_weights_ = fc_weights_dict[_layer_name_]
#    size of the pooling layer that feeds the FC layer
    _shape_ = pooling_layer_ouput_size_dict[pooling_layer_name_list[len(pooling_layer_name_list) - 1]]
    return np.reshape(np.dot( _error_from_fc, _fc_weights_.T), [ _shape_[0], _shape_[1], _shape_[2] ]) 

#_layer_name_ is a pooling-layer name
def pool_layer_error_upsample(_error_matrix_, _layer_name_, _stride_x_ = 2, _stride_y_ = 2):

    _padded_upsampled_shape_ = pooling_layer_input_size_dict[_layer_name_]
    
    _upsampled_error_ = np.zeros((_padded_upsampled_shape_[0], _padded_upsampled_shape_[1], _padded_upsampled_shape_[2]))

    _index_array_ = pooling_layer_max_index_dict[_layer_name_]    
    
    _kernel_size_ = layer_params_size_dict[_layer_name_]
    
    _kernel_size_x_ = _kernel_size_[1]
    _kernel_size_y_ = _kernel_size_[0]
     
    _xs_=int((_padded_upsampled_shape_[1] - _kernel_size_x_)/_stride_x_+1)
    _ys_=int((_padded_upsampled_shape_[0] - _kernel_size_y_)/_stride_y_+1)
    channel=_padded_upsampled_shape_[2]
    
    for z in range(channel):
        _x_ = 0
        for i in range(_xs_):       # loop over the x direction
            _y_ = 0
            for j in range(_ys_):   # y 
                
                _index_ = _index_array_[j,i,z]
                _x_index_ = int(( _index_ ) / _kernel_size_x_ )
                _y_index_ = int((_index_ ) % _kernel_size_y_)

                _upsampled_error_[_y_ : _y_ + _kernel_size_y_ , _x_ : _x_ + _kernel_size_x_ , z][_y_index_,_x_index_] = _error_matrix_[j,i,z]
                
                _y_ = _y_ + _stride_y_
            _x_ = _x_ + _stride_x_   
    
#    print('upsample pool layer err',_upsampled_error_.shape)     
    return _upsampled_error_

#    _layer_name_ is the name of a pooling layer
def conv_layer_error_from_pool(_err_maxtrix_, _layer_name_):
#    print(_err_maxtrix_.shape)
    return np.multiply(pool_layer_error_upsample(_err_maxtrix_,_layer_name_), calculate_active_layer_gradient(pooling_layer_input_dict[_layer_name_], _layer_name_))
#    return pool_layer_error_upsample(_err_maxtrix,_layer_)

#note: this helper has no channel dimension
def bp_conv_padding(_img_, _filter_size_x_, _filter_size_y_):
    _x_padding_=int((_filter_size_x_-1)/2)
    _y_padding_=int((_filter_size_y_-1)/2)
    
    return np.pad(_img_,((_y_padding_,_y_padding_), (_x_padding_,_x_padding_)),'constant')  


# propagate the error of a conv layer back to the preceding pooling layer (single channel)
def bp_conv_one_channel(_err_, _weights_, _shape_, _filter_size_x_, _filter_size_y_, _stride_x_=1, _stride_y_=1):

    _x_num_ = int((_shape_[1]-1)/_stride_x_+1)                                  #number of sliding steps in each direction, i.e. the output size
    _y_num_ = int((_shape_[0]-1)/_stride_y_+1)
#size of the convolution kernel
#    _filter_size_ = layer_params_size_dict[_layer_name_]

    _one_channel_out_err_ = np.empty((_y_num_,_x_num_))
    
    _err_ = bp_conv_padding(_err_,_filter_size_x_,_filter_size_y_)

    _x_ = 0
    for _i_ in range(0,_x_num_):
        
        _y_ = 0
        for _j_ in range(0,_y_num_):
            
            _one_channel_out_err_[_j_, _i_] =np.sum( np.multiply(_err_[_y_:_y_+_filter_size_y_, _x_:_x_+_filter_size_x_], _weights_))

            _y_ = _y_ + _stride_y_
        _x_ = _x_ + _stride_x_  
        
    return _one_channel_out_err_
       

#compute the update gradient of the conv-layer weights; _layer_name_ is a conv-layer name
def calculate_conv_weights_update_gradient(_err_, _previous_out_, _layer_name_):
    _size_ = layer_params_size_dict[_layer_name_]
    
    _xs_ = _size_[1] -1     #number of loop iterations derived from the kernel size
    _ys_ = _size_[0] -1
    
    _shape_list_ = conv_layer_output_size_dict[_layer_name_]
    _shape_x_ = _shape_list_[1]
    _shape_y_ = _shape_list_[0]
 
    _gradient_matrix_ = np.zeros((_size_[0],
                                  _size_[1],
                                  _size_[2],
                                  _size_[3]))
    
    for _out_channel_ in range(_size_[3]):
        _padded_err_ = bp_conv_padding( _err_[:,:,_out_channel_], _size_[1], _size_[0] )
        
        for _in_channel_ in range(_size_[2]):
            _previous_out_one_layer_ = _previous_out_[:,:,_in_channel_]

            for _i_ in range(_xs_):
                
                for _j_ in range(_ys_):
                    _gradient_matrix_[_j_, _i_, _in_channel_, _out_channel_] = np.sum( np.multiply(_padded_err_[_j_:_j_+_shape_y_, _i_:_i_+_shape_x_] ,_previous_out_one_layer_))

    return _gradient_matrix_


#_layer_name_ is a conv-layer name
def pool_layer_error_from_conv(_error_, _layer_name_):
    _size_ = layer_params_size_dict[_layer_name_]
    
    _out_shape_ = _size_[3]   #number of output channels of the conv layer
    _in_shape_ = _size_[2]            #number of input channels of the conv layer

    _out_size_ = conv_layer_input_size_dict[_layer_name_]
    _out_error_ = np.zeros((_out_size_[0],_out_size_[1] ,_out_size_[2] ))
#    print('size',_size_[2])
    _weights_ = conv_weights_dict[_layer_name_]
    for _out_channel_ in range(_out_shape_):
        
        for _in_channel_ in range(_in_shape_):
            _out_error_[:,:,_in_channel_] = _out_error_[:,:,_in_channel_] + bp_conv_one_channel(_error_[:,:,_out_channel_],
                                                                                                _weights_[:,:,_in_channel_,_out_channel_],
#                                                                                                _weights_list_[_out_channel_][:,:,_in_channel_],
                                                                                                _out_size_, 
                                                                                                _size_[1],
                                                                                                _size_[0]
                                                                                                )         
    
#    print(_out_error_.shape)        
    return _out_error_ / _out_shape_
 
#L2 regularization
def l2_regularization(_input_):
    return ( np.sum( _input_ ** 2 ) ) ** 0.5 

         
#compute the error of each layer and accumulate the gradients
def error_bp_op(_img_):
    _y_ = conv_inference_and_bp(_img_)
#    print('qian xiang chuan bo ',_y_)
    
    _loss_ = fc_outlayer_error(_y_,label)  
#    add regularization
    _loss_ += regularization_rate * l2_regularization(_loss_)
#    print('fc output layer loss ', _loss_.shape)

    _fc2_weights_gradient_ = np.dot( actived_output_dict['fc1'].T, _loss_)

    
    _hidden_layer_err_ = fc_hidden_layer_error('fc2', _loss_)
#    add regularization
    _hidden_layer_err_ += regularization_rate * l2_regularization(_hidden_layer_err_)
#    print('hidden layer error', _hidden_layer_err_.shape)

    _fc1_weights_gradient_ = np.dot( np.reshape(pooling_layer_output_dict['pool2'],( [1,676])).T, _hidden_layer_err_)
#    print('fc 1 weights gradient',_fc1_weights_gradient_.shape)
    
    _pool_2_err_ = pool_layer_error_from_fc(_hidden_layer_err_, 'fc1')
#    print(' pool 2 layer error',_pool_2_err_.shape)
    
    _conv2_err_ = conv_layer_error_from_pool(_pool_2_err_, 'pool2')
#    print('conv 2 layer error', _conv2_err_.shape)
    _conv2_weights_gradient_ = calculate_conv_weights_update_gradient(_conv2_err_, pooling_layer_output_dict['pool1'], 'conv2')
#    print('conv 2 weights gradient ',_conv2_weights_gradient_.shape)
    
    _pool_1_err_ = pool_layer_error_from_conv(_conv2_err_, 'conv2')
#    print('pool 1 layer error ',_pool_1_err_.shape)
    
    _conv1_err_ = conv_layer_error_from_pool(_pool_1_err_, 'pool1')
#    print('conv 1 layer error ',_conv1_err_.shape)
    _conv1_weights_gradient_ = calculate_conv_weights_update_gradient(_conv1_err_, _img_, 'conv1')
#    print('conv 1 weights gradient', _conv1_weights_gradient_.shape)
    
    weishts_gradient_dict['conv1'] += _conv1_weights_gradient_
    weishts_gradient_dict['conv2'] += _conv2_weights_gradient_
    weishts_gradient_dict['fc1'] +=  _fc1_weights_gradient_ 
    weishts_gradient_dict['fc2'] +=  _fc2_weights_gradient_ 
    
    biases_gradient_dict['conv1'] += np.sum( _conv1_err_, axis=(0,1) )
    biases_gradient_dict['conv2'] += np.sum( _conv2_err_, axis=(0,1) )
    biases_gradient_dict['fc1'] += _hidden_layer_err_
    biases_gradient_dict['fc2'] += _loss_
    
    print('inference ',_y_)
#    return  _conv1_weights_gradient_, _conv2_weights_gradient_, _fc1_weights_gradient_, _fc2_weights_gradient_
    return _loss_

positive_num = 0
negative_num = 0

def train_input():
    
    label.clear()
    if random.randint(1,10)>5:
        _img_ = cv2.imread(r'E:\faces_gray_resized5050\{}.jpg'.format(random.randint(1,150000)))
#        positive_num += 1
        label.append(1)
    else:
        _img_ = cv2.imread(r'E:\Img\opencv3negative_faces_data\face\negdata_gray\{}.jpg'.format(random.randint(1,80000)))
#        negative_num += 1
        label.append(-1)
    print('label',label)
#    normalize to [0, 1]
    return _img_ / 255,label


def update_weights_and_params():   
    
    for _step_ in range(train_steps):
            
    
        for _i_ in range(batch_size):
            
            _input_ = train_input()
            print('loss',error_bp_op(_input_[0]))
#  average the gradients over one batch
        for _name_ in gradient_name_list:
            weishts_gradient_dict[_name_] /= batch_size
            biases_gradient_dict[_name_] /= batch_size
#apply a moving average to the gradients
        if _step_ % (1/(1-moving_average_decay)) == 0.0:
            for _name4_ in gradient_name_list:
                weights_gradient_shadow_dict[_name4_] = weishts_gradient_dict[_name4_]
                biases_gradient_shadow_dict[_name4_] = biases_gradient_dict[_name4_]

        else:
            for _name5_ in gradient_name_list:
                weights_gradient_shadow_dict[_name5_] = weishts_gradient_dict[_name5_] * (1 - moving_average_decay) + weights_gradient_shadow_dict[_name5_] * moving_average_decay
                biases_gradient_shadow_dict[_name5_] = biases_gradient_dict[_name5_] * (1 - moving_average_decay) + biases_gradient_shadow_dict[_name5_] * moving_average_decay
            
# update the conv-layer parameters
        for _name2_ in conv_layer_name_list: 
            conv_weights_dict[_name2_] = conv_weights_dict[_name2_] - learning_rate_base * weights_gradient_shadow_dict[_name2_]
            conv_biases_dict[_name2_] = conv_biases_dict[_name2_] - learning_rate_base * biases_gradient_shadow_dict[_name2_]
#            after updating, reset the accumulated-gradient arrays to zero
            weishts_gradient_dict[_name2_] = weishts_gradient_dict[_name2_] * 0
            biases_gradient_dict[_name2_] = biases_gradient_dict[_name2_] * 0
#  update the FC-layer parameters
        for _name3_ in fc_layer_name_list:
            fc_weights_dict[_name3_] = fc_weights_dict[_name3_] - learning_rate_base * weights_gradient_shadow_dict[_name3_]
            fc_biases_dict[_name3_] = fc_biases_dict[_name3_] - learning_rate_base * biases_gradient_shadow_dict[_name3_]
            
            weishts_gradient_dict[_name3_] = weishts_gradient_dict[_name3_] * 0
            biases_gradient_dict[_name3_] = biases_gradient_dict[_name3_] * 0
            
#        if _step_ % save_result_step == 0:
##                save the results
#            with open(r'C:\Users\Administrator\Desktop\tensorflow\ccccccc7.txt','a') as f:
#                f.writelines([str(_step_),'#',str(loss_value),'#',str(recall_rate),'#',str(precision_rate),'\n'])        




generate_params()
update_weights_and_params()


end = time.perf_counter()
print("running time is %g s" % (end-start))
A CNN (convolutional neural network) is a widely used deep learning model for tasks such as image recognition and object detection, and backpropagation is one of the key steps used to train it. Backpropagation adjusts the network's weights and biases based on the error, i.e. the difference between the prediction and the ground truth, in order to improve the model's accuracy. Implementing it in a CNN involves two phases: the forward pass and the backward pass.

First, the forward pass. The input is convolved by the convolutional layers to extract image features, a non-linear activation function (such as ReLU) is applied, and pooling layers then shrink the feature maps while keeping the most salient features. Finally, the resulting features are fed into the fully connected layers to produce the classification or regression output.

Second, the backward pass. The error between the prediction and the ground truth is computed first. Using the chain rule, this error is propagated backwards from the output layer towards the input layer, and the gradient of every weight and bias along the way is obtained, so that the parameters can be adjusted to bring the output closer to the target. The update itself is usually carried out with gradient descent or a related optimizer.

More concretely, backpropagation can be broken into four steps: compute the derivative of the loss with respect to the output layer's input, the derivative of the loss with respect to the output layer's weights, the derivative of the loss with respect to the weighted inputs of the earlier layers, and finally the derivative of the loss with respect to the input image. Iterating these steps and repeatedly updating the parameters improves the training result.

In summary, backpropagation in a CNN adjusts the network's weights and biases according to the computed error in order to improve the model's accuracy. The process consists of a forward pass followed by a backward pass, and by continually updating the parameters the network gradually approaches the target outputs, which is what optimizes the model.
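To make the chain-rule step above concrete, here is a minimal NumPy sketch of the backward pass through a single fully connected output layer under squared loss, in the same spirit as fc_outlayer_error and fc_hidden_layer_error in the script (the shapes and values here are hypothetical and not taken from the network above):

import numpy as np

np.random.seed(0)
a_prev = np.random.rand(1, 4)                    # activated output of the previous FC layer (hypothetical size)
W = np.random.randn(4, 1) * 0.1                  # output-layer weights
b = np.zeros((1, 1))                             # output-layer bias
y_true = np.array([[1.0]])                       # ground-truth label

z = np.dot(a_prev, W) + b                        # forward pass through the output layer
delta_out = z - y_true                           # output-layer error under squared loss
grad_W = np.dot(a_prev.T, delta_out)             # gradient of the loss w.r.t. the weights
grad_b = delta_out                               # gradient of the loss w.r.t. the bias
relu_grad = (a_prev > 0).astype(float)           # ReLU derivative at the previous layer
delta_prev = np.dot(delta_out, W.T) * relu_grad  # error propagated back to the previous layer

print('weight gradient shape', grad_W.shape)     # (4, 1)
print('propagated error shape', delta_prev.shape)  # (1, 4)

Repeating the same pattern layer by layer, with upsampling through the max-pooling indices and convolution against the kernels for the conv layers, gives the full backward pass implemented in the script above.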