要点
1、sigmoid函数作为激活函数,二分类交叉熵函数作为损失函数
2、可以同时对整个训练集进行训练(已经向量化)
3、梯度的反向传播,第i层a(i)的梯度由第(i+1)每一个神经元对a(i)的梯度求和得到(或者求平均)
4、如果发现问题,或者有疑问可以交流一下,因为我也是第一次真正手写实现
待优化
1、用于训练的函数中其中有两步可合并为一步(已标注)
2、仅使用训练集验证拟合效果,可以看得出来确实是在收敛,但未应用到真实的数据集去验证
3、计算中间值的代码不够灵活,优化后可适用于任意数量的隐藏层神经网络训练
效果
代码
main
clear all
clc;
%% 神经网络初始化
%参数设置
Net_Scale = [4 5 1];
step = 0.1;
nums_iteration = 200;
%偏置添加 & 初始化
Neural_Net_layer_1 = rand([Net_Scale(2) Net_Scale(1)]+[0 1]);
Neural_Net_layer_2 = rand([Net_Scale(3) Net_Scale(2)]+[0 1]);
%% 数据导入
training_x = [-1 -2 -3 -4;4 3 2 1;3 5 -1 4]';
training_y = [0 1 0];
%% 神经网络训练
[Neural_Net_layer_1, Neural_Net_layer_2]=train_neural_net(Neural_Net_layer_1, Neural_Net_layer_2,training_x,training_y,step, nums_iteration);
train_neural_net
function [Neural_Net_layer_1, Neural_Net_layer_2]= train_neural_net(Neural_Net_layer_1, Neural_Net_layer_2,training_x,training_y,step,nums_iteration)
%接收每层神经网络层参数, 训练数据
%返回每层神经网络参数
%每层参数规模
size_layer_1=size(Neural_Net_layer_1);
size_layer_2=size(Neural_Net_layer_2);
for i = 1: nums_iteration
%输出层计算 arrayfun函数:激活函数(Sigmoid)作用于矩阵内每个数值
%第一层
W_X_1 = Neural_Net_layer_1(:,1: size_layer_1(2)-1) * training_x;
W_X_B_1 = W_X_1+ Neural_Net_layer_1(:,size_layer_1(2));
layer_1 = arrayfun(@(x) 1/(1+exp(-x)), W_X_B_1);
%第二层
W_X_2 = Neural_Net_layer_2(:,1: size_layer_2(2)-1) * layer_1;
W_X_B_2 = W_X_2+ Neural_Net_layer_2(:,size_layer_2(2));
layer_2 = arrayfun(@(x) 1/(1+exp(-x)), W_X_B_2);
%损失函数误差计算
%第三层
a_grad_3 = arrayfun(@(x,y) (1-y)./(1-x)-(y./x),layer_2,training_y);
%第二层
a_grad_2 = Neural_Net_layer_2(:,1: size_layer_2(2)-1)'*a_grad_3;
%激活函数自变量梯度计算
%第三层
z_grad_3 = a_grad_3.*layer_2.*(1-layer_2);
%第二层
z_grad_2 = a_grad_2.*layer_1.*(1-layer_1);
%{
%以上两步融合
%损失函数梯度 * 激活函数梯度
%第三层
z_grad_3 = layer_2-training_y;
%第二层
z_grad_2 = a_grad_2.*layer_1.*(1-layer_1)
%}
%W的梯度下降
Neural_Net_layer_2(:,1: size_layer_2(2)-1) = Neural_Net_layer_2(:,1: size_layer_2(2)-1) - step.*z_grad_3*layer_1';
Neural_Net_layer_1(:,1: size_layer_1(2)-1) = Neural_Net_layer_1(:,1: size_layer_1(2)-1) - step.*z_grad_2*training_x';
%b的梯度下降
Neural_Net_layer_2(:,size_layer_2(2)) = Neural_Net_layer_2(:,size_layer_2(2)) - sum(step*z_grad_3,2);
Neural_Net_layer_1(:,size_layer_1(2)) = Neural_Net_layer_1(:,size_layer_1(2)) - sum(step*z_grad_2,2);
end
disp(["实际y值:" training_y])
disp(["拟合y值:" layer_2])
end
待优化(1)已完成
function [Neural_Net_layer_1, Neural_Net_layer_2]= train_neural_net(Neural_Net_layer_1, Neural_Net_layer_2,training_x,training_y,step,nums_iteration)
%接收每层神经网络层参数, 训练数据
%返回每层神经网络参数
%每层参数规模
size_layer_1=size(Neural_Net_layer_1);
size_layer_2=size(Neural_Net_layer_2);
for i = 1: nums_iteration
%输出层计算 arrayfun函数:激活函数(Sigmoid)作用于矩阵内每个数值
%第一层
W_X_1 = Neural_Net_layer_1(:,1: size_layer_1(2)-1) * training_x;
W_X_B_1 = W_X_1+ Neural_Net_layer_1(:,size_layer_1(2));
layer_1 = arrayfun(@(x) 1/(1+exp(-x)), W_X_B_1);
%第二层
W_X_2 = Neural_Net_layer_2(:,1: size_layer_2(2)-1) * layer_1;
W_X_B_2 = W_X_2+ Neural_Net_layer_2(:,size_layer_2(2));
layer_2 = arrayfun(@(x) 1/(1+exp(-x)), W_X_B_2);
%{
%
%损失函数误差计算
%第三层
a_grad_3 = arrayfun(@(x,y) (1-y)./(1-x)-(y./x),layer_2,training_y);
%第二层
a_grad_2 = Neural_Net_layer_2(:,1: size_layer_2(2)-1)'*a_grad_3.*layer_2.*(1-layer_2);
%激活函数自变量梯度计算
%第三层
z_grad_3 = a_grad_3.*layer_2.*(1-layer_2);
%第二层
z_grad_2 = a_grad_2.*layer_1.*(1-layer_1);
%}
%以上两步融合
%损失函数梯度 * 激活函数梯度
%第三层
z_grad_3 = layer_2-training_y;
%第二层
z_grad_2 = Neural_Net_layer_2(:,1: size_layer_2(2)-1)'*z_grad_3.*layer_1.*(1-layer_1);
%W的梯度下降
Neural_Net_layer_2(:,1: size_layer_2(2)-1) = Neural_Net_layer_2(:,1: size_layer_2(2)-1) - step.*z_grad_3*layer_1';
Neural_Net_layer_1(:,1: size_layer_1(2)-1) = Neural_Net_layer_1(:,1: size_layer_1(2)-1) - step.*z_grad_2*training_x';
%b的梯度下降
Neural_Net_layer_2(:,size_layer_2(2)) = Neural_Net_layer_2(:,size_layer_2(2)) - sum(step*z_grad_3,2);
Neural_Net_layer_1(:,size_layer_1(2)) = Neural_Net_layer_1(:,size_layer_1(2)) - sum(step*z_grad_2,2);
end
disp(["实际y值:" training_y])
disp(["拟合y值:" layer_2])
end