【参考资料】
1、深度学习笔记三:反向传播(backpropagation)算法
https://blog.csdn.net/xierhacker/article/details/53431207
2、吴恩达机器学习 - 神经网络的反向传播算法
https://blog.csdn.net/wyg1997/article/details/80766153
【神经网络实现】
% I. Base data
% 0. Training data
cd D:\study\AI\data\ex3;
% Loading the .mat file defines the network inputs: matrices X and y.
load('ex3data1.mat'); % training data stored in arrays X, y
m = size(X, 1);
shuffled = randperm(m);          % random permutation of the m example indices
sample = X(shuffled(1:100), :);  % pick 100 random digit images to visualize
displayData(sample);
% 1. Choose the network architecture and randomly initialize parameters
% 1-1. Model sizes
input_size = 400;  % input layer (20x20 pixel images)
hidden_size = 25;  % hidden layer (25 units)
num = 10;          % number of classes (digits 0-9)
% 1-2. Random weight initialization
function theta = initWeights(L_in, L_out)
% Randomly initialize the weights of a layer with L_in incoming
% connections and L_out outgoing connections.
%   L_in  - number of input units feeding the layer
%   L_out - number of units in the layer
% Returns theta of size L_out x (1 + L_in) (the extra column is for the
% bias term), with entries drawn uniformly from [-epsilon, epsilon].
% Random (nonzero) initialization breaks symmetry between hidden units.
% NOTE: 0.12 is the recommended init epsilon from the course notes;
% it is unrelated to the 1e-4 epsilon used for numerical gradient checking.
epsilon = 0.12;
theta = rand(L_out, 1 + L_in) * 2 * epsilon - epsilon;
end
% Initialize both layers' weight matrices.
theta1 = initWeights(input_size, hidden_size); % 25 x 401
theta2 = initWeights(hidden_size, num);        % 10 x 26
% Unroll the weight matrices into one parameter vector (10285 x 1).
theta = [theta1(:); theta2(:)];
% II. Cost function
function g = sigmoidGradient(z)
% Derivative of the sigmoid function evaluated element-wise at z.
%   z - scalar, vector, or matrix
% Returns g of the same size as z, where g = sigmoid(z) .* (1 - sigmoid(z)).
s = sigmoid(z);
g = s .* (1 - s);
end
function [J grad] = nn_CostFunction(theta, input_size, hidden_size, num, X, y, lambda)
% Regularized cost J and unrolled gradient grad for a 3-layer neural
% network (input -> hidden -> output).
%   theta       - unrolled weights [theta1(:); theta2(:)]
%   input_size  - number of input units (here 400)
%   hidden_size - number of hidden units (here 25)
%   num         - number of output classes (here 10)
%   X           - m x input_size examples, y - m x 1 labels with values 1..num
%   lambda      - regularization strength
m = size(X, 1);
ones0 = ones(m,1);
size1 = hidden_size * (input_size + 1);
% One-hot encode y as an m x num matrix: row i has a 1 in column y(i).
ylabel = zeros(m, num);
for i=1:m
ylabel(i, y(i)) = 1;
end
% Unroll theta back into the per-layer weight matrices.
theta1 = reshape(theta(1:size1), hidden_size, input_size + 1); % 25x401
theta2 = reshape(theta(1 + size1:end), num, hidden_size + 1); % 10x26
% 2. Forward propagation: compute each layer's input z and activation a.
% input layer (bias column prepended)
a1 = [ones0 X]; % 5000x401
% hidden layer
z2 = a1*theta1'; % 5000x25
a2 = [ones0 sigmoid(z2)]; % 5000x26
% output layer
z3 = a2*theta2'; % 5000x10
a3 = sigmoid(z3); % 5000x10
% 3. Cross-entropy cost J(theta)
J = sum(sum( -(ylabel .* log(a3) + ( 1 - ylabel ) .* log(1 - a3))))/m;
% Regularization: lambda/(2m) * sum(theta.^2); the bias column
% (first column of each matrix) is not regularized.
J = J + lambda/2/m * (sum(sum(theta1(:, 2:end).^2)) + sum(sum(theta2(:, 2:end).^2)));
% 4. Backpropagation: compute all partial derivatives (mind the shapes).
% 4-1. Layer error terms delta
% At the output layer, with sigmoid activation + cross-entropy cost,
% the sigmoid derivative cancels analytically, leaving delta3 = a3 - y.
delta3 = a3 - ylabel; % a3:5000x10,ylabel:5000x10 => delta3:5000x10
% delta2 = delta3 * theta2 (bias column dropped) .* g'(z2)
delta2 = delta3 * theta2(:,2:end) .* sigmoidGradient(z2); % delta3:5000x10,theta2:10x26,z2:5000x25 => delta2:5000x25
% 4-2. Gradients: dJ/dtheta[l] = delta[l+1]' * a[l] / m
grad1 = delta2' * a1 / m; % delta2:5000x25,a1:5000x401 => grad1:25x401
grad2 = delta3' * a2 / m; % delta3:5000x10,a2:5000x26 => grad2:10x26
% 4-3. Add the regularization term (bias column excluded).
grad1(:, 2:end) = grad1(:, 2:end) + lambda/m*theta1(:, 2:end);
grad2(:, 2:end) = grad2(:, 2:end) + lambda/m*theta2(:, 2:end);
grad = [ grad1(:) ; grad2(:) ];
end
% 5. Run gradient checking once to validate backprop, then disable it.
% checkNNGradients(lambda); % numerically verify the analytic gradients
% 6. Minimize the cost function
% 6-1. Train the network with an advanced optimizer
lambda = 1;
options = optimset('MaxIter', 500);
% fmincg returns (optimized theta, per-iteration cost, exit flag)
[nn_theta, J, exit_flag] = fmincg(@(t) nn_CostFunction(t, input_size, hidden_size, num, X, y, lambda), theta, options);
% 6-2. Reshape the unrolled solution back into per-layer weight matrices
size1 = hidden_size * (input_size + 1);
nn_theta1 = reshape(nn_theta(1:size1), hidden_size, input_size + 1);
nn_theta2 = reshape(nn_theta(1 + size1:end), num, hidden_size + 1);
% III. Predict with the trained model
% nn_theta1: 25x401, nn_theta2: 10x26, X: 5000x400
function pred = predict(nn_theta1, nn_theta2, X)
% Predict the class label for every example in X using trained weights.
%   nn_theta1 - hidden-layer weights, hidden_size x (input_size + 1)
%   nn_theta2 - output-layer weights, num_labels x (hidden_size + 1)
%   X         - m x input_size example matrix (one example per row)
% Returns pred, an m x 1 vector of labels in 1..num_labels.
m = size(X, 1);
% Forward propagation, prepending a bias column at each layer.
a1 = [ones(m, 1) X];
a2 = sigmoid(a1 * nn_theta1');
a3 = sigmoid([ones(size(a2, 1), 1) a2] * nn_theta2');
% Predicted class = index of the output unit with the largest activation.
[~, pred] = max(a3, [], 2);
end
% Evaluate accuracy on the training set.
prediction = predict(nn_theta1, nn_theta2, X);
accuracy = mean(double(prediction == y)) * 100;
fprintf('Training Set Accuracy: %f\n', accuracy);
% Iteration 200 | Cost: 3.380794e-001 Training Set Accuracy: 99.200000
% Iteration 500 | Cost: 3.171951e-001 Training Set Accuracy: 99.560000