Implementing the basic architecture class
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <fstream>
#include <ostream>
#include <typeinfo>
#include <time.h>
using namespace std;
using namespace cv;

class Nnetwork {
public:
    int visibleSize, hiddenSize, outputSize, layer_num;
    double lambda, cost, a;
    Mat data,                     // training inputs, one example per row
        pre_data,                 // prediction-set inputs
        pre_outp,                 // prediction-set labels
        outp,                     // training labels, one per row
        *b, *W,                   // per-layer biases and weights
        *bgrad, *Wgrad,           // analytic (backprop) gradients
        *active_value,            // per-layer activations
        *test_Wgrad, *test_bgrad, // numerical gradients for gradient checking
        *av;
    void initParam();
    Nnetwork();
    Nnetwork(int visiblesize, int hiddensize, int outpsize, int layernums, double lambda) :
        visibleSize(visiblesize),
        hiddenSize(hiddensize),
        outputSize(outpsize),
        layer_num(layernums),
        lambda(lambda)
    {
        initParam();
    }
    Mat sigmoid(Mat matrix);
    double sigmoid(double num);
    Mat mat_exp(Mat r);
    Mat mat_log(Mat r);
    void forward_propagation();
    void showimage(Mat data, int pic_size, int num);
    void test_readdata();
    void test_readlabel();
    void test_load_Param();
    void test_nncost_1();
    void test_nncost_2();
    double test_nncost_3(int lambda, Mat *active_value, Mat *b, Mat *W);
    Mat sigmoidGradient(Mat inp);
    void writeMatToFile(cv::Mat& m, const char* filename);
    void computeNumericalGradient();
    Mat debugInitializeWeights(int fan_out, int fan_in);
    void checkNNGradients();
    void train();
    double predict();
    double pre_dict();
    void before_train();
};
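As a quick orientation (this usage line is mine, not from the original post): ex4's network has 400 inputs (20x20 pixel images), 25 hidden units, 10 classes, and two weight layers, so it would be constructed like this; lambda = 1 matches the regularized cost check later in the post.

// Hypothetical usage for the ex4 setting
Nnetwork nn(400, 25, 10, 2, 1.0);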
Part 1: Loading and Visualizing Data
Part 2: Loading Parameters
For these parts, see the previous post: https://blog.csdn.net/Runner_of_nku/article/details/88815894
Part 3: Compute Cost (Feedforward)
Part 4: Implement Regularization
The forward propagation in this step reuses the parameters loaded in the previous post; what we need to implement here is the regularized cost function.
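For reference, this is ex4's regularized cost function, with m examples, K = 10 classes, and hypothesis h given by the forward pass (the biases b are deliberately excluded from the regularization sum):

J = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y_k^{(i)}\log h(x^{(i)})_k-\left(1-y_k^{(i)}\right)\log\left(1-h(x^{(i)})_k\right)\right]+\frac{\lambda}{2m}\sum_{l}\sum_{i,j}\left(W_{ij}^{(l)}\right)^2

The code below computes the unregularized cost, adds the regularization term, and then also computes the backpropagation gradients: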
void test_nncost_1()
{
    // Forward propagation: a1 = sigmoid(W0*x + b0), a2 = sigmoid(W1*a1 + b1)
    delete[] active_value;
    active_value = new Mat[2];
    int data_size = outp.rows;
    // Broadcast the bias vectors across all training examples
    active_value[0] = repeat(b[0], 1, data_size);
    active_value[1] = repeat(b[1], 1, data_size);
    active_value[0] = sigmoid(W[0] * data.t() + active_value[0]);
    active_value[1] = sigmoid(W[1] * active_value[0] + active_value[1]);

    // Recode the labels 1..10 as one-hot column vectors
    Mat yk = Mat::zeros(10, data_size, CV_64FC1);
    for (int i = 0; i < data_size; i++)
        yk.at<double>(int(outp.at<double>(i, 0)) - 1, i) = 1;

    // Unregularized cross-entropy cost
    double J = sum((-1 * yk).mul(mat_log(active_value[1])) - (1 - yk).mul(mat_log(1 - active_value[1])))[0] / data_size;
    cout << "Cost at parameters (loaded from ex4weights)\n(this value should be about 0.287629)\n" << J << endl;

    // Add the regularization term lambda/(2m) * sum(W.^2); biases are not regularized
    lambda = 1;
    J += lambda / 2 / data_size * (sum(W[0].mul(W[0]))[0] + sum(W[1].mul(W[1]))[0]);
    cout << "Cost at parameters (loaded from ex4weights)\n(this value should be about 0.383770)\n" << J << endl;
    cost = J;

    // Backpropagation: delta3 = a2 - y, delta2 = (W1' * delta3) .* a1 .* (1 - a1)
    Mat delta3 = (active_value[1] - yk);
    Mat tem = (delta3.t() * W[1]).t();
    Mat delta2 = tem.mul(active_value[0]).mul(1 - active_value[0]);
    Wgrad[1] = delta3 * active_value[0].t() / data_size + lambda * W[1] / data_size;
    Wgrad[0] = delta2 * data / data_size + lambda * W[0] / data_size;
    bgrad[1] = Mat(delta3.rows, 1, CV_64FC1, Scalar::all(0));
    bgrad[0] = Mat(delta2.rows, 1, CV_64FC1, Scalar::all(0));
    // reduce with rtype 1 (CV_REDUCE_AVG) averages each row over the
    // data_size columns, i.e. sums the deltas and divides by data_size
    reduce(delta3, bgrad[1], 1, 1);
    reduce(delta2, bgrad[0], 1, 1);
}
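The code above relies on the helper members sigmoid and mat_log, which are declared in the class but not defined in this post. A minimal sketch of how they might look on top of OpenCV's element-wise cv::exp and cv::log (my guess at the helpers, not the author's original definitions):

Mat Nnetwork::mat_exp(Mat r)
{
    Mat out;
    cv::exp(r, out);  // element-wise e^x
    return out;
}
Mat Nnetwork::mat_log(Mat r)
{
    Mat out;
    cv::log(r, out);  // element-wise natural logarithm
    return out;
}
Mat Nnetwork::sigmoid(Mat matrix)
{
    // sigmoid(x) = 1 / (1 + e^(-x)), element-wise
    return 1 / (1 + mat_exp(-1 * matrix));
}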
Part 5: Sigmoid Gradient
This part is easy: implement the derivative of the sigmoid function. Since sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), in the actual computation we can simply write sigmoid(x)*(1-sigmoid(x)).
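The class declares sigmoidGradient for this purpose; given the sigmoid sketch above, a minimal version (mine, as a sketch) is just:

// sigmoid'(x) = sigmoid(x) .* (1 - sigmoid(x)), element-wise
Mat Nnetwork::sigmoidGradient(Mat inp)
{
    Mat s = sigmoid(inp);
    return s.mul(1 - s);
}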
Part 6: Initializing Parameters
ex4 simply hard-codes the random-initialization range as 0.12, but where does that 0.12 actually come from? It is the recommended value
sqrt(6) / sqrt(hiddenSize + visibleSize + 1) ≈ 0.12,
which for ex4's 25 hidden units and 400 inputs gives sqrt(6) / sqrt(426) ≈ 0.119 ≈ 0.12:
void initParam()
{
    a = 0.2;
    b = new Mat[layer_num];
    W = new Mat[layer_num];
    // First and last layers have fixed shapes; any middle layers are hidden*hidden
    b[0] = Mat(hiddenSize, 1, CV_64FC1, Scalar(0));
    b[layer_num - 1] = Mat(outputSize, 1, CV_64FC1, Scalar(0));
    W[0] = Mat(hiddenSize, visibleSize, CV_64FC1);
    W[layer_num - 1] = Mat(outputSize, hiddenSize, CV_64FC1);
    for (int i = 1; i < layer_num - 1; i++)
    {
        W[i] = Mat(hiddenSize, hiddenSize, CV_64FC1);
        b[i] = Mat(hiddenSize, 1, CV_64FC1, Scalar(0));
    }
    // Draw uniformly from [0, 1), then rescale into [-r, r)
    double r = sqrt(6.0) / sqrt(hiddenSize + visibleSize + 1);
    for (int i = 0; i < layer_num; i++)
    {
        randu(W[i], Scalar::all(0), Scalar::all(1));
        randu(b[i], Scalar::all(0), Scalar::all(1));
        W[i] = W[i] * 2 * r - r;
        b[i] = b[i] * 2 * r - r;
    }
    // Gradient buffers start at zero
    bgrad = new Mat[layer_num];
    Wgrad = new Mat[layer_num];
    for (int i = 0; i < layer_num; i++)
    {
        bgrad[i] = Mat::zeros(b[i].size(), CV_64FC1);
        Wgrad[i] = Mat::zeros(W[i].size(), CV_64FC1);
    }
    cost = 0;
}
Part 7: Implement Backpropagation
Now we implement backpropagation, and we implement gradient checking along with it. Gradient checking compares the analytic gradients from backpropagation against two-sided numerical differences of the cost, as sketched below.
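The class declares computeNumericalGradient together with the test_Wgrad/test_bgrad buffers for this; its definition is not shown in this post, so the following is only a sketch of the underlying idea, written as a standalone hypothetical helper that perturbs one weight matrix and differences the cost:

#include <opencv2/opencv.hpp>
#include <functional>

// Hypothetical helper (not the original computeNumericalGradient):
// two-sided numerical gradient of a cost functor J with respect to
// a single weight matrix W.
cv::Mat numericalGradient(const std::function<double(const cv::Mat&)>& J,
                          const cv::Mat& W, double eps = 1e-4)
{
    cv::Mat grad = cv::Mat::zeros(W.size(), CV_64FC1);
    for (int i = 0; i < W.rows; i++)
        for (int j = 0; j < W.cols; j++)
        {
            cv::Mat plus = W.clone(), minus = W.clone();
            plus.at<double>(i, j) += eps;
            minus.at<double>(i, j) -= eps;
            // Central difference: dJ/dW_ij ≈ (J(W+eps) - J(W-eps)) / (2*eps)
            grad.at<double>(i, j) = (J(plus) - J(minus)) / (2 * eps);
        }
    return grad;
}

If backpropagation is correct, ex4 expects the relative difference norm(numgrad - grad) / norm(numgrad + grad) to be below 1e-9.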
To drive the check, we define a tiny 3*5*3 neural network whose input set is generated deterministically with the sin function and whose labels run from 1 to 3; this miniature network lets us debug our own cost function:
void checkNNGradients()
{
    int input_layer_size = 3, hidden_layer_size = 5, num_labels = 3, m = 5;
    // debugInitializeWeights returns a fan_out x (fan_in + 1) matrix filled
    // via sin(), so the check is deterministic and repeatable
    Mat Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size), // 5*4
        Theta2 = debugInitializeWeights(num_labels, hidden_layer_size),       // 3*6
        X = debugInitializeWeights(m, input_layer_size - 1),                  // 5*3
        y = Mat(m, 1, CV_64FC1);
    // Labels cycle through 1..num_labels
    for (int i = 1; i <= m; i++)
        y.at<double>(i - 1, 0) = i % num_labels + 1;
    Nnetwork checknn(input_layer_size, hidden_layer_size, num_labels, 2, lambda);
    checknn.data = X;
    checknn.outp = y;
    checknn.W[0] = Mat::zeros(hidden_layer_size, input_layer_size, CV_64FC1); // 5*3
    checknn.W[1] = Mat(num_labels, hidden_layer_size, CV_64FC1);              // 3*5
    checknn.b[0] = Mat(hidden_layer_size, 1, CV_64FC1);                       // 5*1
    checknn.b[1] = Mat(num_labels, 1, CV_64FC1);                              // 3*1
    // Theta1 stores the bias in column 0 and the weights in columns 1..end,
    // so split it into checknn.W[0] and checknn.b[0]
    for (int i = 0; i < Theta1.rows; i++)
        for (int j = 1; j < Theta1.cols; j++)
            checknn.W[0].at<double>(i, j - 1) = Theta1.at<double>(i, j);
    for (int j = 0; j < Theta1.rows; j++)
        checknn.b[0].at<double>(j, 0) = Theta1.at<double>(j, 0);