Error Backpropagation Learning Algorithm
Denverg
Secret Number
29 April 2018
Experimental Objective
Implement a BP neural network in C++.
Experimental Principles
Artificial Neural Network Model
Research on the function of neurons in the human brain began in the 1940s. In 1943, McCulloch and Pitts summarized some basic properties of neurons and proposed a mathematical description and structural model of the formal neuron, known as the M-P neuron model.
Input signals arrive through weighted connections; the neuron compares the total input it receives against its threshold and passes the result through an "activation function" to produce its output.
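In symbols, and in the threshold convention the code below uses (sigmoid(sum - bias)), a neuron with inputs $x_i$, connection weights $w_i$, and threshold $\theta$ outputs

$$ y = f\Big(\sum_i w_i x_i - \theta\Big), $$

where $f$ is the activation function.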
The ideal activation function is the step function, but because the step function is discontinuous and non-smooth, the continuously differentiable Sigmoid function is commonly used in practice.
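The Sigmoid in question is the logistic function

$$ \sigma(z) = \frac{1}{1 + e^{-z}}, \qquad \sigma'(z) = \sigma(z)\big(1 - \sigma(z)\big), $$

whose simple derivative keeps the gradient formulas in the next subsection compact.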
To classify the Iris dataset, we need to learn a neural network with multiple layers of neurons. Here the network has a four-neuron input layer, a three-neuron output layer, and one hidden layer (set to six neurons in this experiment).
Error Backpropagation Algorithm
The error backpropagation learning algorithm is usually called the BP algorithm. Its basic idea: whenever the network's output is wrong, adjust the network's weights so that the output error keeps moving toward its minimum. It is a form of gradient descent.
Gradient Descent Algorithm
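As a minimal summary of this subsection's idea: for a parameter vector $w$, error function $E(w)$, and learning rate $\eta$ (the variable ratio in the code below), gradient descent repeatedly steps against the gradient:

$$ w \leftarrow w - \eta \, \frac{\partial E}{\partial w}. $$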
Error Backpropagation
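Applying the chain rule to the squared error $E = \tfrac{1}{2}\sum_k (t_k - y_k)^2$ (the factor $\tfrac{1}{2}$ is why the training loop below multiplies the accumulated error by 0.5), together with the Sigmoid derivative above, gives exactly the quantities computed in computeOutputDY and computerHideDY: for output neuron $k$ with output $y_k$ and target $t_k$, and hidden neuron $h$ with output $b_h$,

$$ g_k = y_k (1 - y_k)(t_k - y_k), \qquad e_h = b_h (1 - b_h) \sum_k w_{kh}\, g_k, $$

with updates $\Delta w_{kh} = \eta\, g_k b_h$ and $\Delta \theta_k = -\eta\, g_k$ (and analogously for the input-to-hidden weights and thresholds).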
Experimental Content
Code Part
#include "stdafx.h" // MSVC precompiled header
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <ctime>
#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <windows.h>
using namespace std;
#define INNODE 4 // number of input nodes
#define OUTNODE 3 // number of output classes
#define INF 9999999
#define MAXNODE 6 // maximum number of nodes per layer
#define TRAINSAMPLE 75 // half of the 150 Iris samples
//initialize weights and biases with uniform random values in [0, 1]
void initialValue(vector<vector <double>> &weight1,vector<double> &bias1, vector<vector <double>> &weight2, vector<double> &bias2,int n1,int n2,int n3)
{
srand((unsigned int)time(NULL));
for (int i = 0; i < n2; i++)
{
for (int j = 0; j < n1; j++)
{
weight1[i][j] = rand()/double(RAND_MAX);
}
}
for (int i = 0; i < n3; i++)
{
for (int j = 0; j < n2; j++)
{
weight2[i][j] = rand() / double(RAND_MAX);
}
}
for (int i = 0; i < n2; i++)
{
bias1[i] = rand() / double(RAND_MAX);
}
for (int i = 0; i < n3; i++)
{
bias2[i] = rand() / double(RAND_MAX);
}
}
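//logistic activation; its derivative is sigmoid(z) * (1 - sigmoid(z)), which the gradient functions below rely on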
double sigmoid(double z)
{
return 1 / (1 + exp(-z));
}
//forward pass: compute the hidden-layer output hideY and the network output realoutY
void computeY(vector<vector <double>> &weight1, vector<double> &bias1, vector<vector <double>> &weight2, vector<double> &bias2, int n1, int n2, int n3, vector<double> &X, vector<double> &realoutY,vector<double> &hideY)
{
//calculate the hidden layer out hideY first
for (int i = 0; i < n2; i++)
{
double sum = 0;
for (int j = 0; j < n1; j++)
{
sum += weight1[i][j] * X[j];
}
sum = sigmoid(sum - bias1[i]);
hideY[i] = sum;
}
//calculate the output layer out realoutY;
for (int i = 0; i < n3; i++)
{
double sum = 0;
for (int j = 0; j < n2; j++)
{
sum += weight2[i][j] * hideY[j];
}
sum = sigmoid(sum - bias2[i]);
realoutY[i] = sum;
}
}
//compute the gradient term g of the output layer
void computeOutputDY(int n3, vector<double> &realoutY, vector<double> Y, vector<double> &outputDweight)
{
for (int i = 0; i < n3; i++)
{
outputDweight[i] = realoutY[i] * (1 - realoutY[i])*(Y[i] - realoutY[i]);
}
}
//compute the gradient term e of the hidden layer
void computerHideDY(vector<vector<double>> weight2, vector<double> outputDweight, vector<double> hideY,int n2, int n3, vector<double> &HideDweight)
{
for (int i = 0; i < n2; i++)
{
double sum = 0;
for (int j = 0; j < n3; j++)
{
sum += weight2[j][i] * outputDweight[j];
}
HideDweight[i] = sum * hideY[i] * (1 - hideY[i]);
}
}
//update weight1,2 and bias1,2
void updateWeight(vector<vector <double>> &weight1, vector<double> &bias1, vector<vector <double>> &weight2, vector<double> &bias2, int n1, int n2, int n3, vector<double> X, vector<double> &hideY, vector<double> outputDweight, vector<double> hideDweight, double ratio)
{
for (int i = 0; i < n1; i++)
{
for (int j = 0; j < n2 ; j++)
{
weight1[j][i] += ratio * hideDweight[j] * X[i];
}
}
for (int i = 0; i < n2; i++)
{
for (int j = 0; j < n3 ; j++)
{
weight2[j][i] += ratio * outputDweight[j] * hideY[i];
}
}
for (int i = 0; i < n2; i++)
{
bias1[i] -= ratio * hideDweight[i];
}
for (int i = 0; i < n3; i++)
{
bias2[i] -= ratio * outputDweight[i];
}
}
//sum of squared errors between the network output and the target for one sample
double computerError(vector<double> realoutY, vector<double> Y, int n)
{
double error = 0.0;
for (int i = 0; i < n; i++)
{
error += (realoutY[i] - Y[i])*(realoutY[i] - Y[i]);
}
return error;
}
//split string s into tokens v by delimiter c
void SplitString(const string& s, vector<string>& v, const string& c)
{
string::size_type pos1, pos2;
pos2 = s.find(c);
pos1 = 0;
while (string::npos != pos2)
{
v.push_back(s.substr(pos1, pos2 - pos1));
pos1 = pos2 + c.size();
pos2 = s.find(c, pos1);
}
if (pos1 != s.length())
v.push_back(s.substr(pos1));
}
int main()
{
DWORD start_time = GetTickCount();
//the node number of each layer
int n1 = 4, n2 = 6, n3 = 3;
//weight1[n2][n1] and bias[n2]
vector<vector<double>> weight1(n2, vector<double>(n1));
vector<vector<double>> weight2(n3, vector<double>(n2));
vector<double> bias1(n2);
vector<double> bias2(n3);
//grade of the layer
vector<double> outputDweight(n3);
vector<double> hideDweight(n2);
//outputs of the hidden layer and of the output layer
vector<vector<double>> realoutY(TRAINSAMPLE, vector<double>(OUTNODE));
vector<double> hideY(n2);
//X and Y
vector<vector<double>> X(TRAINSAMPLE, vector<double>(INNODE));
vector<vector<double>> Y(TRAINSAMPLE, vector<double>(OUTNODE));
//read the training data
ifstream myfile("C:\\Users\\Administrator\\Desktop\\test.txt");
for (int i = 0; i < TRAINSAMPLE; i++)
{
string temp;
getline(myfile, temp);
vector<string> a;
SplitString(temp, a, ",");
//the first INNODE fields are the four flower features
for (int j = 0; j < INNODE; j++)
{
double dd;
sscanf_s(a[j].c_str(), "%lf", &dd);
X[i][j] = dd;
}
//the last field is the species name; encode it as a one-hot target
if (a[INNODE].compare("Iris-setosa") == 0)
{
Y[i] = { 1,0,0 };
}
if (a[INNODE].compare("Iris-versicolor") == 0)
{
Y[i] = { 0,1,0 };
}
if (a[INNODE].compare("Iris-virginica") == 0)
{
Y[i] = { 0,0,1 };
}
}
myfile.close();
//initialize weights and biases randomly
initialValue(weight1, bias1, weight2, bias2, n1, n2, n3);
double err = INF;
//learning rate (step size)
double ratio = 0.5;
//train part
int count = 0;
while (err > 0.0005 && count < 1000000)
{
err = 0.0;
for (int i = 0; i < TRAINSAMPLE; i++)
{
computeY(weight1, bias1, weight2, bias2, n1, n2, n3, X[i], realoutY[i], hideY);
computeOutputDY(n3, realoutY[i], Y[i], outputDweight);
computerHideDY(weight2, outputDweight, hideY, n2, n3, hideDweight);
updateWeight(weight1, bias1, weight2, bias2, n1, n2, n3, X[i], hideY, outputDweight, hideDweight, ratio);
err += computerError(realoutY[i], Y[i], OUTNODE);
}
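//halve the accumulated error so that err matches E = (1/2) * sum of squared errors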
err = 0.5 * err;
count++;
}
cout << "train data part:" << endl;
//train out part
for (int i = 0; i < TRAINSAMPLE; i++)
{
for (int j = 0; j < OUTNODE; j++)
{
cout << "realoutY["<<i<<"]["<<j<<"]::" << realoutY[i][j] << "----" << "Y["<<i<<"]["<<j<<"]::" << Y[i][j] << endl;
}
cout << endl;
}
cout << "test data part:" << endl;
//test part: read the test samples; testY will hold the network outputs
vector<vector<double>> testX(TRAINSAMPLE, vector<double>(INNODE));
vector<vector<double>> testY(TRAINSAMPLE, vector<double>(OUTNODE));
ifstream myfile1("C:\\Users\\Administrator\\Desktop\\testy.txt");
for (int i = 0; i < TRAINSAMPLE; i++)
{
string temp;
getline(myfile1, temp);
vector<string> a;
SplitString(temp, a, ",");
//only the four feature fields are needed here; the species field is not
//parsed, since the loop below only prints the predicted class for each sample
for (int j = 0; j < INNODE; j++)
{
double dd;
sscanf_s(a[j].c_str(), "%lf", &dd);
testX[i][j] = dd;
}
}
myfile1.close();
for (int i = 0; i < TRAINSAMPLE; i++)
{
computeY(weight1, bias1, weight2, bias2, n1, n2, n3, testX[i], testY[i], hideY);
}
for (int i = 0; i < TRAINSAMPLE; i++)
{
double max = -1;
int index = -1;
for (int j = 0; j < OUTNODE; j++)
{
cout << "testY[" << i << "][" << j << "]::" << testY[i][j] << "---";
if (max <= testY[i][j])
{
max = testY[i][j];
index = j;
}
}
switch (index)
{
case 0:
cout << "Iris-setosa" << endl;
break;
case 1:
cout << "Iris-versicolor" << endl;
break;
case 2:
cout << "Iris-virginica" << endl;
break;
default:
break;
}
}
DWORD end_time = GetTickCount();
cout << "The run time is:" << (end_time - start_time) / 1000.0 << "s" << endl;
cout << "compute count:" << count << endl;
//vectors free their own storage; no manual cleanup is needed
return 0;
}
Analysis of Results
TEST 1
15 samples were used for training (5 of each Iris species) and 15 samples for testing. The error threshold was set to 0.0005.
The predicted classes matched the labels in the dataset; the accuracy was 100%.
TEST 2
75 samples were used for training (25 of each Iris species) and 75 samples for testing. The error threshold was set to 0.005.
The accuracy was 98.68%.
TEST 3
75 samples were used for training (25 of each Iris species) and 75 samples for testing. The error threshold was set to 0.0005.
The accuracy was 97.37%.
The data images were not uploaded.
Experimental Summary
The accuracy of error backpropagation depends in part on the randomly generated weights and biases; since every run generates different random numbers, runs of the algorithm are hard to compare side by side.
Even so, the accuracy stays stably above 95%, which is already a good result relative to our expectations.
This experiment did not implement a separate matrix-operations class; all intermediate results were instead computed with explicit loops, which may make the algorithm easier to understand.