Error Backpropagation Learning Algorithm
Denverg
Secret Number
29 April 2018
Experimental Objective
Implement a BP neural network in C++.
Experimental Principles
Artificial Neural Network Model
Research on the function of neurons in the human brain began in the 1940s. In 1943, McCulloch and Pitts summarized some basic properties of neurons and proposed a mathematical description and structural model of the formal neuron, known as the M-P neuron model.
Input signals arrive through weighted connections; the neuron compares the total input it receives against its threshold and passes the result through an "activation function" to produce its output.
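In symbols, and in the threshold convention the code below uses (sigmoid(sum - bias)), a neuron with inputs $x_i$, connection weights $w_i$, and threshold $\theta$ outputs

$$ y = f\Big(\sum_i w_i x_i - \theta\Big), $$

where $f$ is the activation function.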
The ideal activation function is the step function, but because the step function is discontinuous and non-smooth, the continuously differentiable Sigmoid function is commonly used in practice.
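The Sigmoid in question is the logistic function

$$ \sigma(z) = \frac{1}{1 + e^{-z}}, \qquad \sigma'(z) = \sigma(z)\big(1 - \sigma(z)\big), $$

whose simple derivative keeps the gradient formulas in the next subsection compact.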
To classify the Iris dataset, we need to learn a neural network with multiple layers of neurons. Here the network has a four-neuron input layer, a three-neuron output layer, and one hidden layer (set to six neurons in this experiment).
Error Backpropagation Algorithm
The error backpropagation learning algorithm is usually called the BP algorithm. Its basic idea: whenever the network's output is wrong, adjust the network's weights so that the output error keeps moving toward its minimum. It is a form of gradient descent.
Gradient Descent Algorithm
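As a minimal summary of this subsection's idea: for a parameter vector $w$, error function $E(w)$, and learning rate $\eta$ (the variable ratio in the code below), gradient descent repeatedly steps against the gradient:

$$ w \leftarrow w - \eta \, \frac{\partial E}{\partial w}. $$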
Error Backpropagation
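Applying the chain rule to the squared error $E = \tfrac{1}{2}\sum_k (t_k - y_k)^2$ (the factor $\tfrac{1}{2}$ is why the training loop below multiplies the accumulated error by 0.5), together with the Sigmoid derivative above, gives exactly the quantities computed in computeOutputDY and computerHideDY: for output neuron $k$ with output $y_k$ and target $t_k$, and hidden neuron $h$ with output $b_h$,

$$ g_k = y_k (1 - y_k)(t_k - y_k), \qquad e_h = b_h (1 - b_h) \sum_k w_{kh}\, g_k, $$

with updates $\Delta w_{kh} = \eta\, g_k b_h$ and $\Delta \theta_k = -\eta\, g_k$ (and analogously for the input-to-hidden weights and thresholds).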
Experimental Content
Code Part
#include "stdafx.h" // MSVC precompiled header
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <ctime>
#include <iostream>
#include <vector>
#include <fstream>
#include <string>
#include <windows.h>
using namespace std;
#define INNODE 4 // number of input nodes
#define OUTNODE 3 // number of output classes
#define INF 9999999
#define MAXNODE 6 // maximum number of nodes per layer
#define TRAINSAMPLE 75 // half of the 150 Iris samples
//initialize weights and biases with uniform random values in [0, 1]
void initialValue(vector<vector <double>> &weight1,vector<double> &bias1, vector<vector <double>> &weight2, vector<double> &bias2,int n1,int n2,int n3)
{
srand((unsigned int)time(NULL));
for (int i = 0; i < n2; i++)
{
for (int j = 0; j < n1; j++)
{
weight1[i][j] = rand()/double(RAND_MAX);
}
}
for (int i = 0; i < n3; i++)
{
for (int j = 0; j < n2; j++)
{
weight2[i][j] = rand() / double(RAND_MAX);
}
}
for (int i = 0; i < n2; i++)
{
bias1[i] = rand() / double(RAND_MAX);
}
for (int i = 0; i < n3; i++)
{
bias2[i] = rand() / double(RAND_MAX);
}
}
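//logistic activation; its derivative is sigmoid(z) * (1 - sigmoid(z)), which the gradient functions below rely on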
double sigmoid(double z)
{
return 1 / (1 + exp(-z));
}
//forward pass: compute the hidden-layer output hideY and the network output realoutY
void computeY(vector<vector <double>> &weight1, vector<double> &bias1, vector<vector <double>> &weight2, vector<double> &bias2, int n1, int n2, int n3, vector<double> &X, vector<double> &realoutY,vector<double> &hideY)
{
//calculate the hidden layer out hideY first
for (int i = 0; i < n2; i++)
{
double sum = 0;
for (int j = 0; j < n1; j++)
{
sum += weight1[i][j] * X[j];
}
sum = sigmoid(sum - bias1[i]);
hideY[i] = sum;
}
//calculate the output layer out realoutY;
for (int i = 0; i < n3; i++)
{
double sum = 0;
for (int j = 0; j < n2; j++)
{
sum += weight2[i][j] * hideY[j];
}
sum = sigmoid(sum - bias2[i]);
realoutY[i] = sum;
}
}
//compute the gradient term g of the output layer
void computeOutputDY(int n3, vector<double> &realoutY, vector<double> Y, vector<double> &outputDweight)
{
for (int i = 0; i < n3; i++)
{
outputDweight[i] = realoutY[i] * (1 - realoutY[i])*(Y[i] - realoutY[i]);
}
}
//compute the gradient term e of the hidden layer
void computerHideDY(vector<vector<double>> weight2, vector<double> outputDweight, vector<double> hideY,int n2, int n3, vector<double> &HideDweight)
{
for (int i = 0; i < n2; i++)
{
double sum = 0;
for (int j = 0; j < n3; j++)
{
sum += weight2[j][i] * outputDweight[j];
}
HideDweight[i] = sum * hideY[i] * (1 - hideY[i]);
}
}
//update weight1,2 and bias1,2
void updateWeight(vector<vector <double>> &weight1, vector<double> &bias1, vector<vector <double>> &weight2, vector<double> &bias2, int n1, int n2, int n3, vector<double> X, vector<double> &hideY, vector<double> outputDweight, vector<double> hideDweight, double ratio)
{
for (int i = 0; i < n1; i++)
{
for (int j = 0; j < n2 ; j++)
{
weight1[j][i] += ratio * hideDweight[j] * X[i];
}
}
for (int i = 0; i < n2; i++)
{
for (int j = 0; j < n3 ; j++)
{
weight2[j][i] += ratio * outputDweight[j] * hideY[i];
}
}
for (int i = 0; i < n2; i++)
{
bias1[i] -= ratio * hideDweight[i];
}
for (int i = 0; i < n3; i++)
{
bias2[i] -= ratio * outputDweight[i];
}
}
//sum of squared errors between the network output and the target for one sample
double computerError(vector<double> realoutY, vector<double> Y, int n)
{
double error = 0.0;
for (int i = 0; i < n; i++)
{
error += (realoutY[i] - Y[i])*(realoutY[i] - Y[i]);
}
return error;
}
//split string s into tokens v by delimiter c
void SplitString(const string& s, vector<string>& v, const string& c)
{
string::size_type pos1, pos2;
pos2 = s.find(c);
pos1 = 0;
while (string::npos != pos2)
{
v.push_back(s.substr(pos1, pos2 - pos1));
pos1 = pos2 + c.size();
pos2 = s.find(c, pos1);
}
if (pos1 != s.length())
v.push_back(s.substr(pos1));
}
int main()
{
DWORD start_time = GetTickCount();
//the node number of each layer
int n1 = 4, n2 = 6, n3 = 3;
//weight1[n2][n1] and bias[n2]
vector<vector<double>> weight1(n2, vector<double>(n1));
vector<vector<double>> weight2(n3, vector<double>(n2));
vector<double> bias1(n2);
vector<double> bias2(n3);
//grade of the layer
vector<double> outputDweight(n3);
vector<double> hideDweight(n2);
//outputs of the hidden layer and of the output layer
vector<vector<double>> realoutY(TRAINSAMPLE, vector<double>(OUTNODE));
vector<double> hideY(n2);
//X and Y
vector<vector<double>> X(TRAINSAMPLE, vector<double>(INNODE));
vector<vector<double>> Y(TRAINSAMPLE, vector<double>(OUTNODE));
//read the training data
ifstream myfile("C:\\Users\\Administrator\\Desktop\\test.txt");
for (int i = 0; i < TRAINSAMPLE; i++)
{
string temp;
getline(myfile, temp);
vector<string> a;
SplitString(temp, a, ",");
//the first INNODE fields are the four flower features
for (int j = 0; j < INNODE; j++)
{
double dd;
sscanf_s(a[j].c_str(), "%lf", &dd);
X[i][j] = dd;
}
//the last field is the species name; encode it as a one-hot target
if (a[INNODE].compare("Iris-setosa") == 0)
{
Y[i] = { 1,0,0 };
}
if (a[INNODE].compare("Iris-versicolor") == 0)
{
Y[i] = { 0,1,0 };
}
if (a[INNODE].compare("Iris-virginica") == 0)
{
Y[i] = { 0,0,1 };
}
}
myfile.close();
//initialize weights and biases randomly
initialValue(weight1, bias1, weight2, bias2, n1, n2, n3);
double err = INF;
//learning rate (step size)
double ratio = 0.5;
//train part
int count = 0;
while (err > 0.0005 && count < 1000000)
{
err = 0.0;
for (int i = 0; i < TRAINSAMPLE; i++)
{
computeY(weight1, bias1, weight2, bias2, n1, n2, n3, X[i], realoutY[i], hideY);
computeOutputDY(n3, realoutY[i], Y[i], outputDweight);
computerHideDY(weight2, outputDweight, hideY, n2, n3, hideDweight);
updateWeight(weight1, bias1, weight2, bias2, n1, n2, n3, X[i], hideY, outputDweight, hideDweight, ratio);
err += computerError(realoutY[i], Y[i], OUTNODE);
}
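//halve the accumulated error so that err matches E = (1/2) * sum of squared errors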
err = 0.5 * err;
count++;
}
cout << "train data part:" << endl;
//train out part
for (int i = 0; i < TRAINSAMPLE; i++)
{
for (int j = 0; j < OUTNODE; j++)
{
cout << "realoutY["<<i<<"]["<<j<<"]::" << realoutY[i][j] << "----" << "Y["<<i<<"]["<<j<<"]::" << Y[i][j] << endl;
}
cout << endl;
}
cout << "test data part:" << endl;
//test part: read the test samples; testY will hold the network outputs
vector<vector<double>> testX(TRAINSAMPLE, vector<double>(INNODE));
vector<vector<double>> testY(TRAINSAMPLE, vector<double>(OUTNODE));
ifstream myfile1("C:\\Users\\Administrator\\Desktop\\testy.txt");
for (int i = 0; i < TRAINSAMPLE; i++)
{
string temp;
getline(myfile1, temp);
vector<string> a;
SplitString(temp, a, ",");
//only the four feature fields are needed here; the species field is not
//parsed, since the loop below only prints the predicted class for each sample
for (int j = 0; j < INNODE; j++)
{
double dd;
sscanf_s(a[j].c_str(), "%lf", &dd);
testX[i][j] = dd;
}
}
myfile1.close();
for (int i = 0; i < TRAINSAMPLE; i++)
{
computeY(weight1, bias1, weight2, bias2, n1, n2, n3, testX[i], testY[i], hideY);
}
for (int i = 0; i < TRAINSAMPLE; i++)
{
double max = -1;
int index = -1;
for (int j = 0; j < OUTNODE; j++)
{
cout << "testY[" << i << "][" << j << "]::" << testY[i][j] << "---";
if (max <= testY[i][j])
{
max = testY[i][j];
index = j;
}
}
switch (index)
{
case 0:
cout << "Iris-setosa" << endl;
break;
case 1:
cout << "Iris-versicolor" << endl;
break;
case 2:
cout << "Iris-virginica" << endl;
break;
default:
break;
}
}
DWORD end_time = GetTickCount();
cout << "The run time is:" << (end_time - start_time) / 1000.0 << "s" << endl;
cout << "compute count:" << count << endl;
//vectors free their own storage; no manual cleanup is needed
return 0;
}
Analysis of Results
TEST 1
15 samples were used for training (5 of each Iris species) and 15 samples for testing. The error threshold was set to 0.0005.
The predicted classes matched the labels in the dataset; the accuracy was 100%.
TEST 2
75 samples were used for training (25 of each Iris species) and 75 samples for testing. The error threshold was set to 0.005.
The accuracy was 98.68%.
TEST 3
75 samples were used for training (25 of each Iris species) and 75 samples for testing. The error threshold was set to 0.0005.
The accuracy was 97.37%.
The data images were not uploaded.
Experimental Summary
The accuracy of error backpropagation depends in part on the randomly generated weights and biases; since every run generates different random numbers, runs of the algorithm are hard to compare side by side.
Even so, the accuracy stays stably above 95%, which is already a good result relative to our expectations.
This experiment did not implement a separate matrix-operations class; all intermediate results were instead computed with explicit loops, which may make the algorithm easier to understand.