//BackPropagation.h
#ifndef BackPropagation_H
#define BackPropagation_H
#include<stdio.h>
#include<math.h>
#include<time.h>
#include<stdlib.h>
#include<assert.h>
class backPro
{
public:
~backPro(); // frees all heap allocations
backPro(int n1, int *sz, double b, double a);// initializes the net and allocates memory
void bpgt(double *in, double *tgt);// backpropagates error for one set of inputs
void ffwd(double *in);// feeds activations forward for one set of inputs
double mse(double *tgt) const;// returns half the sum of squared errors at the output layer
double output(int i) const; // returns the i-th output of the net
private:
double **out; // out[i][j]: output of neuron j in layer i
double **delta; // delta[i][j]: error term of neuron j in layer i (layers 1..num1-1)
double ***weight; // weight[i][j][k]: weight from neuron k in layer i-1 to neuron j in layer i; k == lsize[i-1] holds the bias
int num1; // number of layers, including input and output
int *lsize; // lsize[i]: number of neurons in layer i
double beta; // learning rate
double alpha; // momentum coefficient
double ***prevDwt; // previous weight changes, kept for the momentum term
double sigmoid(double in); // logistic activation function
};
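// Typical call sequence (a sketch; the layer sizes and constants here are
// illustrative, not prescribed by the class):
//   int sz[3] = {2, 2, 1};
//   backPro net(3, sz, 0.3, 0.1); // 3 layers, beta = 0.3, alpha = 0.1
//   double in[2] = {0, 1}, tgt[1] = {1};
//   net.bpgt(in, tgt);            // one training step: feed forward + weight update
//   net.ffwd(in);                 // feed forward only (inference)
//   double y = net.output(0);     // read the first output neuron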
// initializes the net and allocates memory on the heap
backPro::backPro(int n1, int *sz, double b, double a):beta(b),alpha(a)
{
num1 = n1;
lsize = new int[num1];
for(int i = 0; i < num1; i ++)
lsize[i] = sz[i];
// allocate memory for the output of each neuron
out = new double*[num1];
for(int i = 0; i < num1; i ++)
out[i] = new double[lsize[i]];
// allocate memory for deltas (the input layer has none, so index 0 is unused)
delta = new double*[num1];
for(int i = 1; i < num1; i ++)
delta[i] = new double[lsize[i]];
// allocate memory for weights
weight = new double**[num1];
for(int i = 1; i < num1; i ++)
weight[i] = new double*[lsize[i]];
for(int i = 1; i < num1; i ++)
for(int j = 0; j < lsize[i]; j ++)
weight[i][j] = new double[lsize[i-1]+1];
// allocate memory for previous weight changes (momentum term)
prevDwt = new double**[num1];
for(int i = 1; i < num1; i ++)
prevDwt[i] = new double*[lsize[i]];
for(int i = 1; i < num1; i ++)
for(int j = 0; j < lsize[i]; j ++)
prevDwt[i][j] = new double[lsize[i-1]+1];
// seed and assign random weights
srand((unsigned)(time(NULL)));
for(int i = 1; i < num1; i ++)
for(int j = 0; j < lsize[i]; j ++)
for(int k = 0; k < lsize[i-1]+1; k ++)
weight[i][j][k] = (double)(rand())/(RAND_MAX/2)-1; // pseudo-random weights in [-1, 1]
// initialize previous weight changes to 0 for the first iteration
for(int i = 1; i < num1; i ++)
for(int j = 0; j < lsize[i]; j ++)
for(int k = 0; k < lsize[i-1]+1; k ++)
prevDwt[i][j][k] = 0.0;
}
backPro::~backPro()
{
// free out
for(int i = 0; i < num1; i ++)
delete[] out[i];
delete[] out;
// free delta
for(int i = 1; i < num1; i ++)
delete[] delta[i];
delete[] delta;
//free weight
for(int i = 1; i < num1; i ++)
for(int j = 0; j < lsize[i]; j ++)
delete[] weight[i][j];
for(int i = 1; i < num1; i ++)
delete[] weight[i];
delete[] weight;
// free prevDwt
for(int i = 1; i < num1; i ++)
for(int j = 0; j < lsize[i]; j ++)
delete[] prevDwt[i][j];
for(int i = 1; i < num1; i ++)
delete[] prevDwt[i];
delete[] prevDwt;
// free layer info
delete[] lsize;
}
double backPro::sigmoid(double in)
{
return 1.0/(1.0+exp(-in));
}
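// Note: the logistic function satisfies sigmoid'(x) = sigmoid(x)*(1 - sigmoid(x)),
// which is why bpgt() below can write the derivative simply as out*(1 - out).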
// returns half the sum of squared errors at the output layer (loosely called MSE here)
double backPro::mse(double *tgt) const
{
double mse = 0;
for(int i = 0; i < lsize[num1-1]; i ++)
mse += (tgt[i]-out[num1-1][i]) * (tgt[i]-out[num1-1][i]);
return mse/2;
}
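// The factor of 1/2 is conventional: differentiating (1/2)*(t - o)^2 with
// respect to o gives -(t - o), so no stray factor of 2 shows up in the deltas.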
// return i'th output of the net
double backPro::output(int i) const
{
return out[num1-1][i];
}
// feeds one set of inputs forward through the net
void backPro::ffwd(double *in)
{
double sum;
// assign content to input layer
for(int i = 0; i < lsize[0]; i ++)
out[0][i] = in[i]; // out[i][j] holds the output of the j-th neuron in the i-th layer
// assign an output (activation) value to each neuron using the sigmoid function
for(int i = 1; i < num1; i ++)// for each layer
{
for(int j = 0; j < lsize[i]; j ++)// for each neuron in current layer
{
sum = 0.0;
for(int k = 0; k < lsize[i-1]; k ++)// for input from each neuron in the preceding layer
sum += out[i-1][k] * weight[i][j][k];// apply weight to inputs and add to sum
sum += weight[i][j][lsize[i-1]]; // apply bias
out[i][j] = sigmoid(sum); // apply sigmoid function
}
}
}
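// In symbols, each neuron computes
//   out[i][j] = sigmoid( sum_k out[i-1][k]*weight[i][j][k] + weight[i][j][lsize[i-1]] )
// i.e. the bias is stored as one extra weight per neuron with an implicit input of 1.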
// backpropagates errors from the output layer back to the first hidden layer, then updates the weights
void backPro::bpgt(double *in, double *tgt)
{
double sum;
// update output values for each neuron
ffwd(in);
// find delta for output layer
for(int i = 0; i < lsize[num1-1]; i ++)
delta[num1-1][i] = out[num1-1][i]*(1-out[num1-1][i])*(tgt[i]-out[num1-1][i]);
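// With E = (1/2)*sum(t - o)^2 and o = sigmoid(net), the chain rule gives
// -dE/dnet = (t - o)*o*(1 - o), which is the delta stored above; adding
// beta*delta*input to a weight is therefore a descent step on E.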
// find delta for hidden layers
for(int i = num1-2; i > 0; i --)
{
for(int j = 0; j < lsize[i]; j ++)
{
sum = 0.0;
for(int k = 0; k < lsize[i+1]; k ++)
sum += delta[i+1][k]*weight[i+1][k][j];
delta[i][j] = out[i][j]*(1-out[i][j])*sum;
}
}
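// A hidden neuron's delta is the weighted sum of the deltas of the neurons
// it feeds into, scaled by its own sigmoid derivative (chain rule again).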
// apply momentum (has no effect if alpha == 0)
for(int i = 1; i < num1; i ++)
{
for(int j = 0; j < lsize[i]; j ++)
{
for(int k = 0; k < lsize[i-1]; k ++)
weight[i][j][k] += alpha*prevDwt[i][j][k];
weight[i][j][lsize[i-1]] += alpha*prevDwt[i][j][lsize[i-1]];
}
}
// adjust weights using steepest descent
for(int i = 1; i < num1; i ++)
{
for(int j = 0; j < lsize[i]; j ++)
{
for(int k = 0; k < lsize[i-1]; k ++)
{
prevDwt[i][j][k] = beta*delta[i][j]*out[i-1][k];
weight[i][j][k] += prevDwt[i][j][k];
}
prevDwt[i][j][lsize[i-1]] = beta*delta[i][j];
weight[i][j][lsize[i-1]] += prevDwt[i][j][lsize[i-1]];
}
}
}
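// Net effect of the two loops above: every weight receives
//   w += alpha*prevDwt + beta*delta*input
// i.e. gradient descent with a momentum term. Note that prevDwt stores only
// the gradient part beta*delta*input, not the full change, so this is a
// slight variant of classical momentum.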
#endif
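
//main.cpp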
#include <iostream>
#include"BackPropagation.h"
using namespace std;
int main(int argc, char** argv)
{
// prepare XOR training data: each row is {input1, input2, target}
double data[][3] = {
{0, 0, 0},
{0, 1, 1},
{1, 0, 1},
{1, 1, 0}};
// prepare test data: each row is {input1, input2}
double testData[][2] = {
{0, 0},
{0, 1},
{1, 0},
{1, 1}};
int numLayers = 3, lsz[3] = {2, 2, 1}; // 2 inputs, 2 hidden neurons, 1 output
double beta = 0.3, alpha = 0.1, Thresh = 0.00001; // learning rate, momentum, stopping threshold
long num_iter = 2000000;
backPro *bp = new backPro(numLayers, lsz, beta, alpha);
cout<<endl<<"Now training the network..."<<endl;
long i;
for(i = 0; i < num_iter; i ++)
{
bp->bpgt(data[i%4], &data[i%4][2]);
if(bp->mse(&data[i%4][2]) < Thresh)
{
cout<<endl<<"Network Trained. Threshold value achieved in "<<i<<"iterations"<<endl;
cout<<"MSE: "<<bp->mse(&data[i%4][2])<<endl<<endl;
break;
}
if(i % (num_iter/10) == 0)
cout<<endl<<"MSE: "<<bp->mse(&data[i%4][2])<<"...Training..."<<endl;
}
if(i == num_iter)
cout<<endl<<i<<" iterations completed..."<<"MSE: "<<bp->mse(&data[(i-1)%4][2])<<endl;
cout<<"Now using the trained network to make predctions on test data..."<<endl<<endl;
for(i = 0; i < 4; i ++)
{
bp->ffwd(testData[i]);
cout<<testData[i][0]<<" "<<testData[i][1]<<" "<<bp->output(0)<<endl;
}
delete bp; // release the network allocated with new above
return 0;
}
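// One possible way to build and run (assuming g++, with this file saved as
// main.cpp alongside BackPropagation.h):
//   g++ -O2 -o xor main.cpp
//   ./xor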