Reference (theory):
http://www.cnblogs.com/wengzilin/archive/2013/04/24/3041019.html
C implementation: the hidden layer in this program supports two activation functions, the sigmoid function and the hyperbolic tangent (tanh). The sigmoid version did not perform particularly well; tanh gave noticeably better results.
The choice of learning rate, number of training iterations, and error limit also matters a great deal.
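For reference, here is a minimal sketch of the two candidate activations and the derivative identities that the backpropagation step in the listing relies on (the helper names sigmoid, sigmoid_deriv, and tanh_deriv are illustrative, not part of the program below):

#include <cmath>

// sigmoid: f(x) = 1/(1+e^-x); its derivative, written via the output, is f*(1-f)
double sigmoid(double x)        { return 1.0 / (1.0 + exp(-x)); }
double sigmoid_deriv(double fx) { return fx * (1.0 - fx); }         // argument is f(x), not x

// tanh: f(x) = tanh(x); its derivative is 1 - tanh(x)^2 = (1+f)*(1-f)
double tanh_deriv(double fx)    { return (1.0 + fx) * (1.0 - fx); } // argument is f(x), not x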
#include <iostream>
#include <cmath>   // exp(), tanh()
#include <cstdlib> // rand(), system()
using namespace std;
const int InputN = 4;   // number of input-layer neurons
const int HN = 9;       // number of hidden-layer neurons
const int OutN = 1;     // number of output-layer neurons
const int datanum = 12; // number of training samples
int main()
{
    double x_out[InputN];           // input-layer outputs
    double hn_out[HN];              // hidden-layer outputs
    double y_out[OutN];             // output-layer outputs
    double y[OutN];                 // desired (teacher) outputs
    double w[InputN][HN];           // input-to-hidden weights
    double v[HN][OutN];             // hidden-to-output weights
    double deltaw[InputN][HN];      // change in the input-to-hidden weights
    double deltav[HN][OutN];        // change in the hidden-to-output weights
    double g[OutN];                 // output-layer error term propagated back to the hidden layer
    double e[HN];                   // hidden-layer error term propagated back to the input layer
    double error;                   // global error
    double errlimit = 0.001;        // preset error limit
    double alpha = 0.1, beta = 0.1; // alpha: momentum factor; beta: learning rate
    int loop = 0;                   // training-iteration counter
    int times = 100000;             // preset upper bound on training iterations
    int i, j, m;
    double max, min;
    double sumtemp, errtemp;
    // Each row is 4 input features followed by 1 teacher value
    // (aggregate initialization with brace elision).
    struct{
        double input[InputN];
        double teach[OutN];
    }data[datanum] = { { 0.336129,2.06572,1.06265,0.899193,1 },
        { 0.298629,2.35787,1.62205,0.880307,1 },
        { 0.366989,2.02294,1.13088,0.911248,1 },
        { 0.397162,1.89644,1.06084,0.918811,1 },
        { 0.368128,2.01313,0.792441,0.911679,1 },
        { 0.426591,1.80551,0.650104,0.924052,1 },
        { 0.422701,1.76045,1.64066,0.920759,0 },
        { 0.435797,1.67332,0.464393,0.938743,0 },
        { 0.330855,2.03024,1.45078,0.906025,0 },
        { 0.409903,1.79506,1.32226,0.921901,0 },
        { 0.443406,1.66463,0.596349,0.932647,0 },
        { 0.382446,1.79608,1.34416,0.922928,0 }};
    // Initialize the weights
    for (i = 0; i < InputN; i++)
    {
        for (j = 0; j < HN; j++)
        {
            // random initial input-to-hidden weights in [-1, 1]
            // (divide by RAND_MAX rather than a hard-coded 32767 so this is portable)
            w[i][j] = ((double)rand() / RAND_MAX) * 2 - 1;
            deltaw[i][j] = 0;
        }
    }
    for (i = 0; i < HN; i++)
    {
        for (j = 0; j < OutN; j++)
        {
            v[i][j] = ((double)rand() / RAND_MAX) * 2 - 1; // random initial hidden-to-output weights in [-1, 1]
            deltav[i][j] = 0;
        }
    }
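    // Training scheme: online (stochastic) updates -- the weights change after
    // every sample rather than once per epoch. Each update carries a momentum term:
    //     delta_new = alpha * delta_old + beta * gradient
    // so alpha smooths successive steps while beta scales the current gradient.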
    // Train the weights
    while (loop < times) // for each training iteration
    {
        loop++;
        error = 0.0;
        for (m = 0; m < datanum; m++) // for each sample
        {
            // Copy the sample into the input layer and find its max and min.
            // (max and min must start from the first feature; initializing both
            // to 0 would leave min stuck at 0 for all-positive inputs.)
            max = min = data[m].input[0];
            for (i = 0; i < InputN; i++)
            {
                x_out[i] = data[m].input[i];
                if (max < x_out[i])
                    max = x_out[i];
                if (min > x_out[i])
                    min = x_out[i];
            }
            for (i = 0; i < InputN; i++) // min-max normalize all inputs of this sample
            {
                x_out[i] = (x_out[i] - min) / (max - min);
            }
            for (i = 0; i < OutN; i++) // desired output-layer values
            {
                y[i] = data[m].teach[i];
            }
            for (i = 0; i < HN; i++) // compute the hidden-layer outputs
            {
                sumtemp = 0.0;
                for (j = 0; j < InputN; j++)
                    sumtemp += w[j][i] * x_out[j];
                //hn_out[i] = 1 / (1 + exp(-1 * sumtemp)); // sigmoid
                hn_out[i] = tanh(sumtemp); // hyperbolic tangent
            }
            for (i = 0; i < OutN; i++) // compute the output-layer outputs
            {
                sumtemp = 0.0;
                for (j = 0; j < HN; j++)
                    sumtemp += v[j][i] * hn_out[j];
                y_out[i] = 1 / (1 + exp(-1 * sumtemp)); // sigmoid
            }
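            // Backpropagation sketch: with squared error E = (1/2) * sum (y - y_out)^2,
            // the output delta for a sigmoid unit is
            //     g_i = (y_i - y_out_i) * y_out_i * (1 - y_out_i)
            // and the hidden delta for a tanh unit is
            //     e_i = (sum_j g_j * v_ij) * (1 - hn_out_i^2),
            // where 1 - tanh^2 = (1 + tanh)(1 - tanh), the form used below.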
            // Back-propagate the error
            for (i = 0; i < OutN; i++)
            {
                errtemp = y[i] - y_out[i];
                g[i] = errtemp * y_out[i] * (1.0 - y_out[i]); // output-layer delta (sigmoid derivative)
                error += errtemp * errtemp; // accumulate the global error
            }
            for (i = 0; i < HN; i++)
            {
                errtemp = 0.0;
                for (j = 0; j < OutN; j++)
                    errtemp += g[j] * v[i][j];
                //e[i] = errtemp * hn_out[i] * (1.0 - hn_out[i]); // hidden-layer delta for the sigmoid case
                e[i] = errtemp * (1 + hn_out[i]) * (1.0 - hn_out[i]); // hidden-layer delta for the tanh case
            }
            for (i = 0; i < OutN; i++) // adjust the hidden-to-output weights
            {
                for (j = 0; j < HN; j++)
                {
                    deltav[j][i] = alpha * deltav[j][i] + beta * g[i] * hn_out[j];
                    v[j][i] += deltav[j][i];
                }
            }
            for (i = 0; i < HN; i++) // adjust the input-to-hidden weights
            {
                for (j = 0; j < InputN; j++)
                {
                    deltaw[j][i] = alpha * deltaw[j][i] + beta * e[i] * x_out[j];
                    w[j][i] += deltaw[j][i];
                }
            }
        }
        error = error / 2;
        if (loop % 10000 == 0)
        {
            cout << "global error = " << error << endl;
        }
        if (error < errlimit)
            break;
    }
    // Check the trained weights against the training data
    for (m = 0; m < datanum; m++)
    {
        // Normalize each sample the same way it was normalized during training;
        // the network was trained on normalized inputs, so the check must match
        max = min = data[m].input[0];
        for (i = 0; i < InputN; i++)
        {
            x_out[i] = data[m].input[i];
            if (max < x_out[i])
                max = x_out[i];
            if (min > x_out[i])
                min = x_out[i];
        }
        for (i = 0; i < InputN; i++)
            x_out[i] = (x_out[i] - min) / (max - min);
        for (i = 0; i < HN; i++) // compute the hidden-layer outputs
        {
            sumtemp = 0.0;
            for (j = 0; j < InputN; j++)
                sumtemp += w[j][i] * x_out[j];
            //hn_out[i] = 1 / (1 + exp(-1 * sumtemp)); // sigmoid
            hn_out[i] = tanh(sumtemp);
        }
        for (i = 0; i < OutN; i++) // compute the output-layer outputs
        {
            sumtemp = 0.0;
            for (j = 0; j < HN; j++)
                sumtemp += v[j][i] * hn_out[j];
            y_out[i] = 1 / (1 + exp(-1 * sumtemp)); // sigmoid
            cout << "y_out[" << i << "]=" << y_out[i] << "\t";
        }
        cout << endl;
    }
    // Use the trained weights on previously unseen data
    struct{
        double input[InputN];
    }newdata[5] = { { 0.326761, 2.19594, 1.31807, 0.899404 },
        { 0.38063, 1.85816, 0.68933, 0.935696 },
        { 0.437653, 1.66873, 0.865847, 0.933564 },
        { 0.568045, 1.22784, 0.62355, 0.958703 },
        { 0.422701, 1.76045, 1.64066, 0.920759 } };
    for (m = 0; m < 5; m++)
    {
        // Normalize each new sample the same way as during training
        max = min = newdata[m].input[0];
        for (i = 0; i < InputN; i++)
        {
            x_out[i] = newdata[m].input[i];
            if (max < x_out[i])
                max = x_out[i];
            if (min > x_out[i])
                min = x_out[i];
        }
        for (i = 0; i < InputN; i++)
            x_out[i] = (x_out[i] - min) / (max - min);
        for (i = 0; i < HN; i++) // compute the hidden-layer outputs
        {
            sumtemp = 0.0;
            for (j = 0; j < InputN; j++)
                sumtemp += w[j][i] * x_out[j];
            //hn_out[i] = 1 / (1 + exp(-1 * sumtemp)); // sigmoid
            hn_out[i] = tanh(sumtemp);
        }
        for (i = 0; i < OutN; i++) // compute the output-layer outputs
        {
            sumtemp = 0.0;
            for (j = 0; j < HN; j++)
                sumtemp += v[j][i] * hn_out[j];
            y_out[i] = 1 / (1 + exp(-1 * sumtemp)); // sigmoid
            cout << "y_out[" << i << "]=" << y_out[i] << "\t";
        }
        cout << endl;
    }
    system("pause"); // Windows-only; keeps the console window open
    return 0;
}
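One way to build and run the listing, assuming a g++ toolchain (the file name bp.cpp is illustrative):

g++ -O2 -o bp bp.cpp
./bp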