公式推导
三层BP神经网络如上图所示。其中,$x_i$表示第$i$个输入层节点的输入值,也是其输出值;$z_j$表示第$j$个隐藏层节点的输出值;$y_k$表示第$k$个输出层节点的输出值;$v_{ij}$表示从第$i$个输入层节点到第$j$个隐藏层节点的权重;$w_{jk}$表示从第$j$个隐藏层节点到第$k$个输出层节点的权重;第$j$个隐藏层节点的阈值为$\theta_j$;第$k$个输出层节点的阈值为$\gamma_k$。激活函数采用Sigmoid函数:
$$f\left(x\right)=\frac{1}{1+e^{-x}}$$
Sigmoid函数的导数:
$$f'(x)=f(x)\big(1-f(x)\big)$$
第$j$个隐藏层节点的输入:
$$\alpha_j=\sum_{i}{v_{ij}x_i}$$
第$j$个隐藏层节点的输出:
$$z_j=f\left(\alpha_j-\theta_j\right)$$
第$k$个输出层节点的输入:
$$\beta_k=\sum_{j}{w_{jk}z_j}$$
第$k$个输出层节点的输出:
$$y_k=f\left(\beta_k-\gamma_k\right)$$
输出层的误差函数:
$$E=\frac{1}{2}\sum_{k}\left(y_k-t_k\right)^2$$
其中$t_k$是训练集的真实标签,系数$\frac{1}{2}$用于抵消求导时产生的因子2,使下文的梯度公式(以及代码中 `0.5*error*error` 的累计方式)保持一致。根据反向传播的误差调整节点之间的连接权重,每一个权重的修正方向是误差函数梯度的反方向。设$\eta$为学习率,$\Delta w_{jk}$的计算公式:
$$\begin{aligned} \Delta w_{jk}&=-\eta\frac{\partial E}{\partial w_{jk}} \\ &=-\eta\left(y_k-t_k\right)y_k(1-y_k)z_j \end{aligned}$$
$w_{jk}$的更新公式:
$$w_{jk\ new}=w_{jk}+\Delta w_{jk}$$
$\Delta \gamma_k$的计算公式:
$$\begin{aligned} \Delta \gamma_k&=-\eta\frac{\partial E}{\partial \gamma_k} \\ &=\eta\left(y_k-t_k\right)y_k(1-y_k) \end{aligned}$$
$\gamma_k$的更新公式:
$$\gamma_{k\ new}=\gamma_k+\Delta \gamma_k$$
$\Delta v_{ij}$的计算公式:
$$\begin{aligned} \Delta v_{ij}&=-\eta\frac{\partial E}{\partial v_{ij}} \\ &=-\eta \sum_k \big((y_k-t_k)y_k(1-y_k)w_{jk}\big)z_j(1-z_j)x_i \end{aligned}$$
$v_{ij}$的更新公式:
$$v_{ij\ new}=v_{ij}+\Delta v_{ij}$$
$\Delta \theta_j$的计算公式:
$$\begin{aligned} \Delta \theta_j&=-\eta\frac{\partial E}{\partial \theta_j} \\ &=\eta \sum_k \big((y_k-t_k)y_k(1-y_k)w_{jk}\big)z_j(1-z_j) \end{aligned}$$
$\theta_j$的更新公式:
$$\theta_{j\ new}=\theta_j+\Delta \theta_j$$
数据预处理采用 Z-score 算法:
$$x_{new}=\frac{x-\mu}{\sigma}$$
其中$\mu$是样本均值,$\sigma$是样本的标准差,该算法使样本数据符合均值为0、标准差为1的标准正态分布。
BP神经网络的训练步骤如下图所示。
C语言代码
//********bpnn.h file********//
#ifndef BPNN_H
#define BPNN_H
#define MAX_NUM_INPUT 260 // maximum number of input-layer nodes
#define MAX_NUM_HIDDEN 100 // maximum number of hidden-layer nodes
#define MAX_NUM_OUTPUT 1 // maximum number of output-layer nodes
#define MAX_NUM_LAYER_OUT 260 // per-layer stride of the layer_out scratch buffer (must be >= each of the three maxima above)
// Three-layer (input / hidden / output) back-propagation neural network.
// NOTE(review): the fixed-size weight arrays make one BPNN instance large
// (roughly 290 KB) -- prefer heap or static storage over a stack variable.
typedef struct BPNN
{
int trained; // 0 untrained, 1 trained
int num_input; // nodes number of input layer
int num_hidden; // nodes number of hidden layer
int num_output; // nodes number of output layer
double rate; // learning rate
double weight_input_hidden[MAX_NUM_INPUT][MAX_NUM_HIDDEN]; // weight of the input layer to the hidden layer
double weight_hidden_output[MAX_NUM_HIDDEN][MAX_NUM_OUTPUT]; // weight of the hidden layer to the output layer
double threshold_hidden[MAX_NUM_HIDDEN]; // threshold of hidden layer
double threshold_output[MAX_NUM_OUTPUT]; // threshold of output layer
double error[MAX_NUM_OUTPUT]; // per-node error (output - label) from the most recent forward pass
double error_total; // total squared error of the most recent forward pass: 0.5 * sum(error^2)
double mean_std[MAX_NUM_INPUT][2]; // [j][0] mean, [j][1] standard deviation of training-data column j (for Z-score)
}BPNN;
// Initialize topology and learning rate; random weights in [-0.5, 0.5], thresholds zero.
void bpnn_Init(BPNN *bpnn_ptr,int num_input,int num_hidden,int num_output,double learn_rate);
// Forward pass for ONE sample; fills layer_out (stride MAX_NUM_LAYER_OUT per layer) and the error fields.
void bpnn_ForwardPropagation(BPNN *bpnn_ptr,const double *data,const double *label,double *layer_out);
// Backward pass: update all weights and thresholds from the errors stored by the forward pass.
void bpnn_BackPropagation(BPNN *bpnn_ptr,const double *layer_out);
// Full training loop: init, normalize data and labels, then iterate forward/backward passes.
void bpnn_Train(BPNN *bpnn_ptr,double *data,double *label,int num_sample,int num_input,int num_hidden,int num_output,double learn_rate,int num_iter);
// Predict labels for num_sample samples; results are written into label (row-major).
void bpnn_Predict(BPNN *bpnn_ptr,double *data,double *label,int num_sample);
// Write a human-readable report to "bpnn_out.txt" and the binary model to file 'model'.
void bpnn_FileOutput(BPNN *bpnn_ptr,char *model);
// Load a binary model previously written by bpnn_FileOutput.
void bpnn_LoadModel(BPNN *bpnn_ptr,char *model);
// Column-wise Z-score normalization in place; stores mean/std in the model when untrained, reuses them when trained.
void bpnn_Normalize(BPNN *bpnn_ptr,double *x,int row,int col);
// Column-wise min-max normalization in place to [0, 1].
void Min_Max(double *x,int row,int col);
// (x - mean) / std
double Zscore(double x,double mean,double std);
// Logistic sigmoid: 1 / (1 + e^-x).
double Sigmoid(double x);
#endif
//********bpnn.c file********//
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "bpnn.h" /* was "BPNN.h": case mismatched the stated header file name ("bpnn.h"), which breaks on case-sensitive filesystems */
/**
* @description: Init bp network.
* @param: bpnn_ptr point to bp network.
* num_input number of properties of each sample and nodes number of input layer.
* num_hidden nodes number of hidden layer.
* num_output number of label of each sample and nodes number of output layer.
* learn_rate learning rate.
* @return: none.
*/
/**
 * @description: Initialize a BP network: store the topology and learning
 *               rate, draw all weights uniformly from [-0.5, 0.5] and set
 *               every threshold and error accumulator to zero.
 * @param: bpnn_ptr   points to the network to initialize.
 *         num_input  node count of the input layer (= properties per sample).
 *         num_hidden node count of the hidden layer.
 *         num_output node count of the output layer (= labels per sample).
 *         learn_rate learning rate used by back propagation.
 * @return: none.
 */
void bpnn_Init(BPNN *bpnn_ptr,int num_input,int num_hidden,int num_output,double learn_rate)
{
    int row, col;

    /* Record topology and hyper-parameters; mark the network untrained. */
    bpnn_ptr->trained = 0;
    bpnn_ptr->num_input = num_input;
    bpnn_ptr->num_hidden = num_hidden;
    bpnn_ptr->num_output = num_output;
    bpnn_ptr->rate = learn_rate;
    bpnn_ptr->error_total = 0;

    srand((unsigned)time(NULL)); /* seed the PRNG for weight initialization */

    /* Input -> hidden weights: uniform in [-0.5, 0.5]. */
    for (row = 0; row < num_input; row++)
    {
        for (col = 0; col < num_hidden; col++)
        {
            bpnn_ptr->weight_input_hidden[row][col] = ((double)rand())/RAND_MAX-0.5;
        }
    }

    /* Hidden thresholds start at zero; hidden -> output weights in [-0.5, 0.5]. */
    for (row = 0; row < num_hidden; row++)
    {
        bpnn_ptr->threshold_hidden[row] = 0;
        for (col = 0; col < num_output; col++)
        {
            bpnn_ptr->weight_hidden_output[row][col] = ((double)rand())/RAND_MAX-0.5;
        }
    }

    /* Output thresholds and per-node errors start at zero. */
    for (col = 0; col < num_output; col++)
    {
        bpnn_ptr->threshold_output[col] = 0;
        bpnn_ptr->error[col] = 0;
    }
}
/**
* @description: forward propagation get output of network.
* @param: bpnn_ptr point to bp network.
* data point to one of training data.
* label point to one of training label.
* layer_out layer_out[i*MAX_NUM_LAYER_OUT+j] output of node j in layer i.
* @return: none.
*/
/**
 * @description: Forward pass for one sample. Computes the output of every
 *               layer into layer_out and records the per-node output error
 *               and the total squared error (0.5 * sum of squares).
 * @param: bpnn_ptr  points to the network.
 *         data      one training sample (num_input values).
 *         label     the sample's label (num_output values).
 *         layer_out scratch buffer: layer_out[i*MAX_NUM_LAYER_OUT+j] is the
 *                   output of node j in layer i (0 input, 1 hidden, 2 output).
 * @return: none.
 */
void bpnn_ForwardPropagation(BPNN *bpnn_ptr,const double *data,const double *label,double *layer_out)
{
    double *input_out  = layer_out;
    double *hidden_out = layer_out + MAX_NUM_LAYER_OUT;
    double *final_out  = layer_out + 2*MAX_NUM_LAYER_OUT;
    double net;
    int node, src;

    /* Input layer simply passes its inputs through. */
    for (node = 0; node < bpnn_ptr->num_input; node++)
    {
        input_out[node] = data[node];
    }

    /* Hidden layer: sigmoid of (weighted sum of inputs minus threshold). */
    for (node = 0; node < bpnn_ptr->num_hidden; node++)
    {
        net = -(bpnn_ptr->threshold_hidden[node]);
        for (src = 0; src < bpnn_ptr->num_input; src++)
        {
            net += (bpnn_ptr->weight_input_hidden[src][node])*input_out[src];
        }
        hidden_out[node] = Sigmoid(net);
    }

    /* Output layer: same activation, plus error bookkeeping for backprop. */
    bpnn_ptr->error_total = 0;
    for (node = 0; node < bpnn_ptr->num_output; node++)
    {
        net = -(bpnn_ptr->threshold_output[node]);
        for (src = 0; src < bpnn_ptr->num_hidden; src++)
        {
            net += (bpnn_ptr->weight_hidden_output[src][node])*hidden_out[src];
        }
        final_out[node] = Sigmoid(net);
        bpnn_ptr->error[node] = final_out[node]-label[node];
        bpnn_ptr->error_total += 0.5l*(bpnn_ptr->error[node])*(bpnn_ptr->error[node]);
    }
}
/**
* @description: back propagation update weight and threshold.
* @param: bpnn_ptr point to bp network.
* layer_out layer_out[i*MAX_NUM_LAYER_OUT+j] output of node j in layer i.
* @return: none.
*/
/**
 * @description: Backward pass: update every weight and threshold from the
 *               errors recorded by the last forward pass.
 *               FIX: the hidden-layer gradient e[] must be computed from the
 *               hidden->output weights as they were during the forward pass.
 *               The original code updated weight_hidden_output BEFORE
 *               computing e[], so the input->hidden updates used already-
 *               modified weights; all gradients are now computed first and
 *               the updates applied afterwards (standard back propagation).
 * @param: bpnn_ptr  points to the network.
 *         layer_out layer_out[i*MAX_NUM_LAYER_OUT+j] output of node j in
 *                   layer i, as filled by bpnn_ForwardPropagation.
 * @return: none.
 */
void bpnn_BackPropagation(BPNN *bpnn_ptr,const double *layer_out)
{
    double g[MAX_NUM_OUTPUT],e[MAX_NUM_HIDDEN],t;
    double rate;
    int i,j;
    rate = (bpnn_ptr->rate);
    /* Output-layer gradient: g[k] = (y_k - t_k) * y_k * (1 - y_k). */
    for(i=0;i<bpnn_ptr->num_output;i++)
    {
        g[i] = (bpnn_ptr->error[i])*(layer_out[2*MAX_NUM_LAYER_OUT+i])*(1-layer_out[2*MAX_NUM_LAYER_OUT+i]);
    }
    /* Hidden-layer gradient, using the PRE-update hidden->output weights:
       e[j] = (sum_k w_jk * g[k]) * z_j * (1 - z_j). */
    for(i=0;i<bpnn_ptr->num_hidden;i++)
    {
        t = 0;
        for(j=0;j<bpnn_ptr->num_output;j++)
        {
            t += (bpnn_ptr->weight_hidden_output[i][j])*g[j];
        }
        e[i] = t*layer_out[MAX_NUM_LAYER_OUT+i]*(1-layer_out[MAX_NUM_LAYER_OUT+i]);
    }
    /* Apply updates: threshold += rate*gradient, weight -= rate*gradient*input. */
    for(i=0;i<bpnn_ptr->num_output;i++)
    {
        bpnn_ptr->threshold_output[i] += rate*g[i];
    }
    for(i=0;i<bpnn_ptr->num_hidden;i++)
    {
        for(j=0;j<bpnn_ptr->num_output;j++)
        {
            bpnn_ptr->weight_hidden_output[i][j] += -rate*g[j]*layer_out[MAX_NUM_LAYER_OUT+i];
        }
        bpnn_ptr->threshold_hidden[i] += rate*e[i];
    }
    for(i=0;i<bpnn_ptr->num_input;i++)
    {
        for(j=0;j<bpnn_ptr->num_hidden;j++)
        {
            bpnn_ptr->weight_input_hidden[i][j] += -rate*e[j]*layer_out[i];
        }
    }
}
/**
* @description: train back propagation network.
* @param: bpnn_ptr point to bp network.
* data point to training data, a row is a sample.
* label point to training label.
* num_sample number of samples.
* num_input number of properties of each sample and nodes number of input layer.
* num_hidden nodes number of hidden layer.
* num_output number of label of each sample and nodes number of output layer.
* learn_rate learning rate.
* num_iter number of iteration.
* @return: none.
*/
/**
 * @description: Train the BP network with per-sample (stochastic) updates.
 *               Initializes the network, Z-score normalizes the data
 *               (remembering each column's mean/std in the model), min-max
 *               normalizes the labels to [0,1] to match the sigmoid output,
 *               then runs forward+backward passes until num_iter epochs or
 *               until the stopping criterion is met.
 * @param: bpnn_ptr   points to the network.
 *         data       training data, one row per sample (num_sample x num_input).
 *         label      training labels (num_sample x num_output); normalized in place.
 *         num_sample number of samples.
 *         num_input  properties per sample and input-layer node count.
 *         num_hidden hidden-layer node count.
 *         num_output labels per sample and output-layer node count.
 *         learn_rate learning rate.
 *         num_iter   maximum number of epochs.
 * @return: none.
 */
void bpnn_Train(BPNN *bpnn_ptr,double *data,double *label,int num_sample,int num_input,int num_hidden,int num_output,double learn_rate,int num_iter)
{
    const double error_tolerance = 0.0000001; /* early-stop threshold (was an inline magic number) */
    int iter,sample; /* removed unused variable 'i' */
    double layer_out[3][MAX_NUM_LAYER_OUT]; // layer_out[i][j] output of node j in layer i, i = 0 input, i = 1 hidden, i = 2 output
    printf("Training...\r\n");
    bpnn_Init(bpnn_ptr,num_input,num_hidden,num_output,learn_rate);
    bpnn_Normalize(bpnn_ptr,data,num_sample,num_input);
    Min_Max(label,num_sample,num_output);
    for(iter=0;iter<num_iter;iter++)
    {
        for(sample=0;sample<num_sample;sample++)
        {
            bpnn_ForwardPropagation(bpnn_ptr,&data[sample*num_input],&label[sample*num_output],&layer_out[0][0]);
            bpnn_BackPropagation(bpnn_ptr,&layer_out[0][0]);
        }
        /* NOTE(review): error_total is reset per forward pass, so this check
           sees only the LAST sample of the epoch, not the epoch's total. */
        if(bpnn_ptr->error_total<error_tolerance)
            break;
    }
    bpnn_ptr->trained = 1;
    printf("Training over!\r\nerror rate: %.4f\r\niteration times: %d\r\n",bpnn_ptr->error_total,iter);
}
/**
* @description: use network to predict label.
* @param: bpnn_ptr point to bp network.
* data point to one of training data.
* label return label.
* num_sample number of samples.
* @return: none.
*/
/**
 * @description: Run the trained network over num_sample samples and write
 *               the predicted outputs into label.
 * @param: bpnn_ptr   points to a TRAINED network (prints a message and
 *                    returns otherwise).
 *         data       samples to predict, one row per sample; normalized in
 *                    place with the mean/std remembered from training.
 *         label      output buffer (num_sample x num_output); its initial
 *                    contents are only read for the (meaningless here)
 *                    error bookkeeping, then overwritten with predictions.
 *         num_sample number of samples.
 * @return: none.
 */
void bpnn_Predict(BPNN *bpnn_ptr,double *data,double *label,int num_sample)
{
    double outputs[3][MAX_NUM_LAYER_OUT]; // outputs[i][j]: node j of layer i (0 input, 1 hidden, 2 output)
    int sample, node;

    if (bpnn_ptr->trained == 0)
    {
        printf("Network untrained!");
        return;
    }

    /* Inputs must go through the same Z-score transform as the training data. */
    bpnn_Normalize(bpnn_ptr, data, num_sample, bpnn_ptr->num_input);

    for (sample = 0; sample < num_sample; sample++)
    {
        bpnn_ForwardPropagation(bpnn_ptr,
                                &data[sample*(bpnn_ptr->num_input)],
                                &label[sample*(bpnn_ptr->num_output)],
                                &outputs[0][0]);
        /* Copy the output layer's activations out as the prediction. */
        for (node = 0; node < bpnn_ptr->num_output; node++)
        {
            label[sample*(bpnn_ptr->num_output)+node] = outputs[2][node];
        }
    }
}
/**
* @description: output information of bp network to "bpnn_out.txt", output model of network to "'model'.bin".
* @param: bpnn_ptr point to bp network.
* model name of model.
* @return: none.
*/
/**
 * @description: Write a human-readable summary of the network to
 *               "bpnn_out.txt" and dump the whole BPNN struct as a binary
 *               model to file 'model'.
 *               FIX: the second weight-table header said "input layer to
 *               hidden layer" twice (copy-paste); it now correctly reads
 *               "hidden layer to output layer". Also checks the fwrite
 *               result so a short write is not silently ignored.
 * @param: bpnn_ptr points to the network to save.
 *         model    file name for the binary model.
 * @return: none (exits on I/O failure, matching the file's convention).
 */
void bpnn_FileOutput(BPNN *bpnn_ptr,char *model)
{
    FILE *file = NULL;
    int i,j;
    file = fopen("bpnn_out.txt","w");
    if(file == NULL)
    {
        printf("Error!");
        exit(1);
    }
    fprintf(file,"Number of nodes in input layer: %d\n",bpnn_ptr->num_input);
    fprintf(file,"Number of nodes in hidden layer: %d\n",bpnn_ptr->num_hidden);
    fprintf(file,"Number of nodes in output layer: %d\n",bpnn_ptr->num_output);
    fprintf(file,"\nHidden layer threshold: ");
    for(i=0;i<bpnn_ptr->num_hidden;i++)
    {
        fprintf(file," %.2lf ",(bpnn_ptr->threshold_hidden[i]));
    }
    fprintf(file,"\nOutput layer threshold: ");
    for(i=0;i<bpnn_ptr->num_output;i++)
    {
        fprintf(file," %.2lf ",(bpnn_ptr->threshold_output[i]));
    }
    fprintf(file,"\n\nWeight of input layer to hidden layer: ");
    for(i=0;i<bpnn_ptr->num_input;i++)
    {
        fprintf(file,"\n%d row: ",i);
        for(j=0;j<bpnn_ptr->num_hidden;j++)
        {
            fprintf(file," %.2lf ",(bpnn_ptr->weight_input_hidden[i][j]));
        }
    }
    fprintf(file,"\n\nWeight of hidden layer to output layer: "); /* was a duplicate of the header above */
    for(i=0;i<bpnn_ptr->num_hidden;i++)
    {
        fprintf(file,"\n%d row: ",i);
        for(j=0;j<bpnn_ptr->num_output;j++)
        {
            fprintf(file," %.3lf ",(bpnn_ptr->weight_hidden_output[i][j]));
        }
    }
    fprintf(file,"\n\n\"%s\" is network model.",model);
    fclose(file);
    file = fopen(model,"wb");
    if(file == NULL)
    {
        printf("Error!");
        exit(1);
    }
    /* A short write would produce an unloadable model; fail loudly instead. */
    if(fwrite(bpnn_ptr,sizeof(BPNN),1,file) != 1)
    {
        printf("Error!");
        fclose(file);
        exit(1);
    }
    fclose(file);
}
/**
* @description: load model from "'model'.bin".
* @param: bpnn_ptr point to bp network.
* model name of model.
* @return: none.
*/
/**
 * @description: Load a binary model previously written by bpnn_FileOutput.
 *               FIX: the fread return value was ignored, so a truncated or
 *               corrupt model file silently left bpnn_ptr partially filled;
 *               a short read now fails loudly like the other I/O errors.
 * @param: bpnn_ptr points to the network to fill.
 *         model    file name of the binary model.
 * @return: none (exits on I/O failure, matching the file's convention).
 */
void bpnn_LoadModel(BPNN *bpnn_ptr,char *model)
{
    FILE *file = NULL;
    file = fopen(model,"rb");
    if(file == NULL)
    {
        printf("Error!");
        exit(1);
    }
    if(fread(bpnn_ptr,sizeof(BPNN),1,file) != 1) /* short read => invalid model */
    {
        printf("Error!");
        fclose(file);
        exit(1);
    }
    fclose(file);
}
/**
* @description: normalize input values.
* @param: bpnn_ptr point to bp network.
* x point to matrix's address.
* row number of samples.
* col number of properties of each sample and nodes number of input layer.
* @return: none.
*/
/**
 * @description: Column-wise Z-score normalization in place. When the network
 *               is untrained, each column's mean and standard deviation are
 *               computed, stored in the model, and applied; when trained,
 *               the stored statistics are reused (so prediction inputs get
 *               the same transform as the training data).
 *               FIXES: (1) a constant column has std == 0 and previously
 *               caused division by zero -- std is clamped to 1 so the column
 *               becomes all zeros; (2) the variance estimate sum2/n - mean^2
 *               can round slightly negative, and pow(negative, 0.5) is NaN --
 *               it is clamped to 0 and computed with sqrt.
 * @param: bpnn_ptr points to the network (holds/receives mean_std).
 *         x        matrix to normalize, row-major, modified in place.
 *         row      number of samples.
 *         col      properties per sample (input-layer node count).
 * @return: none.
 */
void bpnn_Normalize(BPNN *bpnn_ptr,double *x,int row,int col)
{
    double sum1,sum2,mean,std,var;
    int i,j;
    if(bpnn_ptr->trained)
    {
        /* Prediction path: reuse the statistics remembered at training time. */
        for(j=0;j<col;j++)
        {
            std = bpnn_ptr->mean_std[j][1];
            if(std == 0)
                std = 1.0; /* guard models saved with a degenerate column */
            for(i=0;i<row;i++)
            {
                x[i*col+j] = Zscore(x[i*col+j],bpnn_ptr->mean_std[j][0],std);
            }
        }
        return;
    }
    for(j=0;j<col;j++)
    {
        sum1 = 0;
        sum2 = 0;
        for(i=0;i<row;i++)
        {
            sum1 += x[i*col+j];
            sum2 += x[i*col+j]*x[i*col+j];
        }
        mean = sum1/row;
        var = (sum2/row)-(mean*mean);
        if(var < 0)
            var = 0; /* rounding can drive the estimate slightly negative */
        std = sqrt(var);
        if(std == 0)
            std = 1.0; /* constant column: avoid division by zero, maps to all zeros */
        bpnn_ptr->mean_std[j][0] = mean; // mean value
        bpnn_ptr->mean_std[j][1] = std; // standard deviation (clamped to 1 if degenerate)
        for(i=0;i<row;i++)
        {
            x[i*col+j] = Zscore(x[i*col+j],mean,std);
        }
    }
}
/**
* @description: min-max normalization, normalizing the minimum and maximum values of data to [0, 1].
* @param: x point to matrix's address.
* row number of samples.
* col number of properties of each sample and nodes number of input layer.
* @return: none.
*/
/**
 * @description: Column-wise min-max normalization in place, mapping each
 *               column linearly onto [0, 1].
 *               FIX: a constant column (max == min) previously divided by
 *               zero, producing NaN/inf; such a column is now mapped to all
 *               zeros.
 * @param: x   matrix to normalize, row-major, modified in place.
 *         row number of samples.
 *         col number of columns (properties per sample).
 * @return: none.
 */
void Min_Max(double *x,int row,int col)
{
    double max,min,range,temp;
    int i,j;
    for(j=0;j<col;j++)
    {
        max = x[j];
        min = x[j];
        for(i=0;i<row;i++)
        {
            temp = x[i*col+j];
            max = (temp>max)?temp:max;
            min = (temp<min)?temp:min;
        }
        range = max-min;
        if(range == 0)
        {
            /* Constant column: no spread to normalize; avoid division by zero. */
            for(i=0;i<row;i++)
            {
                x[i*col+j] = 0.0;
            }
            continue;
        }
        for(i=0;i<row;i++)
        {
            x[i*col+j] = (x[i*col+j]-min)/range;
        }
    }
}
/**
* @description: Z-score normalization.
* @param: x data.
* mean mean value.
* std standard deviation.
* @return: function output.
*/
/**
 * @description: Z-score normalization of a single value.
 * @param: x    the data value.
 *         mean column mean.
 *         std  column standard deviation (must be non-zero).
 * @return: (x - mean) / std.
 */
double Zscore(double x,double mean,double std)
{
    double centered = x - mean;
    return centered / std;
}
/**
* @description: sigmoid function.
* @param: x input variable.
* @return: function output.
*/
double Sigmoid(double x)
{
return 1.0l/(1.0l+exp(-x));
}
核心代码都贴出来了,代码里每个函数都有注释,因为vscode的中文时不时有乱码,所以翻译成英文了,机翻的可能不标准。使用时,先将训练数据集和测试数据集读到数组(数组格式请看代码注释)里,然后调用bpnn_Train函数训练网络,再调用bpnn_Predict函数预测测试数据,其他函数功能请看注释。
完整代码下载地址:三层BP神经网络C语言代码 。