常用激活函数:
- Sigmoid 函数:$\sigma(x) = \frac{1}{1+e^{-x}}$
- ReLU 函数:$f(x) = \max(0,x)$
- Tanh 函数:$\tanh(x) = \frac{e^x-e^{-x}}{e^x+e^{-x}}$
/*
这是一份用C++实现的人工神经网络的代码,使用的是前向传播算法和均方误差(MSE)损失函数。该代码实现了一个两层的神经网络,包含一个拥有两个节点的隐藏层和一个输出节点。具体地,首先通过成员变量确定网络的超参数(学习率和训练轮数),并对权重和偏置进行初始化。随后是前向传播过程,在这个过程中,网络会根据输入数据和当前的权重与偏置,通过激活函数(sigmoid)计算出网络的预测输出。在训练过程中,网络首先通过前向传播得出预测输出,然后再通过反向传播和梯度下降法更新权重和偏置,从而使得网络的预测更准确。最后,训练好的网络可以用于预测新数据。
Sigmoid 函数:$\sigma(x) = \frac{1}{1+e^{-x}}$
ReLU 函数:$f(x) = \max(0,x)$
Tanh 函数:$\tanh(x) = \frac{e^x-e^{-x}}{e^x+e^{-x}}$
*/
#include<bits/stdc++.h>
using namespace std;
#include<windows.h>
#include<graphics.h>
#define SCR_WIDTH 800 // 窗口宽度
#define SCR_HEIGHT 600 // 窗口高度
// Seed the C RNG; seed == 0 (the default) falls back to the current time.
inline void srandd(int seed = 0) { srand(seed ? seed : time(0)); }
// Uniform random double in [0, 1].
inline double randd() { return (double)rand() / RAND_MAX; }
// Uniform random double in [l, r].
// Fix: the original scaled by (r + 0.999... - l) — an "inclusive integer
// range" hack — which made double results spill almost a full unit past r.
// For a double-valued range the correct span is simply (r - l).
inline double randd(double l, double r) { return randd() * (r - l) + l; }
// Scale a network parameter to screen pixels for plotting (50 px per unit).
// Fix: take the argument by value instead of non-const reference — the
// function never mutates it, and by-value also accepts temporaries and
// const doubles. Call sites passing lvalues are unaffected.
double trans(double x) {
    return x * 50;
}
// Squared-error loss between a prediction and its target value.
double getMSEloss(double x1, double x2) {
    const double diff = x1 - x2;
    return diff * diff;
}
// A minimal feed-forward neural network: 2 inputs, one hidden layer of two
// sigmoid units, and a single sigmoid output unit. Trained with per-sample
// gradient descent on squared error (see train()).
class NNetwork
{
public:
// Hyper-parameters.
int epoches;          // number of passes over the training set
double learning_rate; // gradient-descent step size
// Parameters: w1..w4 connect the two inputs to hidden units h1/h2;
// w5,w6 connect h1/h2 to the output; b1,b2 are hidden biases, b3 output bias.
double w1,w2,w3,w4,w5,w6;
double b1,b2,b3;
public:
NNetwork(int es,double lr);          // store hyper-parameters, init weights
double sigmoid(double x);            // logistic activation, (0,1)
double deriv_sigmoid(double x);      // derivative of sigmoid at x
double forward(vector<double> data); // forward pass -> prediction in (0,1)
void train(vector<vector<double>> data,vector<double> label);             // SGD training loop
void predict(vector<vector<double>> test_data,vector<double> test_label); // prints accuracy
};
// Store hyper-parameters and initialize the parameters.
// Fix: the original zero-initialized every weight, which makes the two
// hidden units receive identical gradients at every step (w1==w3, w2==w4,
// w5==w6 forever), so the network degenerates to a single hidden unit.
// Small random weights break that symmetry. main() seeds rand() (via
// srandd) before constructing the network, so this is reproducible per seed.
NNetwork::NNetwork(int es,double lr):epoches(es),learning_rate(lr){
    auto init = []{ return (double)rand() / RAND_MAX - 0.5; }; // uniform [-0.5, 0.5)
    w1 = init(); w2 = init(); w3 = init();
    w4 = init(); w5 = init(); w6 = init();
    b1 = b2 = b3 = 0; // zero biases are fine once weight symmetry is broken
}
// Logistic activation: maps any real x into the open interval (0, 1).
double NNetwork::sigmoid(double x){
    const double z = exp(-x);
    return 1.0 / (1.0 + z);
}
// Derivative of the logistic function, expressed through its own value:
// sigma'(x) = sigma(x) * (1 - sigma(x)).
double NNetwork::deriv_sigmoid(double x){
    const double s = sigmoid(x);
    return s * (1.0 - s);
}
// Forward pass: two sigmoid hidden units feeding one sigmoid output.
// data[0] and data[1] are the two input features; returns a value in (0,1).
double NNetwork::forward(vector<double> data){
    const double h1 = sigmoid(w1 * data[0] + w2 * data[1] + b1);
    const double h2 = sigmoid(w3 * data[0] + w4 * data[1] + b2);
    return sigmoid(w5 * h1 + w6 * h2 + b3);
}
// Train with per-sample (stochastic) gradient descent on squared error.
// For each sample: forward pass (keeping pre-activations), then hand-derived
// backprop through the 2-2-1 sigmoid network. Every 10th epoch the total
// loss over the data set is printed; every epoch plots the current
// parameter values as pixels (x advances with the epoch) as a live trace.
void NNetwork::train(vector<vector<double>> data,vector<double> label){
    for(int epoch = 0; epoch < epoches; ++epoch){
        int total_n = data.size();
        for(int i = 0; i < total_n; ++i){
            vector<double> x = data[i];
            // Forward pass, keeping pre-activation sums for backprop.
            double sum_h1 = w1 * x[0] + w2 * x[1] + b1;
            double h1 = sigmoid(sum_h1);
            double sum_h2 = w3 * x[0] + w4 * x[1] + b2;
            double h2 = sigmoid(sum_h2);
            double sum_o1 = w5 * h1 + w6 * h2 + b3;
            double pred = sigmoid(sum_o1);
            // dL/dpred for L = (label - pred)^2.
            double d_loss_pred = -2 * (label[i] - pred);
            // Hoisted: the original recomputed deriv_sigmoid(sum_o1) five
            // times (and the hidden ones three times each) per sample; the
            // function is pure, so cache each value once.
            double ds_o1 = deriv_sigmoid(sum_o1);
            double ds_h1 = deriv_sigmoid(sum_h1);
            double ds_h2 = deriv_sigmoid(sum_h2);
            // Chain-rule factors: d(pred)/d(param) and d(hidden)/d(param).
            double d_pred_w5 = h1 * ds_o1;
            double d_pred_w6 = h2 * ds_o1;
            double d_pred_b3 = ds_o1;
            double d_pred_h1 = w5 * ds_o1;
            double d_pred_h2 = w6 * ds_o1;
            double d_h1_w1 = x[0] * ds_h1;
            double d_h1_w2 = x[1] * ds_h1;
            double d_h1_b1 = ds_h1;
            double d_h2_w3 = x[0] * ds_h2;
            double d_h2_w4 = x[1] * ds_h2;
            double d_h2_b2 = ds_h2;
            // Gradient-descent parameter updates.
            w1 -= learning_rate * d_loss_pred * d_pred_h1 * d_h1_w1;
            w2 -= learning_rate * d_loss_pred * d_pred_h1 * d_h1_w2;
            b1 -= learning_rate * d_loss_pred * d_pred_h1 * d_h1_b1;
            w3 -= learning_rate * d_loss_pred * d_pred_h2 * d_h2_w3;
            w4 -= learning_rate * d_loss_pred * d_pred_h2 * d_h2_w4;
            b2 -= learning_rate * d_loss_pred * d_pred_h2 * d_h2_b2;
            w5 -= learning_rate * d_loss_pred * d_pred_w5;
            w6 -= learning_rate * d_loss_pred * d_pred_w6;
            b3 -= learning_rate * d_loss_pred * d_pred_b3;
        }
        // Periodic progress report: total squared error over the data set.
        if(epoch % 10 == 0){
            double loss = 0;
            for(int i = 0; i < total_n; ++i){
                double pred = forward(data[i]);
                loss += getMSEloss(pred, label[i]);
            }
            cout<<"epoch "<<epoch<<" loss: "<<loss<<endl;
            delay_ms(1); // give the render loop a chance to refresh
        }
        // Plot each parameter's trajectory; one pixel column per epoch.
        int xx = SCR_WIDTH * epoch * 1. / epoches;
        putpixel_f(xx, SCR_HEIGHT/2 - trans(w1), 0xffff00);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(w2), 0xff00ff);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(b1), 0x00ffff);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(w3), 0x0000ff);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(w4), 0x00ff00);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(b2), 0xff0000);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(w5), 0x888888);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(w6), 0xAAAAAA);
        putpixel_f(xx, SCR_HEIGHT/2 - trans(b3), 0xCCCCCC);
    }
}
// Evaluate classification accuracy on a labelled test set: the network
// output is thresholded at 0.5 and compared with the expected label;
// prints the fraction of correct predictions.
void NNetwork::predict(vector<vector<double>> test_data,vector<double> test_label){
    int n = test_data.size();
    double cnt = 0;
    for(int i = 0; i < n; ++i){
        double label_hat = forward(test_data[i]) > 0.5 ? 1 : 0;
        if(test_label[i] == label_hat) cnt += 1;
    }
    cout<<"correct rate:"<<cnt/n<<endl;
}
int main(){
// 可视化初始化
// setinitmode(INIT_NOBORDER, 100, 100);
initgraph(SCR_WIDTH, SCR_HEIGHT, INIT_RENDERMANUAL | INIT_NOFORCEEXIT);
setcaption("Title here ~");
// 随机造数据方法设置
srandd();
auto make_data=[](vector<vector<double>>&data,vector<double>&label) {
for(int i=10; i; i--) {
double x=randd(-100,100);
double y=randd(-100,100);
data.push_back({x,y});
label.push_back((x*x+y-8)>0?1:0);
}
};
// 造训练数据
// vector<vector<double>> data = {{-2,-1},{25,6},{17,4},{-15,-6},{3,-6},{7,-6},{1,-1.1},{3.6,-3.4}};
// vector<double> label = {1,0,0,1,1,0,1,0};
vector<vector<double>> data;
vector<double> label;
make_data(data,label);
// 训练
NNetwork network = NNetwork(10000,0.6);
network.train(data,label);
// 造测试数据
// vector<vector<double>> test_data = {{-3,-4},{-5,-4},{12,3},{-13,-4},{9,12}};
// vector<double> test_label = {1,1,0,1,0};
vector<vector<double>> test_data;
vector<double> test_label;
make_data(test_data,test_label);
// 测试
network.predict(test_data,test_label);
cerr<<clock()<<" ms"<<endl;
cerr<<"w1 = "<<network.w1<<endl;
cerr<<"w2 = "<<network.w2<<endl;
cerr<<"b1 = "<<network.b1<<endl;
cerr<<"w3 = "<<network.w3<<endl;
cerr<<"w4 = "<<network.w4<<endl;
cerr<<"b2 = "<<network.b2<<endl;
cerr<<"w5 = "<<network.w5<<endl;
cerr<<"w6 = "<<network.w6<<endl;
cerr<<"b3 = "<<network.b3<<endl;
this_thread::sleep_for(chrono::seconds(3));
getch();
closegraph();
return 0;
}