Initialization matters quite a bit for a neural network, so to support pluggable initialization strategies I made a few small changes to the existing network and added an extensible way to initialize its weights. Without further ado, here is the code. First the test program (boost's timer is used to measure training time):
#include <stdio.h>
#include <conio.h>
#include <cfloat>   // DBL_MAX
#include <iomanip>  // std::setw
#include <iostream>
#include <boost/timer/timer.hpp>
#include "bp.hpp"
#include "activate_function.hpp"
int main(int argc, char** argv)
{
int i_loop_num = 1;
using net_type = net<adam, ReLu, HeMean, 1, 80, 1>; // BP network type: adam weight updates, ReLu activation, He-style (HeMean) initialization
net_type n;
net_type::ret_type& mt_delta = n.get_delta(); // keep a reference so the delta matrix is never copied
int i_max_repeat_count = 0;
double d_pre_max = 0.0;
double d_max = -1 * DBL_MAX;
double d_precious = 0.005;
bool bb = false;
boost::timer::cpu_timer tmr; // cpu_timer matches the <boost/timer/timer.hpp> header included above
for (;;) // keep sweeping the sample range until the largest error is small enough
{
double d_max_error_j = 0.1;
for (double j = 0.1; j < 0.6; j+= d_precious)
{
double d_out = j;
net_type::ret_type mt_expected({ d_out }); // expected output
net_type::input_type mt_input = { j * j * j + 2. * j + 1 };
auto mt_out = n.forward(mt_input);
if (!bb)
n.backward(mt_expected);
if (d_max < mt_delta.max_abs())
{
d_max_error_j = j;
}
if (bb)
{
mt_delta.print();
}
d_max = d_max < mt_delta.max_abs() ? mt_delta.max_abs() : d_max;
}
if (i_loop_num % 100 == 0)
std::cout << std::setw(5) << i_loop_num << "|" << d_max << std::endl;
if (bb)break;
i_loop_num++;
if (d_max < d_precious/2./* || abs(d_pre_max - d_max) < 1e-11 */)
{
bb = true;
continue;
}
d_pre_max = d_max;
d_max = -1 * DBL_MAX;
for (int k = 0; k < 3; ++k) // a few extra training passes on the sample that had the largest error
{
double d_out = d_max_error_j;
net_type::ret_type mt_expected({ d_out }); // expected output
net_type::input_type mt_input = { d_max_error_j * d_max_error_j * d_max_error_j + 2. * d_max_error_j + 1 };
auto mt_out = n.forward(mt_input);
n.backward(mt_expected);
}
}
std::cout << "span:" << tmr.elapsed() << "\tloop num:" << i_loop_num << std::endl;
_getch();
return 0;
}
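The new ingredient in the type alias above is the HeMean tag: the third template argument of net selects the weight-initialization strategy, and the network constructors shown further down hand their weight matrix to weight_initilizer<init_name_t>::cal(...). Below is a minimal, self-contained sketch of that tag-dispatch shape; fake_mat, the He-style normal fill, and the fan_in = cols choice are illustrative assumptions, not the contents of the actual weight_initilizer.hpp:

#include <cmath>
#include <random>

// stand-in for mat<rows, cols>, with just enough surface to show the initializer interface
template<int rows, int cols>
struct fake_mat { double d[rows][cols] = {}; };

struct HeMean {}; // tag type, as used in net<adam, ReLu, HeMean, 1, 80, 1>

template<typename init_name_t>
struct weight_initilizer; // primary template: one specialization per initialization tag

template<>
struct weight_initilizer<HeMean>
{
    // He-style initialization: zero-mean normal with stddev sqrt(2 / fan_in), taking fan_in = cols
    template<int rows, int cols>
    static void cal(fake_mat<rows, cols>& m)
    {
        static std::mt19937 gen{ std::random_device{}() };
        std::normal_distribution<double> dist(0.0, std::sqrt(2.0 / cols));
        for (int i = 0; i < rows; ++i)
            for (int j = 0; j < cols; ++j)
                m.d[i][j] = dist(gen);
    }
};

int main()
{
    fake_mat<80, 1> w;                 // same shape as the hidden layer's weight matrix above
    weight_initilizer<HeMean>::cal(w); // same call the net constructors make
    return 0;
}

Adding another strategy then only needs a new tag type plus one more specialization; bp.hpp itself stays untouched, since it only ever calls weight_initilizer<init_name_t>::cal.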
The result is astonishing: solving the cubic (recovering j from j³ + 2j + 1) takes fewer than 100 training iterations. The run output is as follows:
Now for the detailed changes. First, the modifications to the BP network (bp.hpp):
#ifndef _BP_HPP_
#define _BP_HPP_
#include <initializer_list>
#include <iomanip>
#include "mat.hpp"
#include "base_function.hpp"
#include "base_logic.hpp"
#include "update_methods.hpp"
#include "weight_initilizer.hpp"
template<template<typename> class update_method_templ, template<typename> class activate_func, typename init_name_t, int i1, int i2, int...is>
struct net
{
mat<i2, i1> mt_weight;
mat<i1, 1> mt_in;
mat<i2, 1> mt_b;
using next_net_t = net<update_method_templ, activate_func, init_name_t, i2, is...>;
next_net_t net_next;
update_method_templ<mat<i2, i1>> ad;
update_method_templ<mat<i2, 1>> adb;
activate_func<mat<i2, 1>> act_func;
using input_type = mat<i1, 1>; // input type of this layer
using ret_type = typename next_net_t::ret_type; // output type of the whole network
static constexpr int out_dim = next_net_t::out_dim;
net():net_next(), mt_weight(), ad(), adb()
{
weight_initilizer<init_name_t>::cal(mt_weight);
}
inline auto forward(const mat<i1, 1>& mt_input)
{
mt_in = mt_input;
return net_next.forward(act_func.forward(dot(mt_weight, mt_input) + mt_b));
}
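/* update() below implements the per-layer backward step:
   mt_desig  = f'(z) * mt_delta            (error pulled back through the activation, element-wise)
   mt_update = dot(mt_desig, mt_in^T)      (gradient w.r.t. this layer's weights)
   bias grad = mt_desig                    (gradient w.r.t. the bias)
   mt_ret    = dot(mt_weight^T, mt_desig)  (error handed back to the previous layer) */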
inline auto update(const mat<i2, 1>& mt_delta)
{
/* update this layer's weight matrix */
auto mt_desig_origin = act_func.backward();
auto mt_desig = mt_desig_origin * mt_delta; // back-propagated error multiplied element-wise by the activation derivative
auto mt_update = dot(mt_desig, mt_in.t()); // weight-gradient matrix
auto mt_ret = dot(mt_weight.t(), mt_desig); // error to hand back to the previous layer (uses the pre-update weights)
mt_weight = ad.update(mt_weight, mt_update);
mt_b = adb.update(mt_b, mt_desig); // update the bias
return mt_ret;
}
inline auto backward(const mat<out_dim, 1>& mt_expected)
{
auto mt_delta = net_next.backward(mt_expected);
return update(mt_delta);
}
inline ret_type& get_delta()
{
return net_next.get_delta();
}
void print()
{
mt_weight.print();
net_next.print();
}
};
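// terminal specialization: only two dimensions remain, so this is the output layer and it ends the recursion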
template<template<typename> class update_method_templ, template<typename> class activate_func, typename init_name_t, int i1, int i2>
struct net<update_method_templ, activate_func, init_name_t, i1, i2>
{
mat<i2, i1> mt_weight;
mat<i1, 1> mt_in;
mat<i2, 1> mt_out;
mat<i2, 1> mt_b; // bias term
mat<i2, 1> mt_delta;
update_method_templ<mat<i2, i1>> ad;
update_method_templ<mat<i2, 1>> adb;
activate_func<mat<i2, 1>> act_func;
static constexpr int out_dim = i2;
using ret_type = mat<i2, 1>;
net() :mt_weight(), ad(), adb()
{
weight_initilizer<init_name_t>::cal(mt_weight);
}
inline auto forward(const mat<i1, 1>& mt_input)
{
mt_in = mt_input;
mt_out = act_func.forward(dot(mt_weight, mt_input) + mt_b);
return mt_out;
}
inline auto update(const mat<i2, 1>& mt_delta)
{
/* update this layer's weight matrix */
auto mt_desig_origin = act_func.backward();
auto