BP neural networks learn slowly, so Adam is used here to speed up training. The test code below solves a quadratic equation: it trains a network that takes x^2 + 2*x as input and outputs x:
#include <conio.h>
#include <iostream>  // std::cout
#include <cfloat>    // DBL_MAX
#include <algorithm> // std::max
#include "bp.hpp"    // neural-network header

int main(int argc, char** argv)
{
    double d_pre_max = 0.0;
    double d_max = -DBL_MAX;
    using net_type = net<1, 200, 1>;              // network type: 1 input, 200 hidden nodes, 1 output
    net_type n;                                   // create the network
    net_type::ret_type& mt_delta = n.get_delta(); // keep a reference so the delta matrix is never copied
    int i_max_repeat_count = 0;
    int i_loop_num = 0;                           // epoch counter, printed at the end
    for (;;)
    {
        for (double j = 0.1; j < 0.6; j += .01)
        {
            double d_out = j;
            net_type::ret_type mt_expected({ d_out });          // expected output
            net_type::input_type mt_input = { j * j + 2. * j }; // input: x^2 + 2x
            auto mt_out = n.forward(mt_input);
            n.backward(mt_expected);
            d_max = std::max(d_max, mt_delta.max_abs());
        }
        std::cout << d_max << std::endl;
        i_loop_num++;
        if (d_max < 1e-3/* || abs(d_pre_max - d_max) < 1e-11 */)
        {
            i_max_repeat_count++;
            std::cout << "-------" << d_max << "---------" << std::endl;
            if (i_max_repeat_count > 10)
                break;
        }
        else
        {
            i_max_repeat_count--;
            if (i_max_repeat_count < 0) i_max_repeat_count = 0;
        }
        d_pre_max = d_max;
        d_max = -DBL_MAX;
    }
    for (double j = 0.1; j < 0.6; j += .1)
    {
        net_type::input_type mt_input = { j * j + 2. * j }; // the net has a single input: x^2 + 2x
        auto mt_out = n.forward(mt_input);
        mt_out.print();                                     // should print values close to j
    }
    std::cout << "loop num:" << i_loop_num << std::endl;
    _getch();
    return 0;
}
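Since bp.hpp itself is not listed here, the following is a minimal, self-contained sketch of the per-parameter Adam update it is said to use (the names adam_state and adam_step are made up for illustration; the real bp.hpp may organize this differently):

#include <cmath>
#include <cstdio>

// Hypothetical per-weight Adam state.
struct adam_state
{
    double m = 0.0; // running mean of gradients (first moment)
    double v = 0.0; // running mean of squared gradients (second moment)
    long   t = 0;   // step counter, used for bias correction
};

// One Adam step for a single weight w with gradient g,
// using the default hyper-parameters from the Adam paper.
inline void adam_step(double& w, double g, adam_state& s,
                      double lr = 1e-3, double beta1 = 0.9,
                      double beta2 = 0.999, double eps = 1e-8)
{
    ++s.t;
    s.m = beta1 * s.m + (1. - beta1) * g;
    s.v = beta2 * s.v + (1. - beta2) * g * g;
    double m_hat = s.m / (1. - std::pow(beta1, (double)s.t)); // bias-corrected first moment
    double v_hat = s.v / (1. - std::pow(beta2, (double)s.t)); // bias-corrected second moment
    w -= lr * m_hat / (std::sqrt(v_hat) + eps);
}

int main()
{
    // Toy check: minimize (w - 3)^2, whose gradient is 2 * (w - 3).
    double w = 0.0;
    adam_state s;
    for (int i = 0; i < 10000; ++i)
        adam_step(w, 2. * (w - 3.), s);
    std::printf("w = %f (expect ~3)\n", w);
    return 0;
}

For checking the verification loop by hand: from y = x^2 + 2x it follows that x = sqrt(1 + y) - 1 (taking the positive root, since x lies in (0.1, 0.6)), so the printed outputs should approach that value.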
The program runs fine, just not as fast as hoped: even fitting a function this simple takes a long time. The base functions:
#ifndef _BASE_FUNCTION_HPP_
#define _BASE_FUNCTION_HPP_
#include <cmath> // std::exp

// Central-difference numerical derivative: f'(v) ~ (f(v+h) - f(v-h)) / (2h).
// Note: h = 1e-11 is near double's precision limit and can suffer from
// cancellation; step sizes around 1e-6 are usually more accurate.
template<typename func_t>
auto derivative(func_t&& f, const decltype(f(0))& v)
{
    constexpr double SMALL_VAL = 1e-11;
    return (f(v + SMALL_VAL) - f(v - SMALL_VAL)) / (2. * SMALL_VAL);
}
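// Usage sketch (illustrative only): the derivative of x^2 + 2x at x = 2 is
// 2*x + 2 = 6, so the call below should print a value close to 6:
//   auto f = [](double x) { return x * x + 2. * x; };
//   std::cout << derivative(f, 2.0) << std::endl;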
template<typename val_t = double>
val_t f_sigmoid(const val_t& v)
{
    return 1. / (1. + std::exp(-1. * v));
}
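// Note: the sigmoid has the closed-form derivative s'(v) = s(v) * (1 - s(v)),
// which is cheaper and more accurate than numerical differentiation;
// n_desigmoid below takes the numerical route for generality.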
// Applies the sigmoid to matrix entry (r, c); meant to be instantiated
// inside compile-time row/column loops.
template<int r, int c>
class n_sigmoid
{
public:
    template<typename imatt, typename vt = double>
    static vt cal(const imatt& mt)
    {
        return f_sigmoid(mt.get(r, c));
    }
};

// Numerical derivative of the sigmoid at matrix entry (r, c).
template<int r, int c>
class n_desigmoid
{
public:
    template<typename imatt, typename vt = double>
    static vt cal(const imatt& mt)
    {
        return derivative(f_sigmoid<vt>, mt.get(r, c));
    }
};
// Wraps a static-function type and exposes its central-difference derivative.
template<typename func_t>
struct der_func
{
    static constexpr double SMALL_VAL = 1e-7;
    static typename func_t::ret_type cal(const typename func_t::input_type& v)
    {
        return (func_t::cal(v + SMALL_VAL) - func_t::cal(v - SMALL_VAL)) / (2. * SMALL_VAL);
    }
};

// The sigmoid packaged as a static-function type, usable with der_func and func_pack.
struct func_sigmoid
{
    using ret_type = double;
    using input_type = double;
    template<typename vt = double>
    static vt cal(const vt& v)
    {
        return f_sigmoid(v);
    }
};
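// Usage sketch: sigmoid'(0) = 0.25 analytically, so this should print ~0.25:
//   std::cout << der_func<func_sigmoid>::cal(0.0) << std::endl;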
#include "base_logic.hpp"
template<typename func_t>
class func_pack
{
public:
template<int r, int c>
struct normal_t
{
template<typename imatt, typename vt = double>
static vt cal(const imatt& mt)
{
return func_t::cal(mt.get(r,c));
}
};
template<int r, int c>
struct derivative_t
{
template<typename imatt, typename vt = double>
static vt cal(const imatt& mt)
{
return der_func<func_t>::cal(mt.get(r, c));
}
};
template<int row_num, int col_num, typename val_t = double>
mat<row_num, col_num, val_t> normal(const mat<row_num, col_num, val_t>& mt)
{
using omatt = mat<row_num, col_num, val_t>;
omatt mt_ret;
col_loop<col_num - 1, normal_t>(mt_ret, mt);
return mt_ret;
}
template<int row_num, int col_num, typename val_t = double>
mat<row_num, col_num, val_t> derivative(const mat<row_num, col_num, val_t>& mt)
{
using omatt = mat<row_num, col_num, val_t>;
omatt mt_ret;
col_loop<col_num - 1, derivative_t>(mt_ret, mt);
return mt_ret;
}
};
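// Usage sketch (assuming the mat<> type and col_loop from base_logic.hpp):
//   func_pack<func_sigmoid> fp;
//   mat<2, 3> m;                 // some 2x3 matrix
//   auto a = fp.normal(m);       // element-wise sigmoid(m)
//   auto g = fp.derivative(m);   // element-wise numerical sigmoid'(m)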
/* Dot-product operation */
template<int r1, int c1, int r2, int c2, typename imatt1, typename imatt2, typename vt = double>
inline vt n_dot(const imatt1& mt1, const imatt2& mt2)
{
    static_assert(c1 == r2