Initialization matters quite a bit for a neural network, so to support pluggable initialization strategies I made a few small changes to the existing network and added an extensible way to initialize its weights. Without further ado, here is the code. First the test program (boost's timer is used to measure training time):
#include <stdio.h>
#include <conio.h>
#include <cfloat>   // DBL_MAX
#include <iomanip>  // std::setw
#include <iostream>
#include <boost/timer/timer.hpp>
#include "bp.hpp"
#include "activate_function.hpp"
int main(int argc, char** argv)
{
int i_loop_num = 1;
using net_type = net<adam, ReLu, HeMean, 1, 80, 1>; // BP network type: adam weight updates, ReLu activation, He-style (HeMean) initialization
net_type n;
net_type::ret_type& mt_delta = n.get_delta(); // keep a reference so the delta matrix is never copied
int i_max_repeat_count = 0;
double d_pre_max = 0.0;
double d_max = -1 * DBL_MAX;
double d_precious = 0.005;
bool bb = false;
boost::timer::cpu_timer tmr; // cpu_timer matches the <boost/timer/timer.hpp> header included above
for (;;) // keep sweeping the sample range until the largest error is small enough
{
double d_max_error_j = 0.1;
for (double j = 0.1; j < 0.6; j+= d_precious)
{
double d_out = j;
net_type::ret_type mt_expected({ d_out }); // expected output
net_type::input_type mt_input = { j * j * j + 2. * j + 1 };
auto mt_out = n.forward(mt_input);
if (!bb)
n.backward(mt_expected);
if (d_max < mt_delta.max_abs())
{
d_max_error_j = j;
}
if (bb)
{
mt_delta.print();
}
d_max = d_max < mt_delta.max_abs() ? mt_delta.max_abs() : d_max;
}
if (i_loop_num % 100 == 0)
std::cout << std::setw(5) << i_loop_num << "|" << d_max << std::endl;
if (bb)break;
i_loop_num++;
if (d_max < d_precious/2./* || abs(d_pre_max - d_max) < 1e-11 */)
{
bb = true;
continue;
}
d_pre_max = d_max;
d_max = -1 * DBL_MAX;
for (int k = 0; k < 3; ++k) // a few extra training passes on the sample that had the largest error
{
double d_out = d_max_error_j;
net_type::ret_type mt_expected({ d_out }); // expected output
net_type::input_type mt_input = { d_max_error_j * d_max_error_j * d_max_error_j + 2. * d_max_error_j + 1 };
auto mt_out = n.forward(mt_input);
n.backward(mt_expected);
}
}
std::cout << "span:" << tmr.elapsed() << "\tloop num:" << i_loop_num << std::endl;
_getch();
return 0;
}
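The new ingredient in the type alias above is the HeMean tag: the third template argument of net selects the weight-initialization strategy, and the network constructors shown further down hand their weight matrix to weight_initilizer<init_name_t>::cal(...). Below is a minimal, self-contained sketch of that tag-dispatch shape; fake_mat, the He-style normal fill, and the fan_in = cols choice are illustrative assumptions, not the contents of the actual weight_initilizer.hpp:

#include <cmath>
#include <random>

// stand-in for mat<rows, cols>, with just enough surface to show the initializer interface
template<int rows, int cols>
struct fake_mat { double d[rows][cols] = {}; };

struct HeMean {}; // tag type, as used in net<adam, ReLu, HeMean, 1, 80, 1>

template<typename init_name_t>
struct weight_initilizer; // primary template: one specialization per initialization tag

template<>
struct weight_initilizer<HeMean>
{
    // He-style initialization: zero-mean normal with stddev sqrt(2 / fan_in), taking fan_in = cols
    template<int rows, int cols>
    static void cal(fake_mat<rows, cols>& m)
    {
        static std::mt19937 gen{ std::random_device{}() };
        std::normal_distribution<double> dist(0.0, std::sqrt(2.0 / cols));
        for (int i = 0; i < rows; ++i)
            for (int j = 0; j < cols; ++j)
                m.d[i][j] = dist(gen);
    }
};

int main()
{
    fake_mat<80, 1> w;                 // same shape as the hidden layer's weight matrix above
    weight_initilizer<HeMean>::cal(w); // same call the net constructors make
    return 0;
}

Adding another strategy then only needs a new tag type plus one more specialization; bp.hpp itself stays untouched, since it only ever calls weight_initilizer<init_name_t>::cal.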
The result is astonishing: solving the cubic (recovering j from j³ + 2j + 1) takes fewer than 100 training iterations. The run output is as follows:
Now for the detailed changes. First, the modifications to the BP network (bp.hpp):
#ifndef _BP_HPP_
#define _BP_HPP_
#include <initializer_list>
#include <iomanip>
#include "mat.hpp"
#include "base_function.hpp"
#include "base_logic.hpp"
#include "update_methods.hpp"
#include "weight_initilizer.hpp"
template<template<typename> class update_method_templ, template<typename> class activate_func, typename init_name_t, int i1, int i2, int...is>
struct net
{
mat<i2, i1> mt_weight;
mat<i1, 1> mt_in;
mat<i2, 1> mt_b;
using next_net_t = net<update_method_templ, activate_func, init_name_t, i2, is...>;
next_net_t net_next;
update_method_templ<mat<i2, i1>> ad;
update_method_templ<mat<i2, 1>> adb;
activate_func<mat<i2, 1>> act_func;
using input_type = mat<i1, 1>; // input type of this layer
using ret_type = typename next_net_t::ret_type; // output type of the whole network
static constexpr int out_dim = next_net_t::out_dim;
net():net_next(), mt_weight(), ad(), adb()
{
weight_initilizer<init_name_t>::cal(mt_weight);
}
inline auto forward(const mat<i1, 1>& mt_input)
{
mt_in = mt_input;
return net_next.forward(act_func.forward(dot(mt_weight, mt_input) + mt_b));
}
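/* update() below implements the per-layer backward step:
   mt_desig  = f'(z) * mt_delta            (error pulled back through the activation, element-wise)
   mt_update = dot(mt_desig, mt_in^T)      (gradient w.r.t. this layer's weights)
   bias grad = mt_desig                    (gradient w.r.t. the bias)
   mt_ret    = dot(mt_weight^T, mt_desig)  (error handed back to the previous layer) */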
inline auto update(const mat<i2, 1>& mt_delta)
{
/* update this layer's weight matrix */
auto mt_desig_origin = act_func.backward();
auto mt_desig = mt_desig_origin * mt_delta; // back-propagated error multiplied element-wise by the activation derivative
auto mt_update = dot(mt_desig, mt_in.t()); // weight-gradient matrix
auto mt_ret = dot(mt_weight.t(), mt_desig); // error to hand back to the previous layer (uses the pre-update weights)
mt_weight = ad.update(mt_weight, mt_update);
mt_b = adb.update(mt_b, mt_desig); // update the bias
return mt_ret;
}
inline auto backward(const mat<out_dim, 1>& mt_expected)
{
auto mt_delta = net_next.backward(mt_expected);
return update(mt_delta);
}
inline ret_type& get_delta()
{
return net_next.get_delta();
}
void print()
{
mt_weight.print();
net_next.print();
}
};
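// terminal specialization: only two dimensions remain, so this is the output layer and it ends the recursion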
template<template<typename> class update_method_templ, template<typename> class activate_func, typename init_name_t, int i1, int i2>
struct net<update_method_templ, activate_func, init_name_t, i1, i2>
{
mat<i2, i1> mt_weight;
mat<i1, 1> mt_in;
mat<i2, 1> mt_out;
mat<i2, 1> mt_b; // bias term
mat<i2, 1> mt_delta;
update_method_templ<mat<i2, i1>> ad;
update_method_templ<mat<i2, 1>> adb;
activate_func<mat<i2, 1>> act_func;
static constexpr int out_dim = i2;
using ret_type = mat<i2, 1>;
net() :mt_weight(), ad(), adb()
{
weight_initilizer<init_name_t>::cal(mt_weight);
}
inline auto forward(const mat<i1, 1>& mt_input)
{
mt_in = mt_input;
mt_out = act_func.forward(dot(mt_weight, mt_input) + mt_b);
return mt_out;
}
inline auto update(const mat<i2, 1>& mt_delta)
{
/* update this layer's weight matrix */
auto mt_desig_origin = act_func.backward();
auto