嗯嗯... 看了 3Blue1Brown 的神经网络结构简介视频之后,觉得这个人工ZZ网络好好玩啊!遂入坑。
这是视频链接:https://www.bilibili.com/video/av15532370?from=search&seid=10137053428682538306
先挂一段代码... 这两天苦受各种DDL欺压,只能稍后再详细记录我的学习历程啦~
/* BP网络 by Kevin
*
* Supervised Learning
* for Classification (Multi-class Classification)
*
* 矩阵类:自敲,模板类(便于编译器检查行列匹配等问题)
* 隐含层层数:两层
* 隐含层神经元个数:自定
* Training Data:4位二进制数 <-> 对应位置
* 前馈调节方式:梯度下降法
*/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <ctime>
#include <vector>
typedef unsigned int uint;
/* Fixed-size R x C matrix.
 *
 * The dimensions are template parameters so that the compiler rejects
 * mismatched operand shapes (e.g. multiplying {R,C} by anything other
 * than {C,C1}) at compile time.  Storage is a public plain 2-D array
 * so callers may index it directly.
 */
template <uint R, uint C, typename Type = double>
class Matrix
{
public:
    Type m[R][C];
public:
    // Entries are deliberately left uninitialized; fill before use.
    Matrix(void) { }
    // Fill every entry with the same value.
    Matrix(const Type val)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] = val;
    }
    // Fill with uniform pseudo-random values in [inf, sup] via rand().
    Matrix(const Type inf, const Type sup)
    {
        const Type interval = (sup-inf) / RAND_MAX;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] = inf + interval * rand();
    }
    Matrix(const Matrix<R, C> &o)
    {
        memcpy(m, o.m, sizeof(m));
    }
    // Re-randomize every entry uniformly in [inf, sup].
    void random_init(const Type inf, const Type sup)
    {
        const Type interval = (sup-inf) / RAND_MAX;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] = inf + interval * rand();
    }
    // Mutation step: with probability ~1/4, shift an entry by a uniform
    // random offset in [-d, d] (used by the evolutionary search).
    void random_fix(const Type d)
    {
        const Type interval = 2*d / RAND_MAX;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                if (rand() < RAND_MAX/4)
                    m[r][c] += interval * rand() - d;
    }
    inline Matrix<R, C> & operator =(const Matrix<R, C> &o)
    {
        if (this != &o)
            memcpy(m, o.m, sizeof(m));
        return *this;
    }
    // Row access: mat[r] yields a reference to the r-th row array.
    inline auto operator [](int index) -> decltype(*m)
    {
        return m[index];
    }
    // Transposed copy: {R,C} -> {C,R}.
    Matrix<C, R> transpose(void) const
    {
        Matrix<C, R> ret;
        for (uint c=0; c<C; ++c)
            for (uint r=0; r<R; ++r)
                ret.m[c][r] = m[r][c];
        return ret;
    }
    // Operator spelling of transpose() (body deduplicated).
    Matrix<C, R> operator ~(void) const
    {
        return transpose();
    }
    Matrix<R, C> operator +(const Matrix<R, C> &o) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] + o.m[r][c];
        return ret;
    }
    const Matrix<R, C> & operator +=(const Matrix<R, C> &o)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] += o.m[r][c];
        return *this;
    }
    Matrix<R, C> operator -(const Matrix<R, C> &o) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] - o.m[r][c];
        return ret;
    }
    const Matrix<R, C> & operator -=(const Matrix<R, C> &o)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] -= o.m[r][c];
        return *this;
    }
    // Matrix product: {R, C} * {C, C1} -> {R, C1}.
    template <uint C1>
    Matrix<R, C1> operator *(const Matrix<C, C1> &o) const
    {
        Matrix<R, C1> ret;
        for (uint r=0; r<R; ++r)
            for (uint c1=0; c1<C1; ++c1)
            {
                // BUGFIX: the first term previously read BOTH factors from
                // *this* (m[r][0] * m[0][c1]) instead of using the
                // right-hand operand, corrupting every product.
                ret.m[r][c1] = m[r][0] * o.m[0][c1];
                for (uint c=1; c<C; ++c)
                    ret.m[r][c1] += m[r][c] * o.m[c][c1];
            }
        return ret;
    }
    // Hadamard (element-wise) product: {R, C} ^ {R, C}.
    Matrix<R, C> operator ^(const Matrix<R, C> &o) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] * o.m[r][c];
        return ret;
    }
    const Matrix<R, C> & operator ^=(const Matrix<R, C> &o)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] *= o.m[r][c];
        return *this;
    }
    // Scalar * matrix.
    friend Matrix<R, C> operator *(const Type val, const Matrix<R, C> &o)
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = o.m[r][c] * val;
        return ret;
    }
    // Matrix * scalar.
    Matrix<R, C> operator *(const Type val) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] * val;
        return ret;
    }
    const Matrix<R, C> & operator *=(const Type val)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] *= val;
        return *this;
    }
    // Kronecker product: {R, C} (x) {R1, C1} -> {R*R1, C*C1}.
    template<uint R1, uint C1>
    Matrix <R * R1, C * C1> kronecker(const Matrix<R1, C1> &o) const
    {
        Matrix<R * R1, C * C1> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                for (uint r1=0; r1<R1; ++r1)
                    for (uint c1=0; c1<C1; ++c1)
                        ret.m[r*R1 + r1][c*C1 + c1] = m[r][c] * o.m[r1][c1];
        return ret;
    }
    // Sum of the squares of all entries.
    Type sq_sum(void) const
    {
        Type sum = 0.0;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                sum += m[r][c] * m[r][c];
        return sum;
    }
    // Print the matrix, one row per line.
    void println(void) const
    {
        for (uint r=0; r<R; ++r, putchar(10))
            for (uint c=0; c<C; ++c)
                printf("%f ", m[r][c]);
    }
    // Apply `lambda` to every entry (by reference).
    template <class Lambda>
    void for_each(Lambda lambda)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                lambda(m[r][c]);
    }
    // Logistic sigmoid.  `constexpr` removed: std::exp is not constexpr
    // in C++17, so the original declaration was ill-formed on strict
    // compilers.  No caller uses it in a constant expression.
    static double logsig(double x)
    {
        return 1 / (1 + exp(-x));
    }
    // Element-wise sigmoid activation.  Generalized to every row; the
    // original only filled row 0, leaving other rows uninitialized.
    Matrix<R, C> actFunc(void) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = 1 / (1 + exp(-m[r][c]));
        return ret;
    }
    // Element-wise sigmoid derivative e^x / (1+e^x)^2 (same row fix).
    Matrix<R, C> dActFunc(void) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
            {
                const double temp = exp(m[r][c]);
                ret.m[r][c] = temp / ((1+temp)*(1+temp));
            }
        return ret;
    }
};
// Forward declarations of the training-data helpers defined below.
void read_std_data(void);
void print_std_data(void);

// Problem size: 16 samples mapping a 4-bit binary input to a 16-way
// one-hot output position.
constexpr int TEST_AMT = 16;
constexpr int INPUT_AMT = 4;
constexpr int OUTPUT_AMT = 16;
constexpr float LEARNING_RATE = 0.05f;   // gradient-descent step size
constexpr double ERROR_LIMIT = 1e-7;     // stop training below this error

// Training set: one input row vector and one target row vector per sample.
using InputsType = Matrix<1, INPUT_AMT>[TEST_AMT];
using OutputsType = Matrix<1, OUTPUT_AMT>[TEST_AMT];
InputsType stdInputs;
OutputsType stdOutputs;
template <uint L1_AMT, uint L2_AMT>
class BP
{
private:
int _i = INPUT_AMT;
int _h1 = L1_AMT;
int _h2 = L2_AMT;
int _o = OUTPUT_AMT;
InputsType &inputs; // 训练用输入(向量)
OutputsType &outputs; // 训练用输入(向量)
Matrix< 1 , L1_AMT> B1h1; // 第一层的偏置系数(向量)
Matrix<INPUT_AMT, L1_AMT> Wih1; // 输入层和第一隐含层的突触(矩阵)
Matrix< 1 , L1_AMT> H1h1; // 第一隐含层神经元(向量)
Matrix< 1 , L2_AMT> B1h2; // 第二层的偏置系数(向量)
Matrix<L1_AMT, L2_AMT> Wh1h2; // 第一和第二隐含层间的突触(矩阵)
Matrix< 1 , L2_AMT> H1h2; // 第二隐含层神经元(向量)
Matrix< 1 , OUTPUT_AMT> B1o; // 输出层的偏置系数(向量)
Matrix<L2_AMT, OUTPUT_AMT> Wh2o; // 第二隐含层和输出层的突触(矩阵)
public:
BP(void) :
inputs(stdInputs), outputs(stdOutputs) { }
void init_inputs_and_outputs(const InputsType &inputs, const OutputsType &outputs)
{
this->inputs = inputs;
this->outputs = outputs;
}
void random_init(void)
{
H1h1.random_init(-1, 1);
H1h2.random_init(-1, 1);
Wih1.random_init(-1, 1);
Wh1h2.random_init(-1, 1);
Wh2o.random_init(-1, 1);
B1h1.random_init(-1, 1);
B1h2.random_init(-1, 1);
B1o.random_init(-1, 1);
}
void random_init_better(uint count)
{
printf("random selecting (%d times)...\n", count);
auto *bp = new BP<L1_AMT, L2_AMT>[count];
for (int i=0; i<count; ++i)
{
bp[i].random_init();
}
uint minErrorIndex = 0;
double minErrorValue = bp[0].get_error_sum();
double maxErrorValue = minErrorValue;
for (int i=1; i<count; ++i)
{
double nowErrorValue = bp[i].get_error_sum();
if (minErrorValue > nowErrorValue)
{
minErrorValue = nowErrorValue;
minErrorIndex = i;
}
if (maxErrorValue < nowErrorValue)
maxErrorValue = nowErrorValue;
}
const BP<L1_AMT, L2_AMT> &best_bp = bp[minErrorIndex];
this->B1h1 = best_bp.B1h1;
this->Wih1 = best_bp.Wih1;
this->H1h1 = best_bp.H1h1;
this->B1h2 = best_bp.B1h2;
this->Wh1h2 = best_bp.Wh1h2;
this->H1h2 = best_bp.H1h2;
this->B1o = best_bp.B1o;
this->Wh2o = best_bp.Wh2o;
printf("best: %f, worst: %f\n", minErrorValue, maxErrorValue);
puts("random select OK!");
delete []bp;
}
void evolution(uint count)
{
int minErrorIndex = -1;
double minErrorValue = get_error_sum();
BP<L1_AMT, L2_AMT> bp[count];
for (int i=0; i<count; ++i)
{
bp[i].B1h1 = this->B1h1;
bp[i].B1h1.random_fix(0.01);
bp[i].Wih1 = this->Wih1;
bp[i].Wih1.random_fix(0.01);
bp[i].H1h1 = this->H1h1;
bp[i].H1h1.random_fix(0.01);
bp[i].B1h2 = this->B1h2;
bp[i].B1h2.random_fix(0.01);
bp[i].Wh1h2 = this->Wh1h2;
bp[i].Wh1h2.random_fix(0.01);
bp[i].H1h2 = this->H1h2;
bp[i].H1h2.random_fix(0.01);
bp[i].B1o = this->B1o;
bp[i].B1o.random_fix(0.01);
bp[i].Wh2o = this->Wh2o;
bp[i].Wh2o.random_fix(0.01);
double nowErrorValue = bp[i].get_error_sum();
if (minErrorValue > nowErrorValue)
{
minErrorValue = nowErrorValue;
minErrorIndex = i;
}
}
if (minErrorIndex != -1) // 变异出了更好的
{
this->B1h1 = bp[minErrorIndex].B1h1;
this->Wih1 = bp[minErrorIndex].Wih1;
this->H1h1 = bp[minErrorIndex].H1h1;
this->B1h2 = bp[minErrorIndex].B1h2;
this->Wh1h2 = bp[minErrorIndex].Wh1h2;
this->H1h2 = bp[minErrorIndex].H1h2;
this->B1o = bp[minErrorIndex].B1o;
this->Wh2o = bp[minErrorIndex].Wh2o;
}
}
void train(double rate = LEARNING_RATE, double error_limit = ERROR_LIMIT)
{
puts("\nstart training:\n");
int times = 0;
while (true)
{
evolution(32);
++times;
double error = 0;
for (int t=0; t<TEST_AMT; t++)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
const Matrix<1, OUTPUT_AMT> D1o = Y1o - O1o;
Matrix<1, OUTPUT_AMT> E1o = D1o;
E1o ^= X1o.dActFunc();
const Matrix<OUTPUT_AMT, L2_AMT> Woh2 = ~Wh2o;
const Matrix<1, L2_AMT> D1h2 = E1o * Woh2;
Matrix<1, L2_AMT> E1h2 = D1h2;
E1h2 ^= X1h2.dActFunc();
const Matrix<L2_AMT, L1_AMT> Wh2h1 = ~Wh1h2;
const Matrix<1, L1_AMT> D1h1 = E1h2 * Wh2h1;
Matrix<1, L1_AMT> E1h1 = D1h1;
E1h1 ^= X1h1.dActFunc();
error += D1o.sq_sum() / 2;
Wih1 += rate * ((~O1i).kronecker(E1h1));
B1h1 += rate * E1h1;
Wh1h2 += rate * ((~O1h1).kronecker(E1h2));
B1h2 += rate * E1h2;
Wh2o += rate * ((~O1h2).kronecker(E1o));
B1o += rate * E1o;
}
if ((times & 8191) == 0 || times == 1)
{
printf("迭代次数:%d,均方误差:%f,各自输出: \n", times, error);
training_test();
}
if (error < error_limit)
{
printf("\n====================== 训练完成!======================\n迭代次数:%d,均方误差:%g\n绝对误差: \n", times, error);
final_test();
break;
}
}
}
double get_error_sum(void)
{
double error_sum = 0.0;
for (int t=0; t<TEST_AMT; ++t)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
const Matrix<1, OUTPUT_AMT> D1o = Y1o - O1o;
error_sum += D1o.sq_sum();
}
return error_sum;
}
void training_test(void)
{
for (int t=0; t<TEST_AMT; ++t)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
printf("[ ");
for (int t=0; t<OUTPUT_AMT; ++t)
if(fabs(O1o.m[0][t]) > 0.0001)printf("%.4f ", O1o.m[0][t]);
else printf(" ---- ");
printf("]\n");
}
printf("\n");
}
void final_test(void)
{
for (int t=0; t<TEST_AMT; ++t)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
printf("[ ");
for (int t=0; t<OUTPUT_AMT; ++t)
printf("%.4f ", O1o.m[0][t]);
printf("]\n");
}
printf("\n");
}
};
/* Load the TEST_AMT training samples from "in.txt" into the global
 * stdInputs / stdOutputs arrays.  Each sample is INPUT_AMT numbers
 * followed by OUTPUT_AMT numbers, whitespace-separated.
 */
void read_std_data(void)
{
    Matrix<1, INPUT_AMT> tInput;
    Matrix<1, OUTPUT_AMT> tOutput;
    std::ifstream fin("in.txt");
    if (!fin.is_open())
    {
        // Previously a missing file was silently ignored, leaving the
        // training data zero-initialized; at least warn the user.
        std::cerr << "read_std_data: cannot open in.txt\n";
        return;
    }
    for (int i=0; i<TEST_AMT; ++i)
    {
        for (int t=0; t<INPUT_AMT; ++t)
            fin >> tInput[0][t];
        for (int t=0; t<OUTPUT_AMT; ++t)
            fin >> tOutput[0][t];
        stdInputs[i] = tInput;
        stdOutputs[i] = tOutput;
    }
    // NOTE(review): a truncated file leaves later extractions failed
    // (values set to 0 since C++11) — presumably in.txt always holds all
    // 16 samples; verify against the data file.
    // fin closes automatically via RAII.
}
// Echo every training sample to stdout: the input fields first, then the
// target output fields, one sample per line.
void print_std_data(void)
{
    for (int row=0; row<TEST_AMT; ++row)
    {
        for (int col=0; col<INPUT_AMT; ++col)
            std::cout << stdInputs[row][0][col] << ' ';
        for (int col=0; col<OUTPUT_AMT; ++col)
            std::cout << stdOutputs[row][0][col] << ' ';
        std::cout << '\n';
    }
}
int main()
{
    srand(time(nullptr)); // seed the RNG used for weight init and mutation
    read_std_data();      // load the training pairs from in.txt

    BP<8, 8> bp;          // two hidden layers of 8 neurons each
    bp.random_init_better(50000);
    bp.train();
    return 0;
}