C++实现神经网络识别数字

最新推荐文章于 2022-09-07 16:25:40 发布

GaoJieVery6

最新推荐文章于 2022-09-07 16:25:40 发布

阅读量5.9k

点赞数 1

分类专栏：机器学习

本文链接：https://blog.csdn.net/WukongAKK/article/details/82687960

版权

机器学习专栏收录该内容

20 篇文章 2 订阅

订阅专栏

0.综述

我用的神经网络是ANN，下文会介绍训练ANN的反向传播算法并给出相应的数学推导过程，以及一个简单的C++实现的基于反向传播的ANN代码，这个ANN可以进行粗略的数字识别，代码简单易懂，适合新手练手。

1.本文提供的资料

一份用于训练神经网络参数的ANN代码，复制粘贴即可使用。

一份通过使用上述代码训练出的参数进行数字预测的代码，复制粘贴即可使用。

拥有5000个样例的数据集。

拥有10个数据的测试集。

//相关资料点这里

2.数据说明

通过把20*20像素的灰度图按列展开得到的向量，即一个数据有400个特征，给出利用matlab制作数据的代码。

% Converts one image file into the text format used by the C++ programs:
% 400 space-separated values, i.e. a 20x20 grayscale image unrolled column by
% column.
pic = imread('C:\Users\lenovo\Desktop\test\6.jpg');   % read the source image
fid = fopen('C:\Users\lenovo\Desktop\mypic.txt', 'wt');
if fid == -1
    % fopen signals failure with -1; stop before fprintf is called on it.
    error('Could not open the output file for writing.');
end
pic = rgb2gray(pic);                               % reduce the image to a 2-D grayscale matrix
pic = imresize(pic, [20, 20]);                     % shrink the image to 20x20 pixels
pic = im2double(pic);                              % convert the pixel values to double
T = pic(:);                                        % unroll the 20x20 matrix into a 400x1 column vector
for i = 1:400
    fprintf(fid, '%f ', T(i,1));
end;
fclose(fid);                                       % flush and release the output file

3.其他说明

a.由于我的二维数组开得很大，在编译代码时需要手动给程序扩大栈，否则程序可能出现运行错误。如果利用dev cpp编译，扩大栈的方法点这里。

b.为了使代码简单易懂，算法的实现相对粗略，且由于博主的水平所限，没有在代码中加入相应的正则化处理，梯度下降正确性检测等，导致训练出的参数无法识别数字0。

c.抛开上述两点，代码最大的优点就是简单易懂！！！复制粘贴即用！！！

d.数据集一定要随机化，大量的同一个数字连续出现这个神经网络多半就凉了

4.理论知识和数学推导

这些推导需要求偏导的知识，如果你看不懂，也没关系，这里有一个直观的理解：反向传播算法就是先把网络中的W、V两个矩阵随机初始化，初始化的范围是 $(-a, +a)$，其中 $a = \sqrt{6}/\sqrt{out + in}$，out和in分别是输出层和输入层单元的个数。这个初始化还是很重要的，初始化不对，很容易造成最终结果不收敛。初始化后，先正向传播，把alpha、b、beta、y这几个矩阵都算出来，y是输出层矩阵。然后开始反向传播，简单来说，就是通过y与标准输出的差别来调整W、V中的每个值，差得多就调整幅度大，差得少就调整幅度小，使通过正向传播得到的y与标准输出的差别越来越小。

这是伪代码：

5.源代码

基本和上面的伪代码是一样的

训练神经网络的cpp

#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <cmath>
#include <ctime>
#include <cstdlib>
#include <vector>
using namespace std;
// Upper bounds on matrix dimensions. Indices are 1-based, so a matrix holds at
// most (max1 - 1) rows and (max2 - 1) columns. The free functions in this file
// also size their scratch arrays with these constants.
const int max1 = 500;
const int max2 = 500;

// Dense matrix of doubles addressed with 1-based (row, column) indices.
//
// Elements live in a heap-allocated, zero-initialised buffer sized to the
// requested shape, so a Matrix object itself is only a few words large. The
// previous double[max1][max2] member made every object about 2 MB, which is
// what forced the enlarged stack.
class Matrix{
	public:
		// Builds a matrix with x rows and no columns. Matrix(-1) is the value
		// the operators in this file return to signal a shape mismatch; row()
		// then reports -1 and col() reports 0 (it used to be uninitialised).
		Matrix(int x = 0) : ro(x), co(0) { allocate(); }

		// Column-vector constructor: copies a[1..x] into column 1. y is stored
		// as the column count.
		Matrix(int x, int y,  double a[max1])
		: ro(x), co(y) {
			allocate();
			if(width > 1)
				for(int i = 1; i < height; i++)	el[cell(i, 1)] = a[i];
		}

		// General constructor: copies a[1..x][1..y].
		Matrix(int x, int y, double a[max1][max2])
		: ro(x), co(y) {
			allocate();
			for(int i = 1; i < height; i++)
			for(int j = 1; j < width; j++)
				el[cell(i, j)] = a[i][j];
		}


		// Transpose (defined outside the class).
		Matrix transpose();

		// Row count, column count and a single element.
		int row() const { return ro; }
		int col() const { return co; }
		// Reading outside the stored area yields 0.0 instead of touching
		// memory that does not belong to the matrix.
		double ele(const int& i, const int& j) const { return inside(i, j) ? el[cell(i, j)] : 0.0; }

		// Overwrites one element; writes outside the stored area are ignored.
		void change(int i, int j, double x) { if(inside(i, j)) el[cell(i, j)] = x; }

		// Sum of all elements.
		double sum() const {
			double s = 0;
			for(int i = 1; i < height; i++)
			for(int j = 1; j < width; j++)
				s += el[cell(i, j)];
			return s;
		}

		// Prints the matrix row by row with six decimals, followed by a blank line.
		void print() const {
			for(int i = 1; i < height; i++) {
				for(int j = 1; j < width; j++)
					printf("%.6lf ", el[cell(i, j)]);
				printf("\n");
			}
			printf("\n");
		}


	private:
		// Clamps a requested dimension into [0, limit - 1].
		static int clampDim(int n, int limit) { return n < 0 ? 0 : (n >= limit ? limit - 1 : n); }

		// Sizes the buffer for indices 0..rows and 0..cols. Index 0 is kept so
		// that 1-based addressing needs no offset arithmetic.
		void allocate() {
			height = clampDim(ro, max1) + 1;
			width = clampDim(co, max2) + 1;
			el.assign(static_cast<std::vector<double>::size_type>(height) * width, 0.0);
		}

		bool inside(int i, int j) const { return i >= 0 && i < height && j >= 0 && j < width; }

		std::vector<double>::size_type cell(int i, int j) const {
			return static_cast<std::vector<double>::size_type>(i) * width + j;
		}

		int ro;       // row count exactly as passed by the caller (may be -1)
		int co;       // column count exactly as passed by the caller
		int height;   // allocated rows, including the unused row 0
		int width;    // allocated columns, including the unused column 0
		std::vector<double> el;
};


// Problem dimensions.
const int data_num = 5000;   // number of samples in the data set
const int feature = 400;     // number of input features per sample
const int hidden = 25;       // size of the hidden layer
const int output = 10;       // size of the output layer

// Learning rates.
const double eta1 = 0.5;     // learning rate for the weight matrix W
const double eta2 = 0.5;     // learning rate for the weight matrix V

// Training data.
double data[5005][500];      // the data set, one sample per row
double Y[5005];              // the digit associated with each sample

// Weight matrices.
Matrix V;
Matrix W;

// Working matrices of the network. hidden_bias is the bias vector of the
// hidden layer and output_bias the bias vector of the output layer.
Matrix alpha;
Matrix b;
Matrix hidden_bias;
Matrix beta;
Matrix error3;
Matrix error2;
Matrix deltaW;
Matrix deltaV;
Matrix y;
Matrix output_bias;


// Transpose: returns a new matrix whose element (c, r) equals this matrix's
// element (r, c).
Matrix Matrix::transpose(){
	const int rows = this->row();
	const int cols = this->col();
	double flipped[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			flipped[c][r] = this->ele(r, c);
		}
	}
	return Matrix(cols, rows, flipped);
}


// Matrix addition: element-wise sum of two matrices of identical shape.
// Returns Matrix(-1) when the shapes differ.
inline
Matrix operator + (const Matrix& lhs, const Matrix& rhs){
	if(!(lhs.row() == rhs.row() && lhs.col() == rhs.col())) {
		return Matrix(-1);
	}
	const int rows = lhs.row();
	const int cols = lhs.col();
	double total[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			total[r][c] = lhs.ele(r, c) + rhs.ele(r, c);
		}
	}
	return Matrix(rows, cols, total);
}


// Matrix subtraction, implemented as lhs + (-rhs). The shape check is the one
// performed by operator+.
inline
Matrix operator - (const Matrix& lhs, const Matrix& rhs) {
	const int rows = rhs.row();
	const int cols = rhs.col();
	double negated[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			negated[r][c] = -rhs.ele(r, c);
		}
	}
	return lhs + Matrix(rows, cols, negated);
}


// Scalar minus matrix: element (r, c) of the result is lhs - rhs(r, c).
inline
Matrix operator - (const double& lhs, const Matrix& rhs) {
	const int rows = rhs.row();
	const int cols = rhs.col();
	double diff[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			diff[r][c] = lhs - rhs.ele(r, c);
		}
	}
	return Matrix(rows, cols, diff);
}


// Matrix multiplication. Returns Matrix(-1) when the column count of lhs does
// not match the row count of rhs.
inline
Matrix operator * (const Matrix& lhs, const Matrix& rhs) {
	if(lhs.col() != rhs.row()) {
		return Matrix(-1);
	}
	const int rows = lhs.row();
	const int inner = lhs.col();
	const int cols = rhs.col();
	double product[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			// Start every used cell at zero, then accumulate the dot product.
			product[r][c] = 0;
			for(int k = 1; k <= inner; ++k) {
				product[r][c] += lhs.ele(r, k) * rhs.ele(k, c);
			}
		}
	}
	return Matrix(rows, cols, product);
}


// Scalar times matrix: every element of rhs multiplied by lhs.
inline
Matrix operator * (const double& lhs, const Matrix& rhs) {
	const int rows = rhs.row();
	const int cols = rhs.col();
	double scaled[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			scaled[r][c] = rhs.ele(r, c) * lhs;
		}
	}
	return Matrix(rows, cols, scaled);
}


// Element-wise product of two matrices of identical shape.
// Returns Matrix(-1) when the shapes differ.
inline
Matrix operator ->* (const Matrix& lhs, const Matrix& rhs) {
	if(!(lhs.row() == rhs.row() && lhs.col() == rhs.col())) {
		return Matrix(-1);
	}
	const int rows = lhs.row();
	const int cols = lhs.col();
	double prod[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			prod[r][c] = lhs.ele(r, c) * rhs.ele(r, c);
		}
	}
	return Matrix(rows, cols, prod);
}
	

// Matrix divided by a scalar: every element of lhs divided by rhs.
inline
Matrix operator / (const Matrix& lhs, const double& rhs) {
	const int rows = lhs.row();
	const int cols = lhs.col();
	double quotient[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			quotient[r][c] = lhs.ele(r, c) / rhs;
		}
	}
	return Matrix(rows, cols, quotient);
}


//读取数据 
void read() {
	for(int i = 1; i <= 5000; i++)
	for(int j = 1; j <= 400; j++)
	{
		scanf("%lf", &data[i][j-1]);
	}
	for(int i = 1; i <= 5000; i++)
		cin >> Y[i];
}


//提取数据集中的一行作为输入 
int get_data(int id, Matrix& T) {
	T = Matrix(feature, 1, data[id]);
	return Y[id];
}


// Sigmoid function: 1 / (1 + e^(-t)).
double sigmoid(double t) {
	const double denominator = 1.0 + std::exp(-t);
	return 1.0 / denominator;
}


// Applies the scalar sigmoid to every element of T and returns the result as
// a new matrix of the same shape.
Matrix sigmoid(Matrix T) {
	const int rows = T.row();
	const int cols = T.col();
	double squashed[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			squashed[r][c] = sigmoid(T.ele(r, c));
		}
	}
	return Matrix(rows, cols, squashed);
}


//随机初始化 
void random_inilialize() {
	double temp[max1][max2];
	double temp1[max1];
	for(int i = 1; i <= hidden; i++)
	for(int j = 1; j <= feature; j++)
		temp[i][j] = double(rand() % 1000) / 5000 - 0.1;
	V = Matrix(hidden, feature, temp);
	for(int i = 1; i <= output; i++)
	for(int j = 1; j <= hidden; j++)
		temp[i][j] = double(rand() % 1000) / 5000 - 0.1;
	W = Matrix(output, hidden, temp);
	for(int i = 1; i <= hidden; i++)
		temp1[i] = double(rand() % 1000) / 5000 - 0.1;
	hidden_bias = Matrix(hidden, 1, temp1);
	for(int i = 1; i <= output; i++)
		temp1[i] = double(rand() % 1000) / 5000 - 0.1;
	output_bias = Matrix(output, 1, temp1);
}
	

// Output-layer error term. For each output unit i the result holds
// out_i * (1 - out_i) * (target_i - out_i), where target_i is 1 for the unit
// whose index equals `right` and 0 for every other unit.
//
// A label outside 1..output (for example 0) matches no unit, so such a sample
// is trained towards an all-zero target.
// NOTE(review): confirm how the data set encodes the digit 0.
Matrix compute_error(Matrix out, int right) {
	double delta[max1];
	for(int unit = 1; unit <= output; ++unit) {
		const double actual = out.ele(unit, 1);
		double target = 0;
		if(unit == right) {
			target = 1;
		}
		delta[unit] = actual * (1 - actual) * (target - actual);
	}
	return Matrix(out.row(), 1, delta);
}

/*
Matrix bias(const Matrix& t) {
	double temp[max1];
	for(int i = 1; i <= t.row(); i++)	temp[i+1] = t.ele(i, 1);
	temp[1] = -1;
	return Matrix(t.row()+1, 1, temp);
}


Matrix remove_bias(Matrix T) {
	double temp[max1];
	for(int i = 2; i <= T.row(); i++)
		temp[i-1] = T.ele(i, 1);
	return  Matrix(T.row()-1, 1, temp);
}
*/

// Draws a sample as 20x20 ASCII art. The first 400 elements of the column
// vector X are laid out column by column; cells above 0.3 print as '.', all
// others as a space.
void picture(Matrix X) {
	double pic[25][25];
	int next = 1;
	for(int col = 1; col <= 20; ++col) {
		for(int row = 1; row <= 20; ++row) {
			pic[row][col] = X.ele(next, 1);
			++next;
		}
	}
	for(int row = 1; row <= 20; ++row) {
		for(int col = 1; col <= 20; ++col) {
			printf(pic[row][col] > 0.3 ? "." : " ");
		}
		printf("\n");
	}
}
			

// Runs one training step on a single sample.
//
// X     : feature x 1 input column vector.
// right : index (1-based) of the output unit that should fire for this sample.
//
// Updates the global parameters V, W, hidden_bias and output_bias, and leaves
// the intermediate results in alpha, b, beta, y, error2, error3, deltaV and
// deltaW.
void trainANN(Matrix X, int right) {

	// Forward pass. Both layers compute sigmoid(net input - bias), i.e. the
	// bias vectors act as thresholds that are subtracted.
	alpha = V * X;
	b = sigmoid(alpha - hidden_bias);
	beta = W * b;
	y = sigmoid(beta - output_bias);
	//y.print();

	// Backward pass.
	// Output-layer error term.
	error3 = compute_error(y, right);
	// Change of W.
	deltaW = eta1 * error3 * b.transpose();
	// Hidden-layer error term; it must use W before W is updated below.
	error2 = b ->* (1-b) ->* (W.transpose() * error3);
	// Change of V.
	deltaV = eta2 * error2 * X.transpose();

	// Apply the updates.
	V = V + deltaV;
	W = W + deltaW;
	// Because the thresholds enter the forward pass with a minus sign, the
	// squared error's derivative with respect to a threshold equals the
	// layer's error term, so the descent step subtracts eta * error. The
	// previous code added it, moving the thresholds uphill.
	output_bias = output_bias - eta1 * error3;
	hidden_bias = hidden_bias - eta2 * error2;
}

/*
int predict(Matrix R, int& minv) {
	y = W * bias(V * bias(R));
	int id = -1;
	minv = -1;
	for(int i = 1; i <= y.row(); i++)
		if(y.ele(i, 1) > minv) {
			id = i; minv = y.ele(i, 1);
		}
	return id;
}
*/	
		
int main() {
	freopen("zztest.txt","r",stdin);
	freopen("zans.txt","w",stdout);
	srand(time(NULL));
	read();
	random_inilialize();
	double temp[max1][max2];
	memset(temp, 0, sizeof(temp));
	double m = 5000;
	for(int i = 1; i <= 3; i++){
		deltaV = Matrix(V.row(), V.col(), temp);
		deltaW = Matrix(W.row(), W.col(), temp);
		for(int j = 1; j <= 5000; j++) {
			Matrix X;
			int right = get_data(j, X);
			//printf("%d %d %d\n", i, j, right);
			//picture(X);
			trainANN(X, right);
		}
	}
	//输出各权重矩阵 
	V.print();
	W.print();
	hidden_bias.print();
	output_bias.print();
	return 0;
}

预测数字的cpp

#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <cmath>
#include <ctime>
#include <cstdlib>
#include <vector>
using namespace std;
// Upper bounds on matrix dimensions. Indices are 1-based, so a matrix holds at
// most (max1 - 1) rows and (max2 - 1) columns. The free functions in this file
// also size their scratch arrays with these constants.
const int max1 = 500;
const int max2 = 500;

// Dense matrix of doubles addressed with 1-based (row, column) indices.
//
// Elements live in a heap-allocated, zero-initialised buffer sized to the
// requested shape, so a Matrix object itself is only a few words large. The
// previous double[max1][max2] member made every object about 2 MB.
class Matrix{
	public:
		// Default constructor: x rows and no columns. Matrix(-1) is the value
		// the operators in this file return to signal a shape mismatch; row()
		// then reports -1 and col() reports 0 (it used to be uninitialised).
		Matrix(int x = 0) : ro(x), co(0) { allocate(); }

		// Column-vector constructor: copies a[1..x] into column 1. y is stored
		// as the column count.
		Matrix(int x, int y,  double a[max1])
		: ro(x), co(y) {
			allocate();
			if(width > 1)
				for(int i = 1; i < height; i++)	el[cell(i, 1)] = a[i];
		}

		// General constructor: copies a[1..x][1..y].
		Matrix(int x, int y, double a[max1][max2])
		: ro(x), co(y) {
			allocate();
			for(int i = 1; i < height; i++)
			for(int j = 1; j < width; j++)
				el[cell(i, j)] = a[i][j];
		}

		// Transpose (defined outside the class).
		Matrix transpose();

		// Row count, column count and a single element.
		int row() const { return ro; }
		int col() const { return co; }
		// Reading outside the stored area yields 0.0 instead of touching
		// memory that does not belong to the matrix.
		double ele(const int& i, const int& j) const { return inside(i, j) ? el[cell(i, j)] : 0.0; }

		// Overwrites one element; writes outside the stored area are ignored.
		void change(int i, int j, double x) { if(inside(i, j)) el[cell(i, j)] = x; }

		// Sum of all elements.
		double sum() const {
			double s = 0;
			for(int i = 1; i < height; i++)
			for(int j = 1; j < width; j++)
				s += el[cell(i, j)];
			return s;
		}

		// Prints the matrix row by row with six decimals, followed by a blank line.
		void print() const {
			for(int i = 1; i < height; i++) {
				for(int j = 1; j < width; j++)
					printf("%.6lf ", el[cell(i, j)]);
				printf("\n");
			}
			printf("\n");
		}


	private:
		// Clamps a requested dimension into [0, limit - 1].
		static int clampDim(int n, int limit) { return n < 0 ? 0 : (n >= limit ? limit - 1 : n); }

		// Sizes the buffer for indices 0..rows and 0..cols. Index 0 is kept so
		// that 1-based addressing needs no offset arithmetic.
		void allocate() {
			height = clampDim(ro, max1) + 1;
			width = clampDim(co, max2) + 1;
			el.assign(static_cast<std::vector<double>::size_type>(height) * width, 0.0);
		}

		bool inside(int i, int j) const { return i >= 0 && i < height && j >= 0 && j < width; }

		std::vector<double>::size_type cell(int i, int j) const {
			return static_cast<std::vector<double>::size_type>(i) * width + j;
		}

		int ro;       // row count exactly as passed by the caller (may be -1)
		int co;       // column count exactly as passed by the caller
		int height;   // allocated rows, including the unused row 0
		int width;    // allocated columns, including the unused column 0
		std::vector<double> el;
};


const int data_num = 5000;//number of samples in the data set
const int feature = 400;//number of input features
const int hidden = 25;//size of the hidden layer
const int output = 10;//size of the output layer
// Learning rate. It was declared `const int`, which truncated 0.03 to 0.
const double eta = 0.03;
Matrix alpha, b, hidden_bias, beta, error3, error2, deltaW, deltaV, y, output_bias;
//alpha, b, beta, deltaW, deltaV and y are named as in the first figure of section 4.
//hidden_bias is the bias vector of the hidden layer, output_bias the bias vector of the output layer.
double data[5005][500];//data set
Matrix V, W, X;//named as in the first figure of section 4
double Y[5005];//the digit associated with each sample

// Transpose: returns a new matrix whose element (c, r) equals this matrix's
// element (r, c).
Matrix Matrix::transpose(){
	const int rows = this->row();
	const int cols = this->col();
	double flipped[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			flipped[c][r] = this->ele(r, c);
		}
	}
	return Matrix(cols, rows, flipped);
}


// Matrix addition: element-wise sum of two matrices of identical shape.
// Returns Matrix(-1) when the shapes differ.
inline
Matrix operator + (const Matrix& lhs, const Matrix& rhs){
	if(!(lhs.row() == rhs.row() && lhs.col() == rhs.col())) {
		return Matrix(-1);
	}
	const int rows = lhs.row();
	const int cols = lhs.col();
	double total[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			total[r][c] = lhs.ele(r, c) + rhs.ele(r, c);
		}
	}
	return Matrix(rows, cols, total);
}


// Matrix subtraction, implemented as lhs + (-rhs). The shape check is the one
// performed by operator+.
inline
Matrix operator - (const Matrix& lhs, const Matrix& rhs) {
	const int rows = rhs.row();
	const int cols = rhs.col();
	double negated[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			negated[r][c] = -rhs.ele(r, c);
		}
	}
	return lhs + Matrix(rows, cols, negated);
}


// Scalar minus matrix: element (r, c) of the result is lhs - rhs(r, c).
inline
Matrix operator - (const double& lhs, const Matrix& rhs) {
	const int rows = rhs.row();
	const int cols = rhs.col();
	double diff[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			diff[r][c] = lhs - rhs.ele(r, c);
		}
	}
	return Matrix(rows, cols, diff);
}


// Matrix multiplication. Returns Matrix(-1) when the column count of lhs does
// not match the row count of rhs.
inline
Matrix operator * (const Matrix& lhs, const Matrix& rhs) {
	if(lhs.col() != rhs.row()) {
		return Matrix(-1);
	}
	const int rows = lhs.row();
	const int inner = lhs.col();
	const int cols = rhs.col();
	double product[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			// Start every used cell at zero, then accumulate the dot product.
			product[r][c] = 0;
			for(int k = 1; k <= inner; ++k) {
				product[r][c] += lhs.ele(r, k) * rhs.ele(k, c);
			}
		}
	}
	return Matrix(rows, cols, product);
}


// Scalar times matrix: every element of rhs multiplied by lhs.
inline
Matrix operator * (const double& lhs, const Matrix& rhs) {
	const int rows = rhs.row();
	const int cols = rhs.col();
	double scaled[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			scaled[r][c] = rhs.ele(r, c) * lhs;
		}
	}
	return Matrix(rows, cols, scaled);
}


// Element-wise product of two matrices of identical shape, similar to ".*" in
// MATLAB. Returns Matrix(-1) when the shapes differ.
inline
Matrix operator ->* (const Matrix& lhs, const Matrix& rhs) {
	if(!(lhs.row() == rhs.row() && lhs.col() == rhs.col())) {
		return Matrix(-1);
	}
	const int rows = lhs.row();
	const int cols = lhs.col();
	double prod[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			prod[r][c] = lhs.ele(r, c) * rhs.ele(r, c);
		}
	}
	return Matrix(rows, cols, prod);
}
	

// Matrix divided by an integer scalar: every element of lhs divided by rhs.
inline
Matrix operator / (const Matrix& lhs, const int& rhs) {
	const int rows = lhs.row();
	const int cols = lhs.col();
	double quotient[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			quotient[r][c] = lhs.ele(r, c) / rhs;
		}
	}
	return Matrix(rows, cols, quotient);
}


// Reads the trained parameters and one sample from stdin, in this order:
// V (hidden x feature), W (output x hidden), hidden_bias (hidden values),
// output_bias (output values), then the sample X (feature values).
//
// The scratch arrays are zero-initialised so that a failed extraction never
// leaves indeterminate values behind, and a message is written to stderr when
// the input is short or malformed. Execution continues in that case, as
// before.
void read() {
	double temp[max1][max2] = {};
	double temp1[max1] = {};
	for(int i = 1; i <= hidden; i++)
	for(int j = 1; j <= feature; j++)
		cin >> temp[i][j];
	// V matrix
	V = Matrix(hidden, feature, temp);
	for(int i = 1; i <= output; i++)
	for(int j = 1; j <= hidden; j++)
		cin >> temp[i][j];
	// W matrix
	W = Matrix(output, hidden, temp);
	for(int i = 1; i <= hidden; i++)
		cin >> temp1[i];
	// hidden-layer bias vector
	hidden_bias = Matrix(hidden, 1, temp1);
	for(int i = 1; i <= output; i++)
		cin >> temp1[i];
	// output-layer bias vector
	output_bias = Matrix(output, 1, temp1);
	// The sample has `feature` values; the literal 400 was replaced so the
	// size is defined in one place.
	for(int i = 1; i <= feature; i++)
		cin >> temp1[i];
	// sample to classify
	X = Matrix(feature, 1, temp1);
	if(!cin)
		fprintf(stderr, "read: input ended early or is malformed; the loaded parameters are incomplete\n");
}


// Sigmoid function: 1 / (1 + e^(-t)).
double sigmoid(double t) {
	const double denominator = 1.0 + std::exp(-t);
	return 1.0 / denominator;
}


// Matrix sigmoid: applies the scalar sigmoid to every element of T and returns
// the result as a new matrix of the same shape.
Matrix sigmoid(Matrix T) {
	const int rows = T.row();
	const int cols = T.col();
	double squashed[max1][max2];
	for(int r = 1; r <= rows; ++r) {
		for(int c = 1; c <= cols; ++c) {
			squashed[r][c] = sigmoid(T.ele(r, c));
		}
	}
	return Matrix(rows, cols, squashed);
}
	

//预测 
void predict() {
	alpha = V * X;
	b = sigmoid(alpha - hidden_bias);
	beta = W * b;
	y = sigmoid(beta - output_bias);
	//输出测试数据是各个数字的概率 
	y.print();
	double maxv = -1;
	int id = -1;
	for(int i = 1; i <= 10; i++) {
		if(y.ele(i, 1) > maxv) {
			maxv = y.ele(i, 1);
			id = i;
		}
	}
	printf("The digit is : %d\n", id);
}

// Draws the global sample X as 20x20 ASCII art. The first 400 elements of X
// are laid out column by column; cells above 0.3 print as '.', all others as
// a space.
void picture() {
	double pic[25][25];
	int next = 1;
	for(int col = 1; col <= 20; ++col) {
		for(int row = 1; row <= 20; ++row) {
			pic[row][col] = X.ele(next, 1);
			++next;
		}
	}
	for(int row = 1; row <= 20; ++row) {
		for(int col = 1; col <= 20; ++col) {
			printf(pic[row][col] > 0.3 ? "." : " ");
		}
		printf("\n");
	}
}


// Prediction driver: reads the parameters and one sample from ztest.txt,
// prints the network output and the predicted unit, then draws the sample.
int main() {
	// Stop early when the input file is missing instead of predicting from
	// values that were never read.
	if(freopen("ztest.txt","r",stdin) == NULL) {
		fprintf(stderr, "cannot open ztest.txt for reading\n");
		return 1;
	}
	read();
	predict();
	picture();
	return 0;
}

运行结果如下，上面是测试数据为各数字的概率，中间是预测的数字，下面是数据的展示。

GaoJieVery6

关注

1
点赞
踩
49

收藏

觉得还不错? 一键收藏
4
评论
C++实现神经网络识别数字

0.综述我用的神经网络是ANN，下文会介绍训练ANN的反向传播算法并给出相应的数学推导过程，以及一个简单的C++实现的基于反向传播的ANN代码，这个ANN可以进行粗略的数字识别，代码简单易懂，适合新手练手。1.本文提供的资料一份用于训练神经网络参数的ANN代码，复制粘贴即可使用。一份通过使用上述代码训练出的参数进行数字预测的代码，复制粘贴即可使用。拥有5000个样例的数据集。...
复制链接

扫一扫