C++识别MNIST数据集（二分类）

最新推荐文章于 2022-05-21 21:24:08 发布
mine .
最新推荐文章于 2022-05-21 21:24:08 发布
阅读量855
点赞数 1
分类专栏： C++ 文章标签： c++ 机器学习
本文链接：https://blog.csdn.net/weixin_44263726/article/details/109892252
版权
C++ 专栏收录该内容
1 篇文章 0 订阅
订阅专栏
C++识别MNIST数据集

#include <vector> 
#include <string> 
#include <fstream> 
#include <iostream>
#include <Eigen/Eigen/Dense> 
#include <ctime>
#include <windows.h>

using namespace std;
using namespace Eigen;

// 大端转小端
int ReverseInt(int i)
{
	unsigned char ch1, ch2, ch3, ch4;
	ch1 = i & 255;
	ch2 = (i >> 8) & 255;
	ch3 = (i >> 16) & 255;
	ch4 = (i >> 24) & 255;
	return((int)ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ch4;
}

// 建立读取标签的函数
void read_Mnist_Label(string filename, vector<float>&labels)
{
	ifstream file(filename, ios::binary);
	if (file.is_open())
	{
		int magic_number = 0;
		int number_of_images = 0;
		file.read((char*)&magic_number, sizeof(magic_number));
		file.read((char*)&number_of_images, sizeof(number_of_images));
		magic_number = ReverseInt(magic_number);
		number_of_images = ReverseInt(number_of_images);
		cout << endl;
		cout << "\t" << "magic number = " << magic_number << endl;
		cout << "\t" << "number of images = " << number_of_images << endl;
		for (int i = 0;
			i < number_of_images; i++)
		{
			unsigned char label = 0;
			file.read((char*)&label, sizeof(label));
			labels.push_back((float)label);
		}
	}
}

// 建立读取图像的函数
void read_Mnist_Images(string filename, vector<vector<float>>&images)
{
	ifstream file(filename, ios::binary);
	if (file.is_open())
	{
		int magic_number = 0;
		int number_of_images = 0;
		int n_rows = 0;
		int n_cols = 0;
		// unsigned char label;
		file.read((char*)&magic_number, sizeof(magic_number));
		file.read((char*)&number_of_images, sizeof(number_of_images));
		file.read((char*)&n_rows, sizeof(n_rows));
		file.read((char*)&n_cols, sizeof(n_cols));
		magic_number = ReverseInt(magic_number);
		number_of_images = ReverseInt(number_of_images);
		n_rows = ReverseInt(n_rows);
		n_cols = ReverseInt(n_cols);
		cout << endl;
		cout << "\t" << "magic number = " << magic_number << endl;
		cout << "\t" << "number of images = " << number_of_images << endl;
		cout << "\t" << "rows = " << n_rows << endl;
		cout << "\t" << "cols = " << n_cols << endl;
		for (int i = 0; i < number_of_images; i++)
		{
			vector<float>tp;
			for (int r = 0; r < n_rows; r++)
			{
				for (int c = 0; c < n_cols; c++)
				{
					unsigned char image = 0;
					file.read((char*)&image, sizeof(image));
					tp.push_back(image);
				}
			}
			images.push_back(tp);
		}
	}
}

int main(int argc,char *argv[])
{
	SetConsoleTitle(L"Handwritten digit classification");
	clock_t tStart = clock();
	Eigen::initParallel();

	vector<vector<float>>images;           // 声明训练集样本矩阵
	vector<float>labels;                   // 声明训练集标签向量
	vector<int> effective_col;             // 声明有效特征向量所在列

	cout << endl << "训练集图片信息：" << endl;
	read_Mnist_Images("train-images.idx3-ubyte", images);          // 读取训练集样本数据
	cout << endl << "训练集标签信息：" << endl;
	read_Mnist_Label("train-labels.idx1-ubyte", labels);           // 读取训练集标签数据

	auto m = images.size();      // 训练集矩阵行数
	auto n = images[0].size();   // 训练集矩阵列数
	auto b = labels.size();      // 训练集标签个数

	// 统计训练集样本中有效特征所在列
	for (unsigned j = 0; j < n; j++)
	{
		int num_of_nonzero = 0;
		for (unsigned i = 0; i < m; i++)
		{
			if (images[i][j] != 0)
				num_of_nonzero += 1;
		}
		if (num_of_nonzero >= 600)
			effective_col.push_back(j);

	}

	auto num_key = effective_col.size();      // 有效特征个数

	// cout << endl << "训练集有效特征列数 = " << num_key << endl;

	// 建立训练集60000*494的特征矩阵
	MatrixXf Characteristic_matrix(m, num_key + 1);
	for (unsigned i = 0; i < m; i++)
	{
		for (unsigned j = 0; j < num_key; j++)
		{
			Characteristic_matrix(i, j) = images[i][effective_col[j]];
		}
		Characteristic_matrix(i, num_key) = 1;
	}

	images.~vector<vector<float>>();        // 析构函数，释放内存

	auto m_cm = Characteristic_matrix.rows();        // 训练集特征矩阵行数
	auto n_cm = Characteristic_matrix.cols();        // 训练集特征矩阵列数

	cout << endl;
	cout << "训练集特征矩阵的行数 = " << m_cm << endl
		 << "训练集特征矩阵的列数 = " << n_cm << endl;

	VectorXf actual_Y(b);                   // 初始化训练集实际值

	// 将训练集标签分类，0 = 1，非0 = -1
	for (unsigned i = 0; i < b; i++)
		labels[i] == 0 ? actual_Y(i) = 1 : actual_Y(i) = -1;

	labels.~vector<float>();      // 析构函数，释放内存

	// 用QR分解求取拟合系数向量
	VectorXf fit_coefficient = Characteristic_matrix.householderQr().solve(actual_Y);

	// 利用模型对训练集进行预测，并计算错误率
	VectorXf prediction_Y = Characteristic_matrix * fit_coefficient;

	int tp = 0, fn = 0, fp = 0, tn = 0;       // tp:正确的正值  fn:错误的负值  fp:错误的正值  tn:正确的负值

	for (unsigned j = 0; j < prediction_Y.size(); j++)
	{
		if (prediction_Y[j] >= 0 && actual_Y[j] == 1)
			tp += 1;
		if (prediction_Y[j] >= 0 && actual_Y[j] == -1)
			fn += 1;
		if (prediction_Y[j] < 0 && actual_Y[j] == 1)
			fp += 1;
		if (prediction_Y[j] < 0 && actual_Y[j] == -1)
			tn += 1;
	}

	cout << endl;
	cout << "训练集预测结果：" << endl;
	cout << "\t" << "tp = " << tp << "\t" << "fn = " << fn << endl
		 << "\t" << "fp = " << fp << "\t" << "tn = " << tn << endl;
	cout << "\t" << "错误率 = " << 1.0*(fn + fp) / 60000 << endl;

	vector<vector<float>>test_images;      // 声明测试集样本矩阵
	vector<float>test_labels;              // 声明测试集标签向量

	cout << endl << "测试集图片信息：" << endl;
	read_Mnist_Images("t10k-images.idx3-ubyte", test_images);         // 读取测试集样本数据
	cout << endl << "测试集标签信息：" << endl;
	read_Mnist_Label("t10k-labels.idx1-ubyte", test_labels);          // 读取测试集标签数据

	auto m_test = test_images.size();      // 测试集矩阵行数
	auto b_test = test_labels.size();      // 测试集标签个数

	//建立测试集10000*494的特征矩阵
	MatrixXf t_Characteristic_matrix(m_test, num_key + 1);
	for (unsigned i = 0; i < m_test; i++)
	{
		for (unsigned j = 0; j < num_key; j++)
		{
			t_Characteristic_matrix(i, j) = test_images[i][effective_col[j]];
		}
		t_Characteristic_matrix(i, num_key) = 1;
	}

	auto m_tcm = t_Characteristic_matrix.rows();        // 测试集特征矩阵行数
	auto n_tcm = t_Characteristic_matrix.cols();        // 测试集特征矩阵列数

	cout << "测试集特征矩阵的行数 = " << m_tcm << endl
		 << "测试集特征矩阵的列数 = " << n_tcm << endl;


	VectorXf t_actual_Y(b_test);            // 初始化测试集实际值

	// 将测试集标签分类，0 = 1，非0 = -1
	for (unsigned i = 0; i < b_test; i++)
		test_labels[i] == 0 ? t_actual_Y(i) = 1 : t_actual_Y(i) = -1;

	test_labels.~vector<float>();     // 析构函数，释放内存

	// 利用模型对测试集进行预测，并计算错误率
	VectorXf t_prediction_Y = t_Characteristic_matrix * fit_coefficient;

	tp = fn = fp = tn = 0;
	for (unsigned j = 0; j < t_prediction_Y.size(); j++)
	{
		if (t_prediction_Y[j] >= 0 && t_actual_Y[j] == 1)
			tp += 1;
		if (t_prediction_Y[j] >= 0 && t_actual_Y[j] == -1)
			fn += 1;
		if (t_prediction_Y[j] < 0 && t_actual_Y[j] == 1)
			fp += 1;
		if (t_prediction_Y[j] < 0 && t_actual_Y[j] == -1)
			tn += 1;
	}
	cout << endl;
	cout << "测试集预测结果：" << endl;
	cout << "\t" << "tp = " << tp << "\t" << "fn = " << fn << endl
		 << "\t" << "fp = " << fp << "\t" << "tn = " << tn << endl;
	cout << "\t" << "错误率 = " << 1.0*(fn + fp) / 10000 << endl;

	cout << endl << "---------------------添加随机特征---------------------------" << endl;

	unsigned random_character = 5000;        // 设置随机特征个数

	// 构建随机矩阵R(494,5000)  矩阵元素取 ±1
	MatrixXf R = MatrixXf::Random(n_cm, random_character);
	for (unsigned j = 0; j < random_character; j++)
	{
		for (unsigned i = 0; i <n_cm; i++)
		{
			R(i, j)>0 ? R(i, j) = 1 : R(i, j) = -1;
		}
	}

	// 用随机矩阵构建新特征
	MatrixXf R_Characteristic_matrix = Characteristic_matrix * R;   // 60000*5000
	for (unsigned i = 0; i < m_cm; i++)
	{
		for (unsigned j = 0; j < random_character; j++)
		{
			if (R_Characteristic_matrix(i, j) < 0)
				R_Characteristic_matrix(i, j) = 0;
		}
	}
	//cout << "训练集随机矩阵的行数 = " << R_Characteristic_matrix.rows() << endl
	//	 << "训练集随机矩阵的列数 = " << R_Characteristic_matrix.cols() << endl;

	// 构建新的特征矩阵
	MatrixXf new_Characteristic_matrix(m, n_cm + random_character);
	new_Characteristic_matrix << Characteristic_matrix, R_Characteristic_matrix;

	Characteristic_matrix.resize(0, 0);
	R_Characteristic_matrix.resize(0, 0);

	cout << endl;
	cout << "训练集新特征矩阵的行数 = " << new_Characteristic_matrix.rows() << endl
		 << "训练集新特征矩阵的列数 = " << new_Characteristic_matrix.cols() << endl;

	// 求新的拟合系数
	VectorXf new_fit_coefficient = new_Characteristic_matrix.householderQr().solve(actual_Y);

	VectorXf new_prediction_Y = new_Characteristic_matrix * new_fit_coefficient;

	tp = fn = fp = tn = 0;
	for (int j = 0; j < new_prediction_Y.size(); j++)
	{
		if (new_prediction_Y[j] >= 0 && actual_Y[j] == 1)
			tp += 1;
		if (new_prediction_Y[j] >= 0 && actual_Y[j] == -1)
			fn += 1;
		if (new_prediction_Y[j] < 0 && actual_Y[j] == 1)
			fp += 1;
		if (new_prediction_Y[j] < 0 && actual_Y[j] == -1)
			tn += 1;
	}
	cout << endl;
	cout << "训练集预测结果：" << endl;
	cout << "\t" << "tp = " << tp << "\t" << "fn = " << fn << endl
		 << "\t" << "fp = " << fp << "\t" << "tn = " << tn << endl;
	cout << "\t" << "错误率 = " << 1.0*(fn + fp) / 60000 << endl;

	// 构建测试集的随机特征
	MatrixXf R_t_Characteristic_matrix = t_Characteristic_matrix * R;   //10000*5000
	R.resize(0, 0);    // 释放内存
	for (unsigned i = 0; i < m_test; i++)
	{
		for (unsigned j = 0; j < random_character; j++)
		{
			if (R_t_Characteristic_matrix(i, j) < 0)
				R_t_Characteristic_matrix(i, j) = 0;
		}
	}
	//cout << "测试集随机矩阵的行数=" << R_t_Characteristic_matrix.rows() << endl
	//	 << "测试集随机矩阵的列数=" << R_t_Characteristic_matrix.cols() << endl;

	// 构建测试集新的特征矩阵
	MatrixXf n_t_Characteristic_matrix(m_test, n_tcm + random_character);
	n_t_Characteristic_matrix << t_Characteristic_matrix, R_t_Characteristic_matrix;

	cout << endl;
	cout << "测试集新特征矩阵的行数 = " << n_t_Characteristic_matrix.rows() << endl
		 << "测试集新特征矩阵的列数 = " << n_t_Characteristic_matrix.cols() << endl;

	VectorXf n_t_prediction_Y = n_t_Characteristic_matrix * new_fit_coefficient;

	tp = fn = fp = tn = 0;
	for (int j = 0; j < n_t_prediction_Y.size(); j++)
	{
		if (n_t_prediction_Y[j] >= 0 && t_actual_Y[j] == 1)
			tp += 1;
		if (n_t_prediction_Y[j] >= 0 && t_actual_Y[j] == -1)
			fn += 1;
		if (n_t_prediction_Y[j] < 0 && t_actual_Y[j] == 1)
			fp += 1;
		if (n_t_prediction_Y[j] < 0 && t_actual_Y[j] == -1)
			tn += 1;
	}
	cout << endl;
	cout << "测试集预测结果：" << endl;
	cout << "\t" << "tp = " << tp << "\t" << "fn = " << fn << endl
		 << "\t" << "fp = " << fp << "\t\t" << "tn = " << tn << endl;
	cout << "\t" << "错误率 = " << 1.0*(fn + fp) / 10000 << endl;

	clock_t tEnd = clock();       // 结束计时
	cout << "程序总运行时间(s)：" << float(tEnd - tStart) / CLOCKS_PER_SEC << endl;

	system("pause");
	return 0;
}
mine .
关注
1
点赞
踩
17

收藏

觉得还不错? 一键收藏
0
评论
C++识别MNIST数据集（二分类）

C++识别MNIST数据集#include <vector> #include <string> #include <fstream> #include <iostream>#include <Eigen/Eigen/Dense> #include <ctime>#include <windows.h>using namespace std;using namespace Eigen;// 大端转小端
复制链接

扫一扫