The previous installment introduced the matrix class used to store images. Before writing a convolutional neural network, let's first implement a three-layer neural network in C++, using handwritten digit recognition as the example. Yes, the classic example everyone has seen a thousand times.
First, define some basic utility functions.
Functions for inspecting a matrix:
void show(matrix &m)
{
    for (int i = 0; i < m.size(); i++) cout << m.x[i] << endl;
    cout << "=============" << endl;
}
void show_info(matrix &m)
{
    cout << "cols is " << m.cols << endl;
    cout << "rows is " << m.rows << endl;
    cout << "chans is " << m.chans << endl;
}
Functions for shuffling the dataset:
vector<int> get_a_sequence(int length)
{
    vector<int> sequence;
    for (int i = 0; i < length; i++) { sequence.push_back(i); }
    return sequence;
}
void dataset_shuffle(vector<vector<float>> &train_images, vector<vector<int>> &Labels, vector<int> &sequence)
{
    random_shuffle(sequence.begin(), sequence.end());
    // copy the originals, then write them back in shuffled order
    vector<vector<float>> new_train_images = train_images;
    vector<vector<int>> new_train_labels = Labels;
    for (int i = 0; i < train_images.size(); i++)
    {
        int index = sequence[i];
        train_images[i] = new_train_images[index];
        Labels[i] = new_train_labels[index];
    }
}
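A side note: random_shuffle was deprecated in C++14 and removed in C++17, so on a newer compiler the call above can be swapped for std::shuffle with an explicit random engine (a minimal sketch; std::iota could likewise replace the loop in get_a_sequence):
#include <algorithm> // std::shuffle
#include <numeric>   // std::iota
#include <random>    // std::mt19937, std::random_device
#include <vector>
std::vector<int> sequence(60000);
std::iota(sequence.begin(), sequence.end(), 0);      // fill with 0, 1, ..., 59999
std::mt19937 rng(std::random_device{}());            // seeded Mersenne Twister engine
std::shuffle(sequence.begin(), sequence.end(), rng); // unbiased shuffle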
Getting the index of a vector's largest element, used by the test function:
inline int Argmax(float *x, int size)
{
    // start from the first element so the result is correct even for all-negative inputs
    float max = x[0];
    int argmax = 0;
    for (int i = 1; i < size; i++)
    {
        if (max <= x[i])
        {
            max = x[i];
            argmax = i;
        }
    }
    return argmax;
}
The test function (nothing is encapsulated yet, so it is written out as three fully connected layers):
void test(vector<vector<float>> &test_images, vector<int> &test_labels, vector<matrix> &parameter)
{
    cout << "Testing..." << endl;
    vector<float> Img;
    matrix W1, W2, B1, B2;
    W1 = parameter[0];
    W2 = parameter[1];
    B1 = parameter[2];
    B2 = parameter[3];
    matrix Hide, Hide_s;
    matrix Output, Output_s;
    int correct_num = 0;
    float accuracy_rate;
    for (int k = 0; k < test_images.size(); k++)
    {
        Img = test_images[k];
        matrix img(28, 28, 1); vector_to_matrix(img.x, Img);
        // forward pass
        Hide = M_dot_V(W1, img) + B1;
        Hide_s = sigmoid_f(Hide);
        Output = M_dot_V(W2, Hide_s) + B2;
        Output_s = sigmoid_f(Output);
        // the predicted class is the output unit with the largest activation
        int L = Argmax(Output_s.x, 10);
        if (L == test_labels[k]) correct_num++;
    }
    accuracy_rate = 1.0f * correct_num / test_images.size();
    cout << "Test accuracy: " << accuracy_rate << endl;
}
Dataset loading, written as a .h file:
#pragma once
#include <iostream>  // cout
#include <sstream>
#include <fstream>
#include <iomanip>   // setw
#include <random>
#include <algorithm> // std::reverse
#include <string>
#include <vector>
#include <stdio.h>
namespace mnist
{
    std::string data_name() { return std::string("MNIST"); }
    // from tiny_cnn: byte-swap a value in place (the IDX header is big-endian)
    template<typename T>
    T* reverse_endian(T* p) {
        std::reverse(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + sizeof(T));
        return p;
    }
    // from tiny_cnn (kinda)
    bool parse_mnist_labels(const std::string& label_file, std::vector<int> *labels) {
        std::ifstream ifs(label_file.c_str(), std::ios::in | std::ios::binary);
        if (ifs.bad() || ifs.fail())
        {
            return false;
        }
        int magic_number, num_items;
        ifs.read((char*) &magic_number, 4);
        ifs.read((char*) &num_items, 4);
        reverse_endian(&magic_number);
        reverse_endian(&num_items);
        for (int i = 0; i < num_items; i++) {
            unsigned char label;
            ifs.read((char*) &label, 1);
            labels->push_back((int) label);
        }
        return true;
    }
    // from tiny_cnn
    struct mnist_header {
        int magic_number;
        int num_items;
        int num_rows;
        int num_cols;
    };
    // from tiny_cnn (kinda)
    bool parse_mnist_images(const std::string& image_file,
                            std::vector<std::vector<float>> *images,
                            float scale_min = -1.0, float scale_max = 1.0,
                            int x_padding = 0, int y_padding = 0)
    {
        std::ifstream ifs(image_file.c_str(), std::ios::in | std::ios::binary);
        if (ifs.bad() || ifs.fail())
        {
            return false;
        }
        mnist_header header;
        // read header
        ifs.read((char*) &header.magic_number, 4);
        ifs.read((char*) &header.num_items, 4);
        ifs.read((char*) &header.num_rows, 4);
        ifs.read((char*) &header.num_cols, 4);
        reverse_endian(&header.magic_number);
        reverse_endian(&header.num_items);
        reverse_endian(&header.num_rows);
        reverse_endian(&header.num_cols);
        const int width = header.num_cols + 2 * x_padding;
        const int height = header.num_rows + 2 * y_padding;
        // read each image, padding with scale_min and rescaling pixels to [scale_min, scale_max]
        for (int i = 0; i < header.num_items; i++)
        {
            std::vector<float> image;
            std::vector<unsigned char> image_vec(header.num_rows * header.num_cols);
            ifs.read((char*) &image_vec[0], header.num_rows * header.num_cols);
            image.resize(width * height, scale_min);
            for (int y = 0; y < header.num_rows; y++)
            {
                for (int x = 0; x < header.num_cols; x++)
                    image[width * (y + y_padding) + x + x_padding] =
                        (image_vec[y * header.num_cols + x] / 255.0f) * (scale_max - scale_min) + scale_min;
            }
            images->push_back(image);
        }
        return true;
    }
    // == load data (MNIST 28x28x1, no padding, pixel range -1 to 1)
    // each loader first tries the "xxx.idx3-ubyte" file name, then falls back
    // to the "xxx-idx3-ubyte" variant that some downloads use
    bool parse_test_data(std::string &data_path, std::vector<std::vector<float>> &test_images, std::vector<int> &test_labels,
                         float min_val = -1.f, float max_val = 1.f, int padx = 0, int pady = 0)
    {
        if (!parse_mnist_images(data_path + "/t10k-images.idx3-ubyte", &test_images, min_val, max_val, padx, pady))
            if (!parse_mnist_images(data_path + "/t10k-images-idx3-ubyte", &test_images, min_val, max_val, padx, pady))
                return false;
        if (!parse_mnist_labels(data_path + "/t10k-labels.idx1-ubyte", &test_labels))
            if (!parse_mnist_labels(data_path + "/t10k-labels-idx1-ubyte", &test_labels)) return false;
        return true;
    }
    bool parse_train_data(std::string &data_path, std::vector<std::vector<float>> &train_images, std::vector<int> &train_labels,
                          float min_val = -1.f, float max_val = 1.f, int padx = 0, int pady = 0)
    {
        if (!parse_mnist_images(data_path + "/train-images.idx3-ubyte", &train_images, min_val, max_val, padx, pady))
            if (!parse_mnist_images(data_path + "/train-images-idx3-ubyte", &train_images, min_val, max_val, padx, pady))
                return false;
        if (!parse_mnist_labels(data_path + "/train-labels.idx1-ubyte", &train_labels))
            if (!parse_mnist_labels(data_path + "/train-labels-idx1-ubyte", &train_labels)) return false;
        return true;
    }
}
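For reference, the IDX files parsed above begin with a header of 32-bit big-endian integers, which is why reverse_endian is applied on little-endian x86 machines. The layout, per the MNIST spec:
// image files, e.g. train-images.idx3-ubyte (all header fields 32-bit big-endian):
//   offset  0: magic number 0x00000803 (2051)
//   offset  4: number of images (60000 train / 10000 test)
//   offset  8: number of rows (28)
//   offset 12: number of columns (28)
//   offset 16: rows*cols unsigned bytes per image, images back to back
// label files use magic 0x00000801 (2049) and an item count, then one byte per label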
Dataset processing functions:
//======================== dataset processing ===================================
// one-hot encode the labels, e.g. label 3 with 10 classes becomes {0,0,0,1,0,0,0,0,0,0}
vector<vector<int>> Onehot_encoding(vector<int> &labels, int type_nums)
{
    vector<vector<int>> labels_encoding;
    for (int i = 0; i < labels.size(); i++)
    {
        vector<int> temp(type_nums, 0);
        temp[labels[i]] = 1;
        labels_encoding.push_back(temp);
    }
    return labels_encoding;
}
// split the dataset into batches of batch_size, used for the sample images
// (any leftover samples beyond the last whole batch are dropped)
vector<vector<vector<float>>> split_batch_x(vector<vector<float>> &imgs, int batch_size)
{
    vector<vector<vector<float>>> x;
    int _size = imgs.size() / batch_size;
    for (int i = 0; i < _size; i++)
    {
        vector<vector<float>> batch_x;
        for (int j = 0; j < batch_size; j++)
        {
            batch_x.push_back(imgs[j + i * batch_size]);
        }
        x.push_back(batch_x);
    }
    return x;
}
// split the dataset into batches of batch_size, used for the sample labels
vector<vector<vector<int>>> split_batch_y(vector<vector<int>> &labels, int batch_size)
{
    vector<vector<vector<int>>> y;
    int _size = labels.size() / batch_size;
    for (int i = 0; i < _size; i++)
    {
        vector<vector<int>> batch_y;
        for (int j = 0; j < batch_size; j++)
        {
            batch_y.push_back(labels[j + i * batch_size]);
        }
        y.push_back(batch_y);
    }
    return y;
}
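To make the shapes concrete (assuming the standard 60000-image MNIST training set and the batch_size of 10 used in main below):
// x = split_batch_x(train_images, 10);
// y = split_batch_y(Labels, 10);
// x.size()       == 6000   -> 60000 / 10 whole batches
// x[0].size()    == 10     -> images per batch
// x[0][0].size() == 784    -> 28*28 pixels per image
// y[0][0].size() == 10     -> one-hot label length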
The activation function and its derivative; sigmoid is used here:
inline matrix sigmoid_f(matrix &m)
{
    matrix out(m.cols, m.rows, m.chans);
    for (int i = 0; i < out.size(); i++) out.x[i] = 1.0f / (1.0f + exp(-(m.x[i])));
    return out;
}
// note: expects the already-activated output y = sigmoid(x) and returns y*(1-y)
inline matrix sigmoid_df(matrix &m)
{
    matrix out(m.cols, m.rows, m.chans);
    for (int i = 0; i < out.size(); i++) out.x[i] = m.x[i] * (1.f - m.x[i]);
    return out;
}
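For reference, the formulas being implemented are $\sigma(x) = \frac{1}{1+e^{-x}}$ and $\sigma'(x) = \sigma(x)\,(1-\sigma(x))$. Because the derivative only needs the activation's output, the training code below passes the already-activated Output_s and Hide_s to sigmoid_df rather than the pre-activations.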
Then add a few supplementary operations for the matrix class:
//======================= supplementary matrix operations =======================
// copy a vector into a matrix's raw buffer
inline void vector_to_matrix(float *m, vector<int> &v)
{
    for (int i = 0; i < v.size(); i++) m[i] = v[i];
}
inline void vector_to_matrix(float *m, vector<float> &v)
{
    for (int i = 0; i < v.size(); i++) m[i] = v[i];
}
// matrix transpose
inline matrix Transposition(matrix &m)
{
    int _w = m.cols;
    int _h = m.rows;
    matrix out(_h, _w, 1);
    for (int i = 0; i < _h; i++)
    {
        for (int j = 0; j < _w; j++)
        {
            out.x[j + i * _w] = m.x[i + j * _h];
        }
    }
    return out;
}
// matrix * vector, producing a vector; both are stored flat in the matrix class
inline matrix M_dot_V(matrix &A, matrix &B)
{
    int _w = A.cols;
    int _h = A.rows;
    matrix out(_w, 1, 1);
    out.fill(0);
    for (int i = 0; i < _w; i++)
    {
        for (int j = 0; j < _h; j++)
        {
            out.x[i] += A.x[j + i * _h] * B.x[j];
        }
    }
    return out;
}
// vector * vector (outer product), producing a matrix; both are stored flat
inline matrix V_dot_V(matrix &A, matrix &B)
{
    int _w = A.size();
    int _h = B.size();
    matrix out(_w, _h, 1);
    for (int i = 0; i < _w; i++)
    {
        for (int j = 0; j < _h; j++)
        {
            out.x[j + i * _h] = A.x[i] * B.x[j];
        }
    }
    return out;
}
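As a quick sanity check of the flat storage convention these helpers assume (a minimal sketch; it presumes the matrix class from the previous installment, with the x buffer and constructor used throughout this post):
matrix A(2, 2, 1);            // 2x2 matrix; M_dot_V reads it one contiguous row at a time
A.x[0] = 1; A.x[1] = 2;       // first row:  [1 2]
A.x[2] = 3; A.x[3] = 4;       // second row: [3 4]
matrix v(2, 1, 1);
v.x[0] = 1; v.x[1] = 1;       // v = [1 1]
matrix y = M_dot_V(A, v);     // expect y = [3 7]
show(y);
matrix outer = V_dot_V(v, y); // outer product v*y^T, expect rows [3 7] and [3 7]
show(outer);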
Don't forget to include the relevant headers (random_shuffle and std::reverse live in <algorithm>, and exp in <cmath>):
#include <vector>
#include <iostream>
#include <algorithm>
#include <cmath>
using namespace std;
using namespace mnist;
Finally, the main function (remember to download the MNIST dataset and put it in the corresponding directory):
int main()
{
    string data_path = "data/mnist/";
    int batch_size = 10;
    int num_input = 28 * 28 * 1;
    int num_hide = 30;
    int num_classes = 10;
    float lr = 3; // learning rate
    int epochs = 1;
    //====================================================================
    // load the dataset
    vector<vector<float>> train_images, train_images_copy;
    vector<int> train_labels;
    vector<vector<float>> test_images;
    vector<int> test_labels;
    cout << "Loading data..." << endl;
    if (!parse_test_data(data_path, test_images, test_labels)) { std::cerr << "error: could not parse data.\n"; return 1; }
    if (!parse_train_data(data_path, train_images, train_labels)) { std::cerr << "error: could not parse data.\n"; return 1; }
    cout << "Data loaded" << endl;
    train_images_copy = train_images; // keep an unshuffled copy for measuring training accuracy later
    //====================================================================
    vector<vector<int>> Labels;
    Labels = Onehot_encoding(train_labels, 10);
    vector<vector<vector<float>>> x;
    vector<vector<float>> batch_x;
    vector<float> Img;
    vector<vector<vector<int>>> y;
    vector<vector<int>> batch_y;
    vector<int> Label;
    vector<int> sequence;
    sequence = get_a_sequence(train_images.size()); // index sequence used for shuffling the dataset
    //========================================================================
    // fully connected layers
    matrix W1(num_hide, num_input, 1);
    matrix B1(num_hide, 1, 1);
    matrix W2(num_classes, num_hide, 1);
    matrix B2(num_classes, 1, 1);
    W1.fill_random_normal(1.f);
    B1.fill_random_normal(1.f);
    W2.fill_random_normal(1.f);
    B2.fill_random_normal(1.f);
    matrix Hide, Hide_s;           // hidden layer (pre-activation and activated)
    matrix Output, Output_s;       // output layer (pre-activation and activated)
    matrix delta_1, delta_2;       // output-layer and hidden-layer errors
    matrix W2_T;                   // weight transpose used in backprop
    matrix W1_t, W2_t, B1_t, B2_t; // parameter gradients
    //========================================================================
    // training
    for (int epoch = 0; epoch < epochs; epoch++)
    {
        dataset_shuffle(train_images, Labels, sequence); // randomly shuffle the dataset
        x = split_batch_x(train_images, batch_size);
        y = split_batch_y(Labels, batch_size);
        //=====================================================================
        for (int m = 0; m < x.size(); m++)
        {
            batch_x = x[m];
            batch_y = y[m];
            matrix W1_t_sum(W1.cols, W1.rows, W1.chans); W1_t_sum.fill(0);
            matrix W2_t_sum(W2.cols, W2.rows, W2.chans); W2_t_sum.fill(0);
            matrix B1_t_sum(B1.cols, B1.rows, B1.chans); B1_t_sum.fill(0);
            matrix B2_t_sum(B2.cols, B2.rows, B2.chans); B2_t_sum.fill(0);
            for (int n = 0; n < batch_x.size(); n++)
            {
                Img = batch_x[n];
                Label = batch_y[n];
                matrix img(28, 28, 1); vector_to_matrix(img.x, Img);
                matrix label(10, 1, 1); vector_to_matrix(label.x, Label);
                // forward pass
                Hide = M_dot_V(W1, img) + B1;
                Hide_s = sigmoid_f(Hide);
                Output = M_dot_V(W2, Hide_s) + B2;
                Output_s = sigmoid_f(Output);
                //=======================================================================
                // backpropagate the error
                delta_1 = (Output_s - label) * sigmoid_df(Output_s);   // output-layer error
                W2_T = Transposition(W2);
                delta_2 = M_dot_V(W2_T, delta_1) * sigmoid_df(Hide_s); // hidden-layer error
                //=======================================================================
                // parameter gradients, accumulated over the batch
                B2_t = delta_1;
                B1_t = delta_2;
                W2_t = V_dot_V(delta_1, Hide_s);
                W1_t = V_dot_V(delta_2, img);
                B2_t_sum += B2_t;
                B1_t_sum += B1_t;
                W2_t_sum += W2_t;
                W1_t_sum += W1_t;
            }
            // update the parameters with the batch-averaged gradients =============
            W1 = W1 - W1_t_sum * (lr / batch_size);
            W2 = W2 - W2_t_sum * (lr / batch_size);
            B1 = B1 - B1_t_sum * (lr / batch_size);
            B2 = B2 - B2_t_sum * (lr / batch_size);
            if (m % 100 == 0) cout << "===================== epoch " << epoch << ", batch " << m << " =====================" << endl;
        }
    }
    cout << "Training finished" << endl;
    vector<matrix> parameter;
    parameter.push_back(W1);
    parameter.push_back(W2);
    parameter.push_back(B1);
    parameter.push_back(B2);
    test(train_images_copy, train_labels, parameter); // accuracy on the training set
    test(test_images, test_labels, parameter);        // accuracy on the test set
    system("pause");
    return 0;
}
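To make the training loop explicit: it is plain minibatch SGD on the squared error. Writing $a_{out}$ for Output_s, $a_{hide}$ for Hide_s, $x$ for img, $t$ for label, $\eta$ for lr and a batch $B$ of batch_size samples, each inner iteration computes
$\delta_1 = (a_{out} - t) \odot a_{out}(1 - a_{out})$ (output-layer error)
$\delta_2 = (W_2^{T}\delta_1) \odot a_{hide}(1 - a_{hide})$ (hidden-layer error)
$\Delta W_2 = \delta_1 a_{hide}^{T}$, $\Delta B_2 = \delta_1$, $\Delta W_1 = \delta_2 x^{T}$, $\Delta B_1 = \delta_2$
and each parameter is then updated as $W \leftarrow W - \frac{\eta}{|B|} \sum_{n \in B} \Delta W^{(n)}$.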
Finally, the experimental results: with 50 epochs the accuracy reaches over 93%, and with 100 epochs over 94%. A plain fully connected network more or less tops out around there; to do better we need a convolutional neural network, which I'll add in a later installment when I have time.