花了两三天时间写了 C++ 版本的 CNN(其实一天就基本完成了代码,后面两天都在做测试、调参数等)。当然,能写得这么快,主要是因为之前看了不少讲解神经网络和卷积神经网络的文章和论文,基本搞懂了其中的难点(反向传播,BP),所以写起来比较得心应手。下面先粗略地贴一下代码:
#pragma once
#include"timer.h"
/**
 * Convolutional network with statically sized layers:
 * 32x32 input -> C1 (6 maps, 28x28) -> S2 (6 maps, 14x14) -> C3 (16 maps, 10x10)
 * -> S4 (16 maps, 5x5) -> C5 (120 units) -> F6 (10 outputs).
 *
 * All weights, activations and the complete train/test image sets are stored
 * as fixed-size member arrays, so a CNN object is several hundred megabytes
 * large. NOTE(review): it presumably must not be created as a local (stack)
 * variable - confirm how callers instantiate it.
 */
class CNN
{
public:
    CNN();
    ~CNN();
    /// Randomly initialises all kernels/weights and zeroes all biases.
    void init();
    /// Loads the training images and labels from "train.csv".
    void readFile();
    /// Forward pass for the batch of training images starting at `index`.
    void ff(int index);
    /// Backward pass; presumably for the batch starting at `index` (definition not shown here).
    void bp(int index);
    /// Parameter update step; presumably applies the gradients computed by bp() - TODO confirm.
    void update(int index);
    double evaluate();
    void train();
    void test();
    void checkGrad();
private:
    Timer timer;
    static const int train_times = 10000000;
    double a; // set to 1 by the constructor; presumably the learning rate - TODO confirm
    double r; // set to 0 by the constructor; presumably a regularisation factor - TODO confirm
    static const int layer_num = 7;
    static const int batch_size = 1;
    static const int train_size = 20000;
    static const int test_size = 40000;
    // Labels of the training images
    int label[train_size];
    // Input layer (test data)
    double testI0[test_size][32][32];
    // Input layer (training data)
    double I0[train_size][32][32];
    // Connection table: connection[k][j] is true when S2 map k feeds C3 map j.
    // The second dimension must be 16 (one column per C3 map): the constructor
    // fills 16 columns per row and ff() indexes it with j in [0, 16).
    bool connection[6][16];
    // C1 layer
    double C1[batch_size][6][28][28]; // feature maps after convolution
    double C1_conv[6][5][5]; // convolution kernels
    double C1_dconv[6][5][5]; // kernel gradients
    double C1_b[6]; // bias
    double C1_db[6]; // bias gradients
    double C1_d[batch_size][6][28][28]; // residuals (deltas)
    // S2 layer
    double S2[batch_size][6][14][14]; // feature maps after pooling
    double S2_d[batch_size][6][14][14]; // residuals (deltas)
    //double S2_w[6];
    //double S2_b[6];
    //double S2_dw[6];
    //double S2_db[6];
    //double S2_mean[batch_size][6][14][14]; // stores the mean-pooled value computed during ff
    // C3 layer
    double C3[batch_size][16][10][10];
    // Indexed as [S2 map][C3 map][kernel x][kernel y]. init() and ff() only touch
    // kernel indices 0..4. NOTE(review): the trailing [10][10] could probably be
    // [5][5]; confirm against bp()/update() before shrinking.
    double C3_conv[6][16][10][10];
    double C3_dconv[6][16][10][10];
    double C3_b[16];
    double C3_db[16];
    double C3_d[batch_size][16][10][10];
    // S4 layer
    double S4[batch_size][16][5][5];
    double S4_d[batch_size][16][5][5];
    //double S4_w[16];
    //double S4_b[16];
    //double S4_dw[16];
    //double S4_db[16];
    //double S4_mean[batch_size][16][5][5];
    // C5 layer
    double C5[batch_size][120];
    double C5_conv[16][120][5][5]; // indexed as [S4 map][C5 unit][kernel x][kernel y]
    double C5_dconv[16][120][5][5];
    double C5_b[120];
    double C5_db[120];
    double C5_d[batch_size][120];
    // F6 layer
    //double F6[batch_size][84];
    //double F6_p[120][84];
    //double F6_dp[120][84];
    //double F6_b[84];
    //double F6_db[84];
    //double F6_d[batch_size][84];
    double F6[batch_size][10];
    double F6_p[120][10]; // weights, indexed as [C5 unit][output]
    double F6_dp[120][10];
    double F6_b[10];
    double F6_db[10];
    double F6_d[batch_size][10];
    // F7 layer
    //double F7[batch_size][10];
    //double F7_p[84][10];
    //double F7_dp[84][10]; // gradients
    //double F7_b[10];
    //double F7_db[10];
    //double F7_d[batch_size][10];
private:
    double sigmoi(double x); // logistic sigmoid 1 / (1 + e^-x)
    double derivate_sigmoi(double z); // z * (1 - z)
    double random(); // pseudo-random number between 0 and 1
    double getCost_checkgGrad();
};
// CNN.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "CNN.h"
#include "timer.h"
// Sets a = 1 and r = 0 and fills the S2 -> C3 connection table.
CNN::CNN()
    : a(1), r(0)
{
    const bool T = true;
    const bool F = false;
    // Row = S2 map, column = C3 map; T means that S2 map is an input of that C3 map.
    const bool wiring[6][16] = {
        {T, F, F, F, T, T, T, F, F, T, T, T, T, F, T, T},
        {T, T, F, F, F, T, T, T, F, F, T, T, T, T, F, T},
        {T, T, T, F, F, F, T, T, T, F, F, T, F, T, T, T},
        {F, T, T, T, F, F, T, T, T, T, F, F, T, F, T, T},
        {F, F, T, T, T, F, F, T, T, T, T, F, T, T, F, T},
        {F, F, F, T, T, T, F, F, T, T, T, T, F, T, T, T}
    };
    // NOTE(review): 16 columns are written per row, so the `connection` member
    // has to be declared with at least 16 columns.
    for(int s2_map = 0; s2_map < 6; ++s2_map)
    {
        for(int c3_map = 0; c3_map < 16; ++c3_map)
        {
            connection[s2_map][c3_map] = wiring[s2_map][c3_map];
        }
    }
}
// Nothing to release: the destructor body is intentionally empty.
CNN::~CNN() {}
// Returns the next rand() value divided by RAND_MAX, i.e. a number in [0, 1].
double CNN::random()
{
    const double draw = rand();
    return draw / RAND_MAX;
}
// Logistic sigmoid: 1 / (1 + e^(-x)).
double CNN::sigmoi(double x)
{
    const double denominator = 1 + exp(-x);
    return 1.0 / denominator;
}
// Returns z * (1 - z). This equals the sigmoid's derivative when z is already
// the sigmoid output (not the pre-activation input).
double CNN::derivate_sigmoi(double z)
{
    const double complement = 1 - z;
    return z * complement;
}
/**
 * Gives every trainable parameter its starting value.
 *
 * Seeds rand() from the current time, then draws each C1/C3/C5 kernel weight
 * and each F6 weight as (random() - 0.5) * 2 / scale, i.e. a value in [-1, 1]
 * while scale is 1. Every bias is set to 0. Prints "init..." and afterwards
 * the value returned by timer.end() as the elapsed time.
 */
void CNN::init()
{
    double scale = 1;
    cout<<"init...";
    timer.start();
    srand((unsigned int)time(NULL));
    // C1 layer: 6 kernels of 5x5
    for(int i = 0; i < 6; ++i)
    {
        for(int j = 0; j < 5; ++j)
            for(int k = 0; k < 5; ++k)
                C1_conv[i][j][k] = (random() - 0.5) * 2 / scale;
        C1_b[i] = 0;
    }
    // S2 layer (no trainable parameters in the current version)
    /*for(int i = 0; i < 6; ++i)
    {
    S2_w[i] = (random() - 0.005) * 2;
    S2_b[i] = (random() - 0.005) * 2;
    }*/
    // C3 layer: one 5x5 kernel per (S2 map j, C3 map i) pair. Every pair is
    // initialised here, whether or not the connection table enables it.
    for(int i = 0; i < 16; ++i)
    {
        for(int j = 0; j < 6; ++j)
        {
            for(int k = 0; k < 5; ++k)
                for(int l = 0; l < 5; ++l)
                    C3_conv[j][i][k][l] = (random() - 0.5) * 2 /scale;
        }
        C3_b[i] = 0;
    }
    // S4 layer (no trainable parameters in the current version)
    /*for(int i = 0; i < 16; ++i)
    {
    S4_w[i] = (random() - 0.005) * 2;
    S4_b[i] = (random() - 0.005) * 2;
    }*/
    // C5 layer: one 5x5 kernel per (S4 map j, C5 unit i) pair
    for(int i = 0; i < 120; ++i)
    {
        for(int j = 0; j < 16; ++j)
        {
            for(int k = 0; k < 5; ++k)
                for(int l = 0; l < 5; ++l)
                    C5_conv[j][i][k][l] = (random() - 0.5) * 2 / scale;
        }
        C5_b[i] = 0;
    }
    // F6 layer: 120 x 10 weights
    for(int i = 0; i < 10; ++i)
    {
        for(int j = 0; j < 120; ++j)
            F6_p[j][i] = (random() - 0.5) * 2 / scale;
        F6_b[i] = 0;
    }
    // F7 layer (disabled)
    //for(int i = 0; i < 10; ++i)
    //{
    // for(int j = 0; j < 84; ++j)
    // F7_p[j][i] = (random() - 0.5) * 2 / scale;
    // F7_b[i] = 0;
    //}
    cout<<" cost time:"<<timer.end()<<"s"<<endl;
}
void CNN::readFile()
{
ifstream infile("train.csv");
string header;
int id,r,g,b;
char comma;
getline(infile,header);
cout<<"read train file...";
timer.start();
for(int i = 0;i < train_size; ++i)
{
infile >> id;
infile >> comma;
infile >> label[i];
for(int j = 0;j < 32; ++j)
{
for(int k = 0; k < 32; ++k)
{
infile >> comma;
infile >> r;
infile >> comma;
infile >> g;
infile >> comma;
infile >> b;
I0[i][j][k] = (r*0.299 + g*0.587 + b*0.114) / 256; //灰度化输入图像,在缩放到0~1之间
//cout<<I0[i][j][k]<<endl;
}
}
}
cout<<" cost time: "<<timer.end()<<'s'<<endl;
}
void CNN::ff(int index)
{
for(int i = 0; i < batch_size; ++i)
{
//C1层
for(int j =0; j < 6; ++j)
{
for(int x = 0; x < 28; ++x)
{
for(int y = 0; y < 28; ++y)
{
double temp = 0;
for(int conv_x = 0; conv_x < 5; ++conv_x)
{
for(int conv_y = 0; conv_y < 5; ++conv_y)
{
temp += I0[index + i][x + conv_x][y + conv_y] * C1_conv[j][conv_x][conv_y];
}
}
C1[i][j][x][y] = sigmoi(temp + C1_b[j]);
}
}
}
//S2层
for(int j = 0; j < 6; ++j)
{
for(int x = 0; x < 14; ++x)
{
for(int y = 0; y < 14; ++y)
{
double temp = 0;
for(int conv_x = x * 2; conv_x < x * 2 + 2; ++conv_x)
{
for(int conv_y = y * 2; conv_y < y * 2 + 2; ++conv_y)
{
temp += C1[i][j][conv_x][conv_y];
}
}
//S2_mean[i][j][x][y] = temp / 4.0;
//S2[i][j][x][y] = sigmoi((temp / 4.0) * S2_w[j] + S2_b[j]);
S2[i][j][x][y] = temp / 4.0;
}
}
}
//C3层
for(int j = 0; j < 16; ++j) //C3的map个数
{
for(int x = 0; x < 10; ++x)
{
for(int y = 0; y < 10; ++y)
{
double temp = 0;
for(int k = 0; k < 6; ++k) //S2的map个数
{
if(connection[k][j])
{
for(int conv_x = 0; conv_x < 5; ++conv_x)
{
for(int conv_y = 0; conv_y < 5; ++conv_y)
{
temp += S2[i][k][x + conv_x][y + conv_y] * C3_conv[k][j][conv_x][conv_y];
}
}
}
}
C3[i][j][x][y] = sigmoi(temp + C3_b[j]);
}
}
}
//S4层
for(int j = 0; j < 16; ++j)
{
for(int x = 0; x < 5; ++x)
{
for(int y = 0; y < 5; ++y)
{
double temp = 0