嗯嗯... 看了 3Blue1Brown 的神经网络结构简介视频之后,觉得这个人工ZZ网络好好玩啊!遂入坑。
这是视频链接:https://www.bilibili.com/video/av15532370?from=search&seid=10137053428682538306
先挂一段代码... 这两天苦受各种DDL欺压,只能稍后再详细记录我的学习历程啦~
/* BP网络 by Kevin
*
* Supervised Learning
* for Classification (Multi-class Classification)
*
* 矩阵类:自敲,模板类(便于编译器检查行列匹配等问题)
* 隐含层层数:两层
* 隐含层神经元个数:自定
* Training Data:4位二进制数 <-> 对应位置
* 前馈调节方式:梯度下降法
*/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <ctime>
#include <vector>
typedef unsigned int uint;
/* Fixed-size R x C matrix.
 *
 * The dimensions are template parameters so that the compiler rejects
 * mismatched operand shapes (e.g. multiplying {R,C} by anything other
 * than {C,C1}) at compile time.  Storage is a public plain 2-D array
 * so callers may index it directly.
 */
template <uint R, uint C, typename Type = double>
class Matrix
{
public:
    Type m[R][C];
public:
    // Entries are deliberately left uninitialized; fill before use.
    Matrix(void) { }
    // Fill every entry with the same value.
    Matrix(const Type val)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] = val;
    }
    // Fill with uniform pseudo-random values in [inf, sup] via rand().
    Matrix(const Type inf, const Type sup)
    {
        const Type interval = (sup-inf) / RAND_MAX;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] = inf + interval * rand();
    }
    Matrix(const Matrix<R, C> &o)
    {
        memcpy(m, o.m, sizeof(m));
    }
    // Re-randomize every entry uniformly in [inf, sup].
    void random_init(const Type inf, const Type sup)
    {
        const Type interval = (sup-inf) / RAND_MAX;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] = inf + interval * rand();
    }
    // Mutation step: with probability ~1/4, shift an entry by a uniform
    // random offset in [-d, d] (used by the evolutionary search).
    void random_fix(const Type d)
    {
        const Type interval = 2*d / RAND_MAX;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                if (rand() < RAND_MAX/4)
                    m[r][c] += interval * rand() - d;
    }
    inline Matrix<R, C> & operator =(const Matrix<R, C> &o)
    {
        if (this != &o)
            memcpy(m, o.m, sizeof(m));
        return *this;
    }
    // Row access: mat[r] yields a reference to the r-th row array.
    inline auto operator [](int index) -> decltype(*m)
    {
        return m[index];
    }
    // Transposed copy: {R,C} -> {C,R}.
    Matrix<C, R> transpose(void) const
    {
        Matrix<C, R> ret;
        for (uint c=0; c<C; ++c)
            for (uint r=0; r<R; ++r)
                ret.m[c][r] = m[r][c];
        return ret;
    }
    // Operator spelling of transpose() (body deduplicated).
    Matrix<C, R> operator ~(void) const
    {
        return transpose();
    }
    Matrix<R, C> operator +(const Matrix<R, C> &o) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] + o.m[r][c];
        return ret;
    }
    const Matrix<R, C> & operator +=(const Matrix<R, C> &o)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] += o.m[r][c];
        return *this;
    }
    Matrix<R, C> operator -(const Matrix<R, C> &o) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] - o.m[r][c];
        return ret;
    }
    const Matrix<R, C> & operator -=(const Matrix<R, C> &o)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] -= o.m[r][c];
        return *this;
    }
    // Matrix product: {R, C} * {C, C1} -> {R, C1}.
    template <uint C1>
    Matrix<R, C1> operator *(const Matrix<C, C1> &o) const
    {
        Matrix<R, C1> ret;
        for (uint r=0; r<R; ++r)
            for (uint c1=0; c1<C1; ++c1)
            {
                // BUGFIX: the first term previously read BOTH factors from
                // *this* (m[r][0] * m[0][c1]) instead of using the
                // right-hand operand, corrupting every product.
                ret.m[r][c1] = m[r][0] * o.m[0][c1];
                for (uint c=1; c<C; ++c)
                    ret.m[r][c1] += m[r][c] * o.m[c][c1];
            }
        return ret;
    }
    // Hadamard (element-wise) product: {R, C} ^ {R, C}.
    Matrix<R, C> operator ^(const Matrix<R, C> &o) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] * o.m[r][c];
        return ret;
    }
    const Matrix<R, C> & operator ^=(const Matrix<R, C> &o)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] *= o.m[r][c];
        return *this;
    }
    // Scalar * matrix.
    friend Matrix<R, C> operator *(const Type val, const Matrix<R, C> &o)
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = o.m[r][c] * val;
        return ret;
    }
    // Matrix * scalar.
    Matrix<R, C> operator *(const Type val) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = m[r][c] * val;
        return ret;
    }
    const Matrix<R, C> & operator *=(const Type val)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                m[r][c] *= val;
        return *this;
    }
    // Kronecker product: {R, C} (x) {R1, C1} -> {R*R1, C*C1}.
    template<uint R1, uint C1>
    Matrix <R * R1, C * C1> kronecker(const Matrix<R1, C1> &o) const
    {
        Matrix<R * R1, C * C1> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                for (uint r1=0; r1<R1; ++r1)
                    for (uint c1=0; c1<C1; ++c1)
                        ret.m[r*R1 + r1][c*C1 + c1] = m[r][c] * o.m[r1][c1];
        return ret;
    }
    // Sum of the squares of all entries.
    Type sq_sum(void) const
    {
        Type sum = 0.0;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                sum += m[r][c] * m[r][c];
        return sum;
    }
    // Print the matrix, one row per line.
    void println(void) const
    {
        for (uint r=0; r<R; ++r, putchar(10))
            for (uint c=0; c<C; ++c)
                printf("%f ", m[r][c]);
    }
    // Apply `lambda` to every entry (by reference).
    template <class Lambda>
    void for_each(Lambda lambda)
    {
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                lambda(m[r][c]);
    }
    // Logistic sigmoid.  `constexpr` removed: std::exp is not constexpr
    // in C++17, so the original declaration was ill-formed on strict
    // compilers.  No caller uses it in a constant expression.
    static double logsig(double x)
    {
        return 1 / (1 + exp(-x));
    }
    // Element-wise sigmoid activation.  Generalized to every row; the
    // original only filled row 0, leaving other rows uninitialized.
    Matrix<R, C> actFunc(void) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
                ret.m[r][c] = 1 / (1 + exp(-m[r][c]));
        return ret;
    }
    // Element-wise sigmoid derivative e^x / (1+e^x)^2 (same row fix).
    Matrix<R, C> dActFunc(void) const
    {
        Matrix<R, C> ret;
        for (uint r=0; r<R; ++r)
            for (uint c=0; c<C; ++c)
            {
                const double temp = exp(m[r][c]);
                ret.m[r][c] = temp / ((1+temp)*(1+temp));
            }
        return ret;
    }
};
// Forward declarations of the training-data helpers defined below.
void read_std_data(void);
void print_std_data(void);

// Problem size: 16 samples mapping a 4-bit binary input to a 16-way
// one-hot output position.
constexpr int TEST_AMT = 16;
constexpr int INPUT_AMT = 4;
constexpr int OUTPUT_AMT = 16;
constexpr float LEARNING_RATE = 0.05f;   // gradient-descent step size
constexpr double ERROR_LIMIT = 1e-7;     // stop training below this error

// Training set: one input row vector and one target row vector per sample.
using InputsType = Matrix<1, INPUT_AMT>[TEST_AMT];
using OutputsType = Matrix<1, OUTPUT_AMT>[TEST_AMT];
InputsType stdInputs;
OutputsType stdOutputs;
template <uint L1_AMT, uint L2_AMT>
class BP
{
private:
int _i = INPUT_AMT;
int _h1 = L1_AMT;
int _h2 = L2_AMT;
int _o = OUTPUT_AMT;
InputsType &inputs; // 训练用输入(向量)
OutputsType &outputs; // 训练用输入(向量)
Matrix< 1 , L1_AMT> B1h1; // 第一层的偏置系数(向量)
Matrix<INPUT_AMT, L1_AMT> Wih1; // 输入层和第一隐含层的突触(矩阵)
Matrix< 1 , L1_AMT> H1h1; // 第一隐含层神经元(向量)
Matrix< 1 , L2_AMT> B1h2; // 第二层的偏置系数(向量)
Matrix<L1_AMT, L2_AMT> Wh1h2; // 第一和第二隐含层间的突触(矩阵)
Matrix< 1 , L2_AMT> H1h2; // 第二隐含层神经元(向量)
Matrix< 1 , OUTPUT_AMT> B1o; // 输出层的偏置系数(向量)
Matrix<L2_AMT, OUTPUT_AMT> Wh2o; // 第二隐含层和输出层的突触(矩阵)
public:
BP(void) :
inputs(stdInputs), outputs(stdOutputs) { }
void init_inputs_and_outputs(const InputsType &inputs, const OutputsType &outputs)
{
this->inputs = inputs;
this->outputs = outputs;
}
void random_init(void)
{
H1h1.random_init(-1, 1);
H1h2.random_init(-1, 1);
Wih1.random_init(-1, 1);
Wh1h2.random_init(-1, 1);
Wh2o.random_init(-1, 1);
B1h1.random_init(-1, 1);
B1h2.random_init(-1, 1);
B1o.random_init(-1, 1);
}
void random_init_better(uint count)
{
printf("random selecting (%d times)...\n", count);
auto *bp = new BP<L1_AMT, L2_AMT>[count];
for (int i=0; i<count; ++i)
{
bp[i].random_init();
}
uint minErrorIndex = 0;
double minErrorValue = bp[0].get_error_sum();
double maxErrorValue = minErrorValue;
for (int i=1; i<count; ++i)
{
double nowErrorValue = bp[i].get_error_sum();
if (minErrorValue > nowErrorValue)
{
minErrorValue = nowErrorValue;
minErrorIndex = i;
}
if (maxErrorValue < nowErrorValue)
maxErrorValue = nowErrorValue;
}
const BP<L1_AMT, L2_AMT> &best_bp = bp[minErrorIndex];
this->B1h1 = best_bp.B1h1;
this->Wih1 = best_bp.Wih1;
this->H1h1 = best_bp.H1h1;
this->B1h2 = best_bp.B1h2;
this->Wh1h2 = best_bp.Wh1h2;
this->H1h2 = best_bp.H1h2;
this->B1o = best_bp.B1o;
this->Wh2o = best_bp.Wh2o;
printf("best: %f, worst: %f\n", minErrorValue, maxErrorValue);
puts("random select OK!");
delete []bp;
}
void evolution(uint count)
{
int minErrorIndex = -1;
double minErrorValue = get_error_sum();
BP<L1_AMT, L2_AMT> bp[count];
for (int i=0; i<count; ++i)
{
bp[i].B1h1 = this->B1h1;
bp[i].B1h1.random_fix(0.01);
bp[i].Wih1 = this->Wih1;
bp[i].Wih1.random_fix(0.01);
bp[i].H1h1 = this->H1h1;
bp[i].H1h1.random_fix(0.01);
bp[i].B1h2 = this->B1h2;
bp[i].B1h2.random_fix(0.01);
bp[i].Wh1h2 = this->Wh1h2;
bp[i].Wh1h2.random_fix(0.01);
bp[i].H1h2 = this->H1h2;
bp[i].H1h2.random_fix(0.01);
bp[i].B1o = this->B1o;
bp[i].B1o.random_fix(0.01);
bp[i].Wh2o = this->Wh2o;
bp[i].Wh2o.random_fix(0.01);
double nowErrorValue = bp[i].get_error_sum();
if (minErrorValue > nowErrorValue)
{
minErrorValue = nowErrorValue;
minErrorIndex = i;
}
}
if (minErrorIndex != -1) // 变异出了更好的
{
this->B1h1 = bp[minErrorIndex].B1h1;
this->Wih1 = bp[minErrorIndex].Wih1;
this->H1h1 = bp[minErrorIndex].H1h1;
this->B1h2 = bp[minErrorIndex].B1h2;
this->Wh1h2 = bp[minErrorIndex].Wh1h2;
this->H1h2 = bp[minErrorIndex].H1h2;
this->B1o = bp[minErrorIndex].B1o;
this->Wh2o = bp[minErrorIndex].Wh2o;
}
}
void train(double rate = LEARNING_RATE, double error_limit = ERROR_LIMIT)
{
puts("\nstart training:\n");
int times = 0;
while (true)
{
evolution(32);
++times;
double error = 0;
for (int t=0; t<TEST_AMT; t++)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
const Matrix<1, OUTPUT_AMT> D1o = Y1o - O1o;
Matrix<1, OUTPUT_AMT> E1o = D1o;
E1o ^= X1o.dActFunc();
const Matrix<OUTPUT_AMT, L2_AMT> Woh2 = ~Wh2o;
const Matrix<1, L2_AMT> D1h2 = E1o * Woh2;
Matrix<1, L2_AMT> E1h2 = D1h2;
E1h2 ^= X1h2.dActFunc();
const Matrix<L2_AMT, L1_AMT> Wh2h1 = ~Wh1h2;
const Matrix<1, L1_AMT> D1h1 = E1h2 * Wh2h1;
Matrix<1, L1_AMT> E1h1 = D1h1;
E1h1 ^= X1h1.dActFunc();
error += D1o.sq_sum() / 2;
Wih1 += rate * ((~O1i).kronecker(E1h1));
B1h1 += rate * E1h1;
Wh1h2 += rate * ((~O1h1).kronecker(E1h2));
B1h2 += rate * E1h2;
Wh2o += rate * ((~O1h2).kronecker(E1o));
B1o += rate * E1o;
}
if ((times & 8191) == 0 || times == 1)
{
printf("迭代次数:%d,均方误差:%f,各自输出: \n", times, error);
training_test();
}
if (error < error_limit)
{
printf("\n====================== 训练完成!======================\n迭代次数:%d,均方误差:%g\n绝对误差: \n", times, error);
final_test();
break;
}
}
}
double get_error_sum(void)
{
double error_sum = 0.0;
for (int t=0; t<TEST_AMT; ++t)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
const Matrix<1, OUTPUT_AMT> D1o = Y1o - O1o;
error_sum += D1o.sq_sum();
}
return error_sum;
}
void training_test(void)
{
for (int t=0; t<TEST_AMT; ++t)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
printf("[ ");
for (int t=0; t<OUTPUT_AMT; ++t)
if(fabs(O1o.m[0][t]) > 0.0001)printf("%.4f ", O1o.m[0][t]);
else printf(" ---- ");
printf("]\n");
}
printf("\n");
}
void final_test(void)
{
for (int t=0; t<TEST_AMT; ++t)
{
const Matrix<1, INPUT_AMT> X1i = inputs[t];
const Matrix<1, OUTPUT_AMT> Y1o = outputs[t];
const Matrix<1, INPUT_AMT> O1i = X1i;
Matrix<1, L1_AMT> X1h1 = O1i * Wih1;
X1h1 = X1h1 + B1h1;
const Matrix<1, L1_AMT> O1h1 = X1h1.actFunc();
Matrix<1, L2_AMT> X1h2 = O1h1 * Wh1h2;
X1h2 = X1h2 + B1h2;
const Matrix<1, L2_AMT> O1h2 = X1h2.actFunc();
Matrix<1, OUTPUT_AMT> X1o = O1h2 * Wh2o;
X1o = X1o + B1o;
const Matrix<1, OUTPUT_AMT> O1o = X1o.actFunc();
printf("[ ");
for (int t=0; t<OUTPUT_AMT; ++t)
printf("%.4f ", O1o.m[0][t]);
printf("]\n");
}
printf("\n");
}
};
/* Load the TEST_AMT training samples from "in.txt" into the global
 * stdInputs / stdOutputs arrays.  Each sample is INPUT_AMT numbers
 * followed by OUTPUT_AMT numbers, whitespace-separated.
 */
void read_std_data(void)
{
    Matrix<1, INPUT_AMT> tInput;
    Matrix<1, OUTPUT_AMT> tOutput;
    std::ifstream fin("in.txt");
    if (!fin.is_open())
    {
        // Previously a missing file was silently ignored, leaving the
        // training data zero-initialized; at least warn the user.
        std::cerr << "read_std_data: cannot open in.txt\n";
        return;
    }
    for (int i=0; i<TEST_AMT; ++i)
    {
        for (int t=0; t<INPUT_AMT; ++t)
            fin >> tInput[0][t];
        for (int t=0; t<OUTPUT_AMT; ++t)
            fin >> tOutput[0][t];
        stdInputs[i] = tInput;
        stdOutputs[i] = tOutput;
    }
    // NOTE(review): a truncated file leaves later extractions failed
    // (values set to 0 since C++11) — presumably in.txt always holds all
    // 16 samples; verify against the data file.
    // fin closes automatically via RAII.
}
// Echo every training sample to stdout: the input fields first, then the
// target output fields, one sample per line.
void print_std_data(void)
{
    for (int row=0; row<TEST_AMT; ++row)
    {
        for (int col=0; col<INPUT_AMT; ++col)
            std::cout << stdInputs[row][0][col] << ' ';
        for (int col=0; col<OUTPUT_AMT; ++col)
            std::cout << stdOutputs[row][0][col] << ' ';
        std::cout << '\n';
    }
}
int main()
{
    srand(time(nullptr)); // seed the RNG used for weight init and mutation
    read_std_data();      // load the training pairs from in.txt

    BP<8, 8> bp;          // two hidden layers of 8 neurons each
    bp.random_init_better(50000);
    bp.train();
    return 0;
}