之前实现的CNN是多层卷积池化最后加上一个判别层形成的,但是只有一个通道一个核。对于三个通道的图像这样显然是不够好的,不能利用颜色信息。所以就大改了一波,形成一个多通道的CNN。还是先展示测试代码:
#include "cnn.hpp"
int main(int argc, char** argv)
{
using cnn_type = cnn <
nadam, sigmoid, XavierGaussian // 判别层使用的更新方法、激活函数和初始化方法
, 3, 32, 32, 4 // 3通道、32*32图像、4输出的判别层
, nadam, ReLu, XavierGaussian // 卷积池化层使用的更新方法、激活函数和初始化方法
, 3 // 3卷积核
, 2, 2 // 卷积核尺寸
, 2, 2 // 卷积核步幅
, 2, 2 // 池化层尺寸
, 3 // 层输出(下一层通道数)
, 3 // 3卷积核
, 2, 2 // 卷积核尺寸
, 2, 2 // 卷积核步幅
, 2, 2 // 池化层尺寸
, 3 // 层输出(下一层通道数)
, 2 // 2卷积核
, 2, 2 // 卷积核尺寸
, 2, 2 // 卷积核步幅
, 2, 2 // 池化层尺寸
, 2 // 最终输出
> ;
cnn_type cnn_layer;
cnn_type::input_type mti(.4);
cnn_type::ret_type mto(.8);
weight_initilizer<class def>::cal(mti, 0., 1.);
weight_initilizer<def>::cal(mto, 0, 1);
for (int i = 0; ; ++i)
{
auto mtok = cnn_layer.forward(mti);
cnn_layer.backward(mtok - mto);
if (i % 1000 == 0)
{
mto.print();
auto mmm = cnn_layer.forward(mti);
mmm.print();
_getch();
}
}
}
上面实现的是一个32*32三通道图像作为输入;3层卷积池化;一个以sigmoid为激活函数的BP神经网络作为判别层。测试结果展示如下:
可以看出,训练1000次后的结果已经很不错了;训练3000次后,输出与目标已经几乎完全一致。接下来是重点,我们来展示一下最近的修改。
mat.hpp为了实现嵌套做了一些修改:
#ifndef _MAT_HPP_
#define _MAT_HPP_
#include <cfloat>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <initializer_list>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <new>
#include <typeinfo>
#ifndef USER_BOOST
#include <boost/pool/pool.hpp>
#endif
/// Returns a copy of the larger of the two arguments as ordered by operator<.
/// When neither compares smaller than the other, v1 is returned.
template<typename val_t>
val_t max_and_swap(const val_t& v1, const val_t& v2)
{
    if (v1 < v2)
    {
        return v2;
    }
    return v1;
}
/// Fixed-size flat buffer of `i_size` elements of type `val_t`.
///
/// The buffer comes from malloc, or from a boost pool when USE_BOOST is
/// defined, and every element is constructed from 0. Each mat_m owns its own
/// allocation: copying duplicates the elements instead of sharing the pointer,
/// so two objects never free the same buffer.
template<int i_size, typename val_t>
struct mat_m
{
#ifdef USE_BOOST
    /// Pool whose chunk size is one whole buffer; only pool builds need it.
    static boost::pool<> s_pool;
#endif
    /// Start of the buffer holding i_size constructed elements.
    val_t* p;

    /// Allocates the buffer and constructs every element from 0.
    /// @throws std::bad_alloc when the allocation fails.
    mat_m() : p(allocate())
    {
        for (int i = 0; i < i_size; ++i)
        {
            new (p + i) val_t(0);
        }
    }

    /// Allocates a separate buffer and copy-constructs each element from `other`.
    /// @throws std::bad_alloc when the allocation fails.
    mat_m(const mat_m& other) : p(allocate())
    {
        for (int i = 0; i < i_size; ++i)
        {
            new (p + i) val_t(other.p[i]);
        }
    }

    /// Element-wise assignment into the existing buffer; the pointer is kept.
    mat_m& operator=(const mat_m& other)
    {
        if (this != &other)
        {
            for (int i = 0; i < i_size; ++i)
            {
                p[i] = other.p[i];
            }
        }
        return *this;
    }

    /// Destroys every element and returns the buffer to its allocator.
    ~mat_m()
    {
        if (!p)
        {
            return;
        }
        for (int i = 0; i < i_size; ++i)
        {
            p[i].~val_t();
        }
        release(p);
    }

    /// Run-time element access.
    /// @param len_1d   length of one row of the flat layout
    /// @param i_1d_idx row index
    /// @param i_2d_idx index inside the row
    /// @return reference to element `i_2d_idx + len_1d * i_1d_idx` (not range-checked)
    val_t& get(const int& len_1d, const int& i_1d_idx, const int& i_2d_idx)
    {
        return p[i_2d_idx + len_1d * i_1d_idx];
    }

    /// Largest absolute value over all elements, accumulated as double.
    val_t max_abs() const
    {
        // Bring in the floating-point overloads so a plain `abs` can never fall
        // back to the C `int abs(int)`; the unqualified call keeps
        // argument-dependent lookup available for class-type elements.
        using std::abs;
        double d = -1 * DBL_MAX;
        for (int i = 0; i < i_size; ++i)
        {
            const auto magnitude = abs(p[i]);
            d = d < magnitude ? magnitude : d;
        }
        return d;
    }

    /// Largest element according to max_and_swap, starting from -DBL_MAX.
    val_t max() const
    {
        val_t d = -1 * DBL_MAX;
        for (int i = 0; i < i_size; ++i)
        {
            d = max_and_swap(p[i], d);
        }
        return d;
    }

    /// Sum of all elements, accumulated in index order starting from 0.
    val_t sum() const
    {
        val_t d_sum = 0.;
        for (int i = 0; i < i_size; ++i)
        {
            d_sum = d_sum + p[i];
        }
        return d_sum;
    }

    /// Compile-time indexed access; the flat index is checked against i_size.
    template<int len_1d, int i_1d_idx, int i_2d_idx>
    inline val_t& get_val()
    {
        static_assert((i_2d_idx + len_1d * i_1d_idx) < i_size, "ERROR:mat_m over flow!!!");
        return p[i_2d_idx + len_1d * i_1d_idx];
    }

    /// Const counterpart of get_val(); returns a copy and applies the same bounds check.
    template<int len_1d, int i_1d_idx, int i_2d_idx>
    inline val_t get_val() const
    {
        static_assert((i_2d_idx + len_1d * i_1d_idx) < i_size, "ERROR:mat_m over flow!!!");
        return p[i_2d_idx + len_1d * i_1d_idx];
    }

private:
    /// Obtains raw, unconstructed storage for i_size elements.
    static val_t* allocate()
    {
#ifndef USE_BOOST
        void* raw = std::malloc(i_size * sizeof(val_t));
#else
        void* raw = s_pool.malloc();
#endif
        // Constructing elements through a null pointer would be undefined behaviour.
        if (!raw && i_size > 0)
        {
            throw std::bad_alloc();
        }
        return static_cast<val_t*>(raw);
    }

    /// Returns storage obtained from allocate().
    static void release(val_t* raw)
    {
#ifndef USE_BOOST
        std::free(raw);
#else
        s_pool.free(raw);
#endif
    }
};

#ifdef USE_BOOST
// Direct-initialised: no copy of the pool object is required.
template<int i_size, typename val_t>
boost::pool<> mat_m<i_size, val_t>::s_pool(i_size * sizeof(val_t));
#endif
template<int row_num, int col_num, typename val_t = double>
struct mat
{
// Element type of the matrix, exposed under two names.
using type = val_t;
typedef val_t vt;
// Compile-time dimensions: r rows, c columns.
static constexpr int r = row_num;
static constexpr int c = col_num;
// Storage type: one flat buffer of row_num * col_num elements.
using mat_m_t = mat_m<row_num * col_num, val_t>;
// Shared handle to the element buffer; the copy constructor and t() hand the
// same buffer to the new object instead of duplicating it.
std::shared_ptr<mat_m_t> pval;
// When true, get()/get_val() index the buffer with rows and columns swapped.
bool b_t;
/// Creates a non-transposed matrix backed by a freshly allocated buffer.
mat()
    : pval(std::make_shared<mat_m_t>())
    , b_t(false)
{
}
/// Shallow copy: the new matrix shares the element buffer of `other` and
/// takes over its transpose flag.
mat(const mat<row_num, col_num, val_t>& other)
    : pval(other.pval)
    , b_t(other.b_t)
{}
mat(const val_t&& v):b_t(false)
{
pval = std::make_shared<mat_m_t>();
for (int i = 0; i < row_num; ++i)
{
for (int j = 0; j < col_num; ++j)
{
pval->get(col_num, i, j) = v;
}
}
}
/// Creates a non-transposed matrix whose every element is a copy of `v`.
mat(const val_t& v)
    : pval(std::make_shared<mat_m_t>())
    , b_t(false)
{
    // Same visiting order as a row-by-row, column-by-column double loop.
    int k = 0;
    while (k < row_num * col_num)
    {
        pval->p[k] = v;
        ++k;
    }
}
#if 0
template<typename val_other_t>
mat(const val_other_t& v) :b_t(false)
{
pval = std::make_shared<mat_m_t>();
for (int i = 0; i < row_num; ++i)
{
for (int j = 0; j < col_num; ++j)
{
pval->get(col_num, i, j) = static_cast<val_t>(v);
}
}
}
#endif
mat(const std::initializer_list<val_t>& lst):b_t(false)
{
pval = std::make_shared<mat_m_t>();
auto itr = lst.begin();
for (int i = 0; i < row_num; ++i)
{
for (int j = 0; j < col_num; ++j)
{
if (itr == lst.end())return;
pval->get(col_num, i, j) = *itr;
itr++;
}
}
}
/// Mutable access to element (i_row, i_col); not range-checked.
/// A transposed view reads the buffer with the two indices swapped.
val_t& get(const int& i_row, const int& i_col)
{
    return b_t ? pval->get(row_num, i_col, i_row)
               : pval->get(col_num, i_row, i_col);
}
/// Returns a copy of element (i_row, i_col); not range-checked.
val_t get(const int& i_row, const int& i_col) const
{
    // Flat position in the buffer; a transposed view swaps the roles of the indices.
    const int flat = b_t ? (i_row + row_num * i_col)
                         : (i_col + col_num * i_row);
    return pval->p[flat];
}
/// Mutable access to element (i_1d_idx, i_2d_idx) with compile-time indices.
/// Indices outside the matrix are rejected at compile time, matching the
/// const overload.
template<int i_1d_idx, int i_2d_idx>
inline val_t& get_val()
{
    static_assert(i_1d_idx < row_num && i_2d_idx < col_num, "ERROR: mat::get_val overflow!!!!!");
    // `template` is required: pval's pointee type depends on the class
    // template parameters, so `get_val<` would otherwise parse as a comparison.
    if (!b_t)
        return pval->template get_val<col_num, i_1d_idx, i_2d_idx>();
    else
        return pval->template get_val<row_num, i_2d_idx, i_1d_idx>();
}
/// Returns a copy of element (i_1d_idx, i_2d_idx) with compile-time indices.
/// Indices outside the matrix are rejected at compile time.
template<int i_1d_idx, int i_2d_idx>
inline val_t get_val() const
{
    static_assert(i_1d_idx < row_num && i_2d_idx < col_num, "ERROR: mat::get_val overflow!!!!!");
    // `template` is required: pval's pointee type depends on the class
    // template parameters, so `get_val<` would otherwise parse as a comparison.
    if (!b_t)
        return pval->template get_val<col_num, i_1d_idx, i_2d_idx>();
    else
        return pval->template get_val<row_num, i_2d_idx, i_1d_idx>();
}
/// Returns the transpose as a view: the result shares this matrix's buffer and
/// carries the opposite transpose flag, so writes through either are visible in both.
mat<col_num, row_num, val_t> t()
{
    mat<col_num, row_num, val_t> transposed;
    transposed.b_t = !b_t;
    // Replaces the buffer the default constructor allocated with the shared one.
    transposed.pval = pval;
    return transposed;
}
/// Largest absolute value in the matrix; delegates to the storage object.
val_t max_abs() const
{
    const mat_m_t& storage = *pval;
    return storage.max_abs();
}
/// Largest element of the matrix; delegates to the storage object.
val_t max() const
{
    const mat_m_t& storage = *pval;
    return storage.max();
}
/// Sum of all elements; delegates to the storage object.
val_t sum() const
{
    const mat_m_t& storage = *pval;
    return storage.sum();
}
/// Writes the matrix to std::cout, one bracketed row per line, with each
/// element right-aligned in a field of width 10 and separated by commas.
void print() const
{
    std::cout << "[" << std::endl;
    for (int row = 0; row < row_num; ++row)
    {
        std::cout << std::setw(3) << "[";
        for (int col = 0; col < col_num; ++col)
        {
            if (col != 0)
            {
                std::cout << ",";
            }
            std::cout << std::setw(10) << get(row, col);
        }
        std::cout << std::setw(3) << "]" << std::endl;
    }
    std::cout << "]" << std::endl;
}
/// Member-call form of the free function ::dot: forwards this matrix as the
/// left operand and `mt` as the right operand.
template<int other_col_num>
mat<row_num, other_col_num, val_t> dot(const mat<col_num, other_col_num, val_t>& mt) const
{
    const mat<row_num, col_num, val_t>& lhs = *this;
    return ::dot(lhs, mt);
}
/// Returns a new matrix rotated by 180 degrees: element (i, j) of the result
/// is element (row_num-1-i, col_num-1-j) of this matrix.
mat<row_num, col_num, val_t> rot180() const
{
    mat<row_num, col_num, val_t> rotated;
    for (int dst_row = 0; dst_row < row_num; ++dst_row)
    {
        const int src_row = row_num - 1 - dst_row;
        for (int dst_col = 0; dst_col < col_num; ++dst_col)
        {
            const int src_col = col_num - 1 - dst_col;
            rotated.get(dst_row, dst_col) = get(src_row, src_col);
        }
    }
    return rotated;
}
/// Copies `mt_other` into this matrix with its top-left corner placed at
/// (row_base, col_base). Source elements that would land outside this matrix
/// (negative or too large a position) are dropped.
template<int row_base, int col_base, int row_num_other, int col_num_other>
void assign(const mat<row_num_other, col_num_other, val_t>& mt_other)
{
    // Clip the source rectangle once instead of testing every element.
    const int first_row = row_base < 0 ? -row_base : 0;
    const int first_col = col_base < 0 ? -col_base : 0;
    const int last_row = (row_num - row_base) < row_num_other ? (row_num - row_base) : row_num_other;
    const int last_col = (col_num - col_base) < col_num_other ? (col_num - col_base) : col_num_other;

    for (int src_row = first_row; src_row < last_row; ++src_row)
    {
        for (int src_col = first_col; src_col < last_col; ++src_col)
        {
            get(src_row + row_base, src_col + col_base) = mt_other.get(src_row, src_col);
        }
    }
}
/// Returns a copy of this matrix surrounded by padding: `top_pad`/`bottom_pad`
/// extra rows and `left_pad`/`right_pad` extra columns. Padding cells keep the
/// value a default-constructed matrix holds.
template<int top_pad, int left_pad, int right_pad, int bottom_pad>
mat<row_num + top_pad + bottom_pad, col_num + left_pad + right_pad, val_t>
pad() const
{
    using mat_ret_t = mat<row_num + top_pad + bottom_pad, col_num + left_pad + right_pad, val_t>;
    mat_ret_t mt_ret;
    // `template` is required because mat_ret_t is a dependent type; without it
    // `assign<` is parsed as a less-than comparison by conforming compilers.
    mt_ret.template assign<top_pad, left_pad>(*this);
    return mt_ret;
}
/// Returns a matrix in which the elements of this one are spread apart:
/// `row_span` extra rows are inserted between neighbouring rows and `col_span`
/// extra columns between neighbouring columns. Inserted cells keep the value a
/// default-constructed matrix holds. The result has the same element type as
/// this matrix (previously it silently fell back to the default, double).
template<int row_span, int col_span>
mat<row_num + row_span * (row_num - 1), col_num + col_span * (col_num - 1), val_t>
span() const
{
    using mat_ret_t = mat<row_num + row_span * (row_num - 1), col_num + col_span * (col_num - 1), val_t>;
    mat_ret_t mt_ret;
    for (int r = 0; r < row_num; ++r)
    {
        for (int c = 0; c < col_num; ++c)
        {
            // Source (r, c) lands on every (span + 1)-th row and column.
            mt_ret.get(r * (row_span + 1), c * (col_span + 1)) = get(r, c);
        }
    }
    return mt_ret;
}
/// Finds the largest element inside the window that starts at
/// (row_base, col_base) and spans row_len x col_len cells, clipped to the matrix.
/// @param i_row receives the row of the maximum
/// @param i_col receives the column of the maximum
/// @return the maximum, or -DBL_MAX when no element exceeds it (the two
///         out-parameters are then left untouched)
template<int row_base, int col_base, int row_len, int col_len>
val_t region_max(int& i_row, int& i_col) const
{
    static_assert(row_base < row_num && col_base < col_num, "region_max overflow!!!");
    val_t best = -1. * DBL_MAX;
    for (int row = row_base; row < row_base + row_len && row < row_num; ++row)
    {
        for (int col = col_base; col < col_base + col_len && col < col_num; ++col)
        {
            const val_t candidate = get(row, col);
            if (best < candidate)
            {
                i_row = row;
                i_col = col;
                best = candidate;
            }
        }
    }
    return best;
}
/// Returns a single-column view of the same buffer: the result shares this
/// matrix's storage and exposes it in buffer order. The transpose flag is not
/// carried over.
mat<row_num*col_num, 1, val_t> one_col() const
{
    using column_t = mat<row_num*col_num, 1, val_t>;
    column_t flattened;
    // Replaces the buffer the default constructor allocated with the shared one.
    flattened.pval = pval;
    return flattened;
}
/// Prints the implementation-defined type name of `t` to std::cout.
template<typename t>
static void print_sub_type(t)
{
    const char* const type_name = typeid(t).name();
    std::cout << type_name;
}
/// Overload for matrix-valued elements: prints the nested matrix's own type
/// description via its print_type().
template<int r, int c, typename t>
static void print_sub_type(mat<r, c, t>)
{
    using nested_t = mat<r, c, t>;
    nested_t::print_type();
}
/// Prints a description of this matrix type: its dimensions followed by its
/// element type. A matrix-valued element type is described recursively.
static void print_type()
{
    printf("<matrix %d * %d type: ", row_num, col_num);
    // The template argument is deduced rather than spelled out: an explicit
    // <val_t> cannot match the <int, int, typename> parameter list of the
    // mat<r, c, t> overload, which would therefore never be selected.
    print_sub_type(val_t());
    printf(">\r\n");
}
/// Read-only access to the idx-th element of the underlying buffer
/// (buffer order; the transpose flag is not consulted, no range check).
const val_t& operator[](const int& idx) const
{
    const val_t* const cells = pval->p;
    return *(cells + idx);
}
/// Mutable access to the idx-th element of the underlying buffer
/// (buffer order; the transpose flag is not consulted, no range check).
val_t& operator[](const int& idx)
{
    val_t* const cells = pval->p;
    return *(cells + idx);
}
friend std::ostream& operator<<(std::ostream& ofs, const mat<row_num, col_num, val_t>& mt)
{
std::cout << "[" ;
for (int i = 0; i < row_num; ++i)
{
std::cout << std::setw(3) << "[";
for (int j = 0; j < col_num; ++j)
{
std::cout << (j != 0 ? "," : "") << mt.get(i, j);
}
std::cout << std::setw(3) << "]";
}
std::cou