通过摆脱pytorch和tensorflow完全搭建神经网络来夯实一下基础,害,双非的卑微努力
本篇搭建了卷积部分和池化的前向传播
首先需要复习卷积的基础概念
输入(feature), 卷积核(filter)中对应点的相乘
上图很清楚的表示了单通道feature的卷积计算,在多通道下,需要注意的是一个卷积核的shape(c, h, w),在每个卷积核的对应通道和feature的对应通道相乘后加起来,具体过程如下图所示
在理解上面的卷积过程后,如果自己尝试卷积,可能需要至少6个for循环,极大拖延效率,所以此次使用im2col来实现卷积的过程。
im2col原理
通过把每次卷积的区域保存成为一维数组,卷积的操作次数就是我们需要保存的所有个数, 构成了二维数组[h_out * w_out * n, f * f * c ],同样卷积核也变换成了这样的二维数组,这样就可以通过矩阵的乘法一次得出结果,速度比起原始的for循环快很多,这也是caffe的实现方式。
def Im2col(feature, w_shape, shape, stride):
    """Unroll every convolution window of an NHWC batch into one row.

    feature : padded input, shape (n, h, w, c_in).
    w_shape : kernel shape tuple; only the spatial size f (index 1/2) is used.
    shape   : output shape (n, h_out, w_out, c_out); c_out is unused here.
    stride  : convolution stride.

    Returns a 2-D array of shape (n * h_out * w_out, f * f * c_in), rows
    ordered batch-major (all positions of sample 0, then sample 1, ...).
    """
    c_in, f, f, c_out = w_shape
    n, h_out, w_out, c_out = shape
    # Collect one (f, f, c_in) patch per output position; np.array stacks
    # them and the reshape flattens each patch into a single row.
    patches = [
        feature[b, r * stride: r * stride + f, col * stride: col * stride + f, :]
        for b in range(n)
        for r in range(h_out)
        for col in range(w_out)
    ]
    return np.array(patches).reshape(n * h_out * w_out, -1)
池化部分如下
def Pool_Forward(image, f, stride, mode='max'):
    """Pooling forward pass for an NHWC batch.

    image  : input, shape (n, h, w, c).
    f      : pooling window size (f x f).
    stride : window step.
    mode   : 'max' for max pooling, 'ave' for average pooling.

    Returns an array of shape (n, h_out, w_out, c).

    Fixes over the original: the window start must advance by `stride`
    (the original indexed with the output coordinate directly, so any
    stride != 1 pooled the wrong windows), and the output width is now
    computed from w instead of assuming a square input.
    """
    n, h, w, c = image.shape
    h_out = (h - f) // stride + 1
    w_out = (w - f) // stride + 1
    out = np.zeros((n, h_out, w_out, c))
    for i in range(n):
        for ii in range(h_out):
            r0 = ii * stride          # top edge of the current window
            for iii in range(w_out):
                c0 = iii * stride     # left edge of the current window
                for iiii in range(c):
                    win = image[i, r0:r0 + f, c0:c0 + f, iiii]
                    if mode == 'max':
                        out[i, ii, iii, iiii] = np.max(win)
                    elif mode == 'ave':
                        out[i, ii, iii, iiii] = np.mean(win)
    return out
总体部分如下
import numpy as np
# N H W C
def Padding(image, pad):
    """Zero-pad the spatial dims of an NHWC batch by `pad` on each side.

    image : input, shape (n, h, w, c).
    pad   : number of zero rows/cols added on every spatial edge.

    Returns an array of shape (n, h + 2*pad, w + 2*pad, c).

    Fix: the original dropped `image` entirely and passed the per-axis pad
    widths as separate positional arguments; np.pad's signature is
    (array, pad_width, mode) with pad_width a tuple of per-axis pairs.
    """
    return np.pad(image, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
def Pool_Forward(image, f, stride, mode='max'):
    """Pooling forward pass for an NHWC batch.

    image  : input, shape (n, h, w, c).
    f      : pooling window size (f x f).
    stride : window step.
    mode   : 'max' for max pooling, 'ave' for average pooling.

    Returns an array of shape (n, h_out, w_out, c).

    Fixes over the original: the window start must advance by `stride`
    (the original indexed with the output coordinate directly, so any
    stride != 1 pooled the wrong windows), and the output width is now
    computed from w instead of assuming a square input.
    """
    n, h, w, c = image.shape
    h_out = (h - f) // stride + 1
    w_out = (w - f) // stride + 1
    out = np.zeros((n, h_out, w_out, c))
    for i in range(n):
        for ii in range(h_out):
            r0 = ii * stride          # top edge of the current window
            for iii in range(w_out):
                c0 = iii * stride     # left edge of the current window
                for iiii in range(c):
                    win = image[i, r0:r0 + f, c0:c0 + f, iiii]
                    if mode == 'max':
                        out[i, ii, iii, iiii] = np.max(win)
                    elif mode == 'ave':
                        out[i, ii, iii, iiii] = np.mean(win)
    return out
def Im2col(image, w_shape, shape, stride):
    """Unroll every convolution window of an NHWC batch into one row.

    image   : padded input, shape (n, h, w, c_in).
    w_shape : kernel shape (c_in, f, f, c_out) — matches the layout
              Conv_Forward unpacks from weight.shape; only f is used.
    shape   : output shape (n, h_out, w_out, c_out); c_out is unused here.
    stride  : convolution stride.

    Returns a 2-D array of shape (n * h_out * w_out, f * f * c_in), rows
    ordered batch-major so Conv_Forward's final reshape to
    (n, h_out, w_out, c_out) is correct.

    Fixes over the original: `w_shape[]` was a syntax error; `out` was a
    plain list but `.reshape` was called on it; the unpack order disagreed
    with Conv_Forward's (c_in, f, f, c_out); and the loop skipped the batch
    axis, producing position-major rows that scrambled batches after the
    downstream reshape.
    """
    c_in, f, f, c_out = w_shape
    n, h_out, w_out, c_out = shape
    out = []
    for i in range(n):
        for ii in range(h_out):
            for iii in range(w_out):
                out.append(image[i,
                                 ii * stride: ii * stride + f,
                                 iii * stride: iii * stride + f, :])
    return np.array(out).reshape(n * h_out * w_out, -1)
def Conv_Forward(feature_in, weight, bias, stride, pad):
    """Convolution forward pass via im2col.

    feature_in : input batch, shape (n, h_in, w_in, c_in), NHWC.
    weight     : kernels, shape (c_in, f, f, c_out).
    bias       : per-output-channel bias, shape (c_out,).
    stride     : convolution stride.
    pad        : 'same' (output spatial size = ceil(in / stride)) or
                 'valid' (no padding).

    Returns (feature_out, cache) where feature_out has shape
    (n, h_out, w_out, c_out) and cache holds what backprop needs.

    Fixes over the original: output sizes are now integers (true division
    left floats, which breaks np.reshape); 'same' padding is half the
    required total per side, not `f - stride` per side (which doubled it);
    the weight is transposed to (f, f, c_in, c_out) before flattening so
    its column order matches the NHWC patch layout produced by Im2col;
    and `bias` is actually added (it was accepted but never used).
    """
    # prepare
    (n, h_in, w_in, c_in) = feature_in.shape
    (c_in, f, f, c_out) = weight.shape
    if pad == 'same':
        # ceil division without math.ceil: -(-a // b)
        h_out = -(-h_in // stride)
        w_out = -(-w_in // stride)
        # symmetric padding per side; NOTE(review): assumes the required
        # total padding is even — odd totals would need asymmetric padding
        padding = max((h_out - 1) * stride + f - h_in, 0) // 2
    elif pad == 'valid':
        h_out = (h_in - f) // stride + 1
        w_out = (w_in - f) // stride + 1
        padding = 0
    shape = (n, h_out, w_out, c_out)
    feature_in_pad = Padding(feature_in, padding)
    # (n * h_out * w_out, f * f * c_in), one row per output position
    col_feature_in = Im2col(feature_in_pad, weight.shape, shape, stride)
    # reorder axes (c_in, f, f, c_out) -> (f, f, c_in, c_out) so the C-order
    # flatten matches the (f, f, c_in) layout of each im2col row
    col_weight = weight.transpose(1, 2, 0, 3).reshape(f * f * c_in, -1)
    feature_out = np.dot(col_feature_in, col_weight).reshape(n, h_out, w_out, c_out) + bias
    cache = [col_feature_in, weight, bias, stride, pad, feature_in.shape]
    return feature_out, cache