Deep Learning Notes 9: Implementing the Convolution Layer (forward and backward)


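For reference, with input size $n_{\text{in}}$, padding $(p_1, p_2)$, stride $s$, dilation $d$, and kernel size $k$, the output size used throughout the implementations below is:

$$\tilde{k} = k + (k-1)(d-1), \qquad n_{\text{out}} = \left\lfloor \frac{n_{\text{in}} + p_1 + p_2 - \tilde{k}}{s} \right\rfloor + 1$$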
The convolution layer implemented with for loops:

The convolution operation:

import time
import numpy as np
from different_convolution import Pad2D  # padding helper from the earlier notes

"""Loop-based (non-matrix) implementation of 2D convolution."""
def Conv2d(X, W, stride, pad, dilation=1):
    """
    Parameters:
    :param X: input, shape: (batchsize, in_rows, in_cols, in_channels)
    :param W: weights, shape: (kernel_row, kernel_col, in_channels, out_channels)
    :param stride: stride, int
    :param pad: padding, a tuple, or "same" / "valid"
    :param dilation: dilation rate
    :return: convolution result, shape: (batchsize, out_rows, out_cols, out_channels)
    """
    s, d = stride, dilation
    X_pad, p = Pad2D(X, pad, W.shape[:2], stride=s, dilation=dilation)
    pr1, pr2, pc1, pc2 = p
    x_samples, x_rows, x_cols, in_channels = X.shape
    fr, fc, in_channels, out_channels = W.shape
    # First compute the output shape.
    # Effective kernel size under dilation:
    _fr, _fc = fr + (fr - 1) * (d - 1), fc + (fc - 1) * (d - 1)
    out_rows = int((x_rows + pr1 + pr2 - _fr + s) / s)
    out_cols = int((x_cols + pc1 + pc2 - _fc + s) / s)
    # Build the output array (zero-initialized) and fill in each position.
    Z = np.zeros(shape=(x_samples, out_rows, out_cols, out_channels))
    start_time = time.time()
    for m in range(x_samples):
        for c in range(out_channels):
            for i in range(out_rows):
                for j in range(out_cols):
                    # Convolve the window whose top-left corner is (i*s, j*s).
                    i0, i1 = i * s, i * s + _fr  # current position plus the (dilated) kernel extent
                    j0, j1 = j * s, j * s + _fc
                    # Slice the window out of X_pad: sample m, all input channels, shape (fr, fc, in_channels).
                    window = X_pad[m, i0:i1:d, j0:j1:d, :]
                    # W[:, :, :, c] also has shape (fr, fc, in_channels); summing the
                    # elementwise product over all channels yields a scalar.
                    Z[m, i, j, c] = np.sum(window * W[:, :, :, c])
    t = time.time() - start_time
    print(f"compute timing used {t}")
    return Z
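
A quick shape sanity check (a minimal sketch with illustrative shapes; assumes Pad2D's "same" mode, as used in the test further below):

x = np.random.random(size=(2, 8, 8, 3))    # 2 samples, 8x8, 3 channels
w = np.random.random(size=(3, 3, 3, 4))    # 3x3 kernel, 3 -> 4 channels
z = Conv2d(x, w, stride=1, pad="same")
print(z.shape)  # expected: (2, 8, 8, 4) with "same" padding and stride 1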


The convolution layer:

"""The convolution layer (forward and backward)"""
from utils import LayerBase, WeightInitializer, ActivationInitializer
from init_method import glorot_uniform

# Now build the actual convolution layer. The code above is only the raw operation; the layer
# below also needs forward propagation, backward propagation, parameters, gradient storage, etc.
class Conv2D(LayerBase):
    def __init__(self,
                 out_ch,
                 kernel_shape,
                 pad=0,
                 stride=1,
                 dilation=1,
                 acti_fn=None,
                 optimizer=None,
                 init_w="glorot_uniform"):
        """
        2D convolution layer.
        :param out_ch: number of output channels, i.e. the number of kernels, int
        :param kernel_shape: kernel shape, (int, int)
        :param pad: padding, a tuple, or "same" / "valid"
        :param stride: stride, int
        :param dilation: dilation rate
        :param acti_fn: activation function
        :param optimizer: optimizer
        :param init_w: weight initialization scheme
        """
        super().__init__(optimizer)
        self.pad = pad
        self.stride = stride
        self.dilation = dilation
        self.kernel_shape = kernel_shape
        self.out_ch = out_ch
        self.in_ch = None
        self.init_w = init_w
        self.init_weight = WeightInitializer(mode=init_w)
        self.acti_fn = ActivationInitializer(acti_name=acti_fn)()
        self.params = {"W": None, "b": None}
        self.is_initialized = False

    def _init_params(self):
        """Initialize the layer parameters."""
        fr, fc = self.kernel_shape  # kernel size
        W = self.init_weight((fr, fc, self.in_ch, self.out_ch))
        b = np.zeros((1, 1, 1, self.out_ch))  # output shape: (n_samples, h, w, out_channels)
        self.params = {"W": W, "b": b}
        self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)}
        self.derived_variables = {"Y": []}  # stores the pre-activation outputs
        self.is_initialized = True

    def forward(self, X, retain_derived=True):
        """
        Forward propagation.
        :param X: input, shape: (n_samples, in_rows, in_cols, in_ch)
        :return: activated output
        """
        if not self.is_initialized:
            self.in_ch = X.shape[3]  # (bz, h, w, c)
            self._init_params()
        W = self.params["W"]
        b = self.params["b"]

        n_samp, in_rows, in_cols, in_ch = X.shape
        s, p, d = self.stride, self.pad, self.dilation

        Y = Conv2d(X, W, s, p, d) + b

        # apply the activation to Y:
        a = self.acti_fn(Y)
        if retain_derived:
            self.X.append(X)  # X is stored as a list
            self.derived_variables["Y"].append(Y)
        return a  # the returned result has passed through the activation


    def backward(self, dLda, retain_grads=True):
        # dLda is the gradient w.r.t. the activated output a; dLdY = dLda * dadY, where dadY = acti_fn.grad(Y)
        """
        Backward propagation.
        :param dLda: gradient flowing back from the next layer (or from the loss).
        :param retain_grads: whether to accumulate the parameter gradients
        :return: dXs: gradient of the loss w.r.t. this layer's input, shape: (n_samples, in_rows, in_cols, in_ch)
        """
        if not isinstance(dLda, list):
            dLda = [dLda]  # wrap in a list, because X is also stored as a list

        W = self.params["W"]
        b = self.params["b"]
        Ys = self.derived_variables["Y"]

        Xs, d = self.X, self.dilation

        (fr, fc), s, p = self.kernel_shape, self.stride, self.pad

        dXs = []
        for X, Y, da in zip(Xs, Ys, dLda):
            # If backpropagation runs after every batch, then X, Y, da are simply Xs, Ys, dLda.
            # But if (e.g. for memory reasons) backpropagation runs only once every few batches,
            # then Xs, Ys, dLda hold lists covering several batches, hence this loop.
            n_samp, out_rows, out_cols, out_ch = da.shape  # n_samp is the batch size
            X_pad, (pr1, pr2, pc1, pc2) = Pad2D(X, p, self.kernel_shape, s, d)

            dY = da * self.acti_fn.grad(Y)  # Y passes through the activation, so chain through its gradient
            dX = np.zeros_like(X_pad)

            dW, db = np.zeros_like(W), np.zeros_like(b)

            for m in range(n_samp):
                for i in range(out_rows):
                    for j in range(out_cols):
                        for c in range(out_ch):  # accumulate one scalar contribution at a time
                            i0, i1 = i * s, (i * s) + fr + (fr - 1) * (d - 1)  # coordinates into dX
                            j0, j1 = j * s, (j * s) + fc + (fc - 1) * (d - 1)
                            wc = W[:, :, :, c]
                            kernel = dY[m, i, j, c]
                            window = X_pad[m, i0:i1:d, j0:j1:d, :]
                            db[:, :, :, c] += kernel
                            dW[:, :, :, c] += window * kernel  # forward: Z[m,i,j,c] = np.sum(window * W[:,:,:,c])
                            dX[m, i0:i1:d, j0:j1:d, :] += wc * kernel  # each dX entry receives contributions from several outputs

            if retain_grads:
                self.gradients["W"] += dW
                self.gradients["b"] += db
            pr2 = None if pr2 == 0 else -pr2
            pc2 = None if pc2 == 0 else -pc2
            dXs.append(dX[:, pr1:pr2, pc1:pc2, :])  # strip the zero padding

        return dXs[0] if len(Xs) == 1 else dXs


    @property
    def hyperparams(self):
        return {
            "layer": "Conv2D",
            "pad": self.pad,
            "init_w": self.init_w,
            "in_ch": self.in_ch,
            "out_ch": self.out_ch,
            "stride": self.stride,
            "dilation": self.dilation,
            "acti_fn": str(self.acti_fn),
            "kernel_shape": self.kernel_shape,
            "optimizer": {
                "cache": self.optimizer.cache,
                "hyperparams": self.optimizer.hyperparams,
            },
        }
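
For reference, the scalar updates inside the quadruple loop implement the standard convolution gradients. Writing $w^{(m,i,j)}$ for the (dilated) input window that produced $Y_{m,i,j,c}$:

$$\frac{\partial L}{\partial W_{:,:,:,c}} = \sum_{m,i,j} w^{(m,i,j)} \, \frac{\partial L}{\partial Y_{m,i,j,c}}, \qquad \frac{\partial L}{\partial b_c} = \sum_{m,i,j} \frac{\partial L}{\partial Y_{m,i,j,c}}$$

and each window of $\partial L / \partial X_{\text{pad}}$ accumulates $W_{:,:,:,c} \cdot \partial L / \partial Y_{m,i,j,c}$ over every output position $(m,i,j,c)$ whose window covers it.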

The convolution operation and layer implemented with matrix multiplication

Converting the image into column ("strip") form; for details see the linked reference.


import numpy as np
from different_convolution import Pad2D

def _im2col_indices(x_shape, fr, fc, p, s, d=1):
    """Compute the fancy indices that extract every convolution window."""
    pr1, pr2, pc1, pc2 = p
    n_ex, n_in, in_rows, in_cols = x_shape
    _fr, _fc = fr + (fr - 1) * (d - 1), fc + (fc - 1) * (d - 1)

    out_rows = int((in_rows + pr1 + pr2 - _fr + s) / s)
    out_cols = int((in_cols + pc1 + pc2 - _fc + s) / s)

    # Row offsets within one kernel, repeated per kernel column (000 111 222), tiled once per input channel.
    i0 = np.repeat(np.arange(fr), fc)
    i0 = np.tile(i0, n_in) * d
    # Row coordinate of each output position: 00...0 11...1 22...2, one entry per output pixel,
    # since every output position needs its own convolution window.
    i1 = s * np.repeat(np.arange(out_rows), out_cols)
    # Column offsets within one kernel (the relative index).
    j0 = np.tile(np.arange(fc), fr * n_in)
    # Column coordinate of each output position (the absolute index);
    # (i1, j1) locate the window, (i0, j0) locate the element inside the kernel.
    j1 = s * np.tile(np.arange(out_cols), out_rows)
    # First index array: rows.
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    # Second index array: columns.
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    # Third index array: channels.
    k = np.repeat(np.arange(n_in), fr * fc).reshape(-1, 1)
    return k, i, j


# k, i, j = _im2col_indices((10, 3, 28, 28), 3, 3, (1, 1, 1, 1), s=1, d=1)
# print(k.shape)  # (27, 1) -- the channel index completes the 3D coordinates (display abridged):
# [
#  [0]
#  [0]
#  [0]
#  [1]
#  [1]
#  [1]
#  [2]
#  [2]
#  [2]
#  ]


# print(i.shape)  # (27, 784)
# Rows correspond top-to-bottom to kernel elements: 27 = k*k*n_in; 784 = out_rows*out_cols.
# [[ 0  0  0 ... 27 27 27]
#  [ 0  0  0 ... 27 27 27]
#  [ 0  0  0 ... 27 27 27]
#  ...
#  [ 2  2  2 ... 29 29 29]
#  [ 2  2  2 ... 29 29 29]
#  [ 2  2  2 ... 29 29 29]]


# print(j.shape)  # (27, 784)

# [[ 0  1  2 ... 25 26 27]
#  [ 1  2  3 ... 26 27 28]
#  [ 2  3  4 ... 27 28 29]
#  ...
#  [ 0  1  2 ... 25 26 27]
#  [ 1  2  3 ... 26 27 28]
#  [ 2  3  4 ... 27 28 29]]
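
To see what these indices do, one can verify (a minimal sketch with a hypothetical 6x6 NCHW input and no padding) that each gathered column is exactly one convolution window, flattened channel-major:

X_pad = np.arange(1 * 3 * 6 * 6).reshape(1, 3, 6, 6)      # one sample, NCHW, already padded
k, i, j = _im2col_indices((1, 3, 6, 6), 3, 3, (0, 0, 0, 0), s=1, d=1)
cols = X_pad[:, k, i, j]                                   # shape: (1, 3*3*3, 4*4)
first_window = X_pad[0, :, 0:3, 0:3].reshape(-1)           # channel-major flattening
assert np.array_equal(cols[0, :, 0], first_window)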
def im2col(X, W_shape, pad, stride, dilation=1):
    fr, fc, n_in, n_out = W_shape
    s, p, d = stride, pad, dilation
    n_samp, in_rows, in_cols, n_in = X.shape
    X_pad, p = Pad2D(X, p, W_shape[:2], stride=s, dilation=d)
    pr1, pr2, pc1, pc2 = p
    # Move the channel dimension to the second position (NHWC -> NCHW).
    X_pad = X_pad.transpose(0, 3, 1, 2)
    k, i, j = _im2col_indices((n_samp, n_in, in_rows, in_cols), fr, fc, p, s, d)
    # X_col.shape = (n_samples, kernel_rows*kernel_cols*n_in, out_rows*out_cols)
    X_col = X_pad[:, k, i, j]
    X_col = X_col.transpose(1, 2, 0).reshape(fr * fc * n_in, -1)
    return X_col, p
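
A quick shape check (a minimal sketch; assumes Pad2D's "same" mode, as in the test further below):

X = np.random.random((10, 32, 32, 3))                # NHWC input
X_col, p = im2col(X, (3, 3, 3, 64), pad="same", stride=1)
print(X_col.shape)  # (27, 10240): fr*fc*n_in = 27 rows, out_rows*out_cols*n_samp = 32*32*10 columns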

Converting col_img back to the (4D) image form:


""" 矩阵方法实现的卷积层"""
def col2im(X_col, X_shape, W_shape, pad, stride, dilation=0):
    """
    功能::将2d 图像变化为4d 图像。
    :param X_col:
    :param X_shape: 原始输入形状: bs, in_rows, in_cols, in_ch
    :param W_shape: krenel_rows, kernel_cols, in_ch, out_ch
    :param pad: 4-tuple
    :param stride: int 型
    :param dilation: 扩张率 default =1
    :return image : nsamples, in_rows, in_cols, in_ch
    """
    s, d = stride, dilation
    pr1, pr2, pc1, pc2 = pad

    fr, fc, n_in, n_out = W_shape
    n_samp, in_rows, in_cols, n_in = X_shape
    X_pad = np.zeros((n_samp, n_in, in_rows + pr1+pr2, in_cols+ pc1+pc2)) # 输出的图像,现在要做的就是将X_col 转换成X_pad 的形式。
    k,i,j = _im2col_indices(x_shape =(n_samp, n_in, in_rows, in_cols), fr=fr, fc=fc, p=pad, s=s, d=d)
    # 得到了相应索引,要做的就是通过这个索引来将col格式的图像转化回img格式的图像。
    # X_col.shape = (n_samples, kernel_rows * kernel_cols * n_in, out_rows * out_cols)
    X_col_reshaped = X_col.reshape(n_in*fr*fc, -1, n_samp) # shape: (nin*k*k, out_r*out_col, n_samp).
    X_col_reshaped = X_col_reshaped.transpose(2,0,1)  # shape : (n_samp, nin*k*k, out_r*out_col)

    # 之前是从xpad的索引kij得到xcol_reshaped , 现在是要反向操作从 x_col_reshaped 得到xpad
    np.add.at(X_pad, (slice(None), k, i,j), X_col_reshaped) # ???
    # slice 的作用就是等价与: 所以中间: (slice(none), k, i, j) ---> (:,k,i,j )
    # at 函数的作用:a[indices] += b`
    # np.add.at 是np.unfunc.at(a, idc, b) 的一种应用,但是如果idc重复的索引会计算多次,但是
    # 本稳重的 slice (k,i,j) 对应的索引都是不重复的,所以并不会被计算多次????
    # b = np.asarray([[1, 2, 3, 4, 5, 6]])

    # bexp = b[:,[[2,3,4],[3,4,5]]]   ------ 3 4 是重复的位置。
    # bexp
    # Out[56]:
    # array([[[3, 4, 5],
    #         [4, 5, 6]]])
    # c = np.zeros_like(b)
    # c
    # Out[58]: array([[0, 0, 0, 0, 0, 0]])
    # np.add.at(c, (slice(None),[[2,3,4],[3,4,5]] ), bexp)
    # c
    # Out[60]: array([[ 0,  0,  3,  8, 10,  6]])
    # b
    # Out[61]: array([[1, 2, 3, 4, 5, 6]])
    # c[(slice(None),[[2,3,4],[3,4,5]] )] += bexp
    # b
    # Out[63]: array([[1, 2, 3, 4, 5, 6]])
    # c
    # Out[64]: array([[ 0,  0,  6, 12, 15, 12]])
    # d = np.zeros_like(c)
    # d[(slice(None),[[2,3,4],[3,4,5]] )] += bexp
    # d
    # Out[67]: array([[0, 0, 3, 4, 5, 6]])
    pr2 = None if pr2==0 else pr2 = -pr2
    pc2 = None if pc2==0 else pc2 = -pc2
    # 定义尾部 切去两边,得到原图像。但是这里有个问题是: np.add.at() 会将重复的进行多次相加。xpad 在取索引的时候就是被
    # 多次取重复的元素,那么有的元素就会多次相加。
    # 修正:
    X_pad[(slice(None), k,i,j)] += X_col_reshaped
    # 这样的xpad 才是原始的xpad

    pr2 = None if pr2 == 0 else pr2 = -pr2
    pc2 = None if pc2 == 0 else pc2 = -pc2

    return X_pad[:,:,pr1:pr2, pc1:pc2]
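
As a round-trip check (a minimal sketch; assumes Pad2D accepts an explicit 4-tuple pad): with a 1x1 kernel, stride 1, and no padding, no windows overlap, so col2im(im2col(X)) reproduces X exactly:

X = np.random.random((2, 5, 5, 3))
W_shape = (1, 1, 3, 8)
X_col, p = im2col(X, W_shape, pad=(0, 0, 0, 0), stride=1)
X_rec = col2im(X_col, X.shape, W_shape, p, stride=1)   # returned in NCHW
assert np.allclose(X_rec.transpose(0, 2, 3, 1), X)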

The convolution operation:


def conv2D_gemm(X, W, stride=1, pad="same", dilation=1):
    s, d = stride, dilation
    _, p = Pad2D(X, pad, W.shape[:2], s, dilation=dilation)
    pr1, pr2, pc1, pc2 = p
    fr, fc, in_ch, out_ch = W.shape
    n_samp, in_rows, in_cols, in_ch = X.shape
    # Effective kernel size under dilation
    _fr, _fc = fr + (fr - 1) * (d - 1), fc + (fc - 1) * (d - 1)
    # Output dimensions, from the formula above
    out_rows = int((in_rows + pr1 + pr2 - _fr) / s + 1)
    out_cols = int((in_cols + pc1 + pc2 - _fc) / s + 1)
    # Convert X and W into 2D matrices and take their product
    X_col, _ = im2col(X, W.shape, p, s, d)
    W_col = W.transpose(3, 2, 0, 1).reshape(out_ch, -1)
    Z = (W_col @ X_col).reshape(out_ch, out_rows, out_cols, n_samp).transpose(3, 1, 2, 0)
    return Z
# test conv2D_gemm:
"""
x = np.random.random(size=(10, 32, 32, 3))   # batchsize 10, 32x32, 3 channels
w = np.random.random(size=(3, 3, 3, 64))     # 3 input channels, 64 output channels, 3x3 kernel

import time
start = time.time()
conv_result = conv2D_gemm(x, w, stride=2, pad="same", dilation=1)
print(f"using time  : {time.time()-start}")
print(conv_result.shape)
"""

The convolution layer:


"""The convolution layer, matrix (GEMM) implementation"""
class Conv2D_gemm(LayerBase):
    """Matrix implementation of convolution; the key point is how backpropagation is defined."""
    def __init__(self,
                 out_ch,
                 kernel_shape,
                 pad=0,
                 stride=1,
                 dilation=1,
                 acti_fn=None,
                 optimizer=None,
                 init_w="glorot_uniform",
                 ):
        """
        Parameters:
        :param out_ch: number of output channels
        :param kernel_shape: shape of a single kernel
        :param pad: padding
        :param stride: stride of the kernel, int
        :param dilation: dilation rate
        :param acti_fn: activation function
        :param optimizer: optimizer
        :param init_w: weight initialization scheme
        """
        super().__init__(optimizer)
        self.out_ch = out_ch
        self.in_ch = None
        self.pad = pad
        self.kernel_shape = kernel_shape
        self.stride = stride
        self.dilation = dilation
        self.optimizer = optimizer
        self.init_w = init_w
        self.init_weights = WeightInitializer(mode=init_w)
        self.acti_fn = ActivationInitializer(acti_fn)()
        self.params = {"W": None, "b": None}
        self.is_initialized = False

    def _init_params(self):
        fr, fc = self.kernel_shape
        W = self.init_weights((fr, fc, self.in_ch, self.out_ch))
        b = np.zeros((1, 1, 1, self.out_ch))
        self.params = {"W": W, "b": b}
        self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)}
        self.derived_variables = {"Y": []}
        self.is_initialized = True

    def forward(self, X, retain_derived=True):
        """Forward propagation, using the matrix method."""
        if not self.is_initialized:
            self.in_ch = X.shape[3]  # X shape: (n_samp, w, h, in_ch)
            self._init_params()

        W = self.params["W"]
        b = self.params["b"]
        n_samp, in_rows, in_cols, in_ch = X.shape
        s, p, d = self.stride, self.pad, self.dilation

        Y = conv2D_gemm(X, W, stride=s, pad=p, dilation=d) + b   # Y = X * W
        a = self.acti_fn(Y)

        if retain_derived:
            self.X.append(X)
            self.derived_variables["Y"].append(Y)

        return a


    def backward(self, dLda, retain_grads=True):
        """
        Backward propagation.
        :param dLda: shape: (n_samp, out_rows, out_cols, out_ch), the same shape as a and Y
        :param retain_grads: whether to accumulate the parameter gradients
        :return: the gradient w.r.t. the input X
        """
        if not isinstance(dLda, list):
            dLda = [dLda]
        X = self.X
        dX = []
        Y = self.derived_variables["Y"]
        for da, x, y in zip(dLda, X, Y):
            dx, dw, db = self._bwd(da, x, y)
            dX.append(dx)
            if retain_grads:
                self.gradients["W"] += dw
                self.gradients["b"] += db
        return dX[0] if len(dX) == 1 else dX

    def _bwd(self, dLda, X, Y):
        """The actual backward computation."""
        W = self.params["W"]
        d = self.dilation
        fr, fc, in_ch, out_ch = W.shape
        n_samp, out_rows, out_cols, out_ch = dLda.shape
        # First convert X to column form, because that is how the forward pass computed Y.
        # The gradients computed against this column form are themselves in column form,
        # but the same index arithmetic maps them back to the 4D layout (via col2im).
        s, p = self.stride, self.pad

        dLdy = dLda * self.acti_fn.grad(Y)  # shape: (n_samp, out_rows, out_cols, out_ch), same as a and Y
        dLdy_col = dLdy.transpose(3, 1, 2, 0).reshape(out_ch, -1)  # Y = W * X
        # (out_ch, out_rows, out_cols, n_samp) --> (out_ch, out_rows*out_cols*n_samp)
        X_col, p = im2col(X, W.shape, p, s, d)  # X_col shape: (n_in*fr*fc, out_rows*out_cols*n_samp)
        W_col = W.transpose(3, 2, 0, 1).reshape(out_ch, -1).T  # (fr,fc,n_in,n_out) -> (n_out,n_in,fr,fc) -> (n_out, n_in*fr*fc), transposed
        # dW = dY @ X.T
        # dX = W.T @ dY
        # db = dY.sum(axis=1).reshape(1,1,1,-1)
        dW = (dLdy_col @ X_col.T).reshape(out_ch, in_ch, fr, fc).transpose(2, 3, 1, 0)
        db = dLdy_col.sum(axis=1).reshape(1, 1, 1, -1)
        dX_col = W_col @ dLdy_col  # still in column form
        dX = col2im(dX_col, X.shape, W.shape, p, s, d).transpose(0, 2, 3, 1)  # back to NHWC
        return dX, dW, db

    @property
    def hyperparams(self):
        return {
            "layer": "Conv2D",
            "pad": self.pad,
            "init_w": self.init_w,
            "in_ch": self.in_ch,
            "out_ch": self.out_ch,
            "stride": self.stride,
            "dilation": self.dilation,
            "acti_fn": str(self.acti_fn),
            "kernel_shape": self.kernel_shape,
            "optimizer": {
                "cache": self.optimizer.cache,
                "hyperparams": self.optimizer.hyperparams,
            },
        }
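
Finally, the column-form gradient for W can be validated numerically (a minimal sketch; num_grad_W is a hypothetical helper, and the analytic dW repeats the _bwd computation for the loss L = Z.sum(), whose dL/dY is all ones):

def num_grad_W(X, W, eps=1e-5):
    # Hypothetical helper: central finite differences of L = conv2D_gemm(X, W).sum() w.r.t. W.
    g = np.zeros_like(W)
    for idx in np.ndindex(W.shape):
        W[idx] += eps
        up = conv2D_gemm(X, W, stride=1, pad="same").sum()
        W[idx] -= 2 * eps
        dn = conv2D_gemm(X, W, stride=1, pad="same").sum()
        W[idx] += eps  # restore the original weight
        g[idx] = (up - dn) / (2 * eps)
    return g

X = np.random.random((2, 6, 6, 3))
W = np.random.random((3, 3, 3, 4))
fr, fc, in_ch, out_ch = W.shape
dY_col = np.ones((out_ch, 6 * 6 * 2))               # dL/dY for L = Z.sum(); "same" padding keeps 6x6
X_col, p = im2col(X, W.shape, pad="same", stride=1)
dW = (dY_col @ X_col.T).reshape(out_ch, in_ch, fr, fc).transpose(2, 3, 1, 0)
assert np.allclose(dW, num_grad_W(X, W), atol=1e-4)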
