卷积操作具体实现

最新推荐文章于 2023-11-07 21:00:04 发布

东东就是我

最新推荐文章于 2023-11-07 21:00:04 发布

阅读量545

点赞数

分类专栏：深度学习之PyTorch物体检测实战(笔记) 文章标签： pytorch 深度学习 python

本文链接：https://blog.csdn.net/qq_33228039/article/details/120749762

版权

深度学习之PyTorch物体检测实战(笔记) 专栏收录该内容

6 篇文章 5 订阅

订阅专栏

1.具体结论

我们都知道卷积操作就是让卷积核在图像上滑动，在每个位置逐元素相乘后求和。但是 PyTorch 底层并不是这样逐窗口计算的。
具体原理是把图像和卷积核都展开（重排）成二维矩阵，然后做一次矩阵乘法，也就是 im2col。
在这里插入图片描述
参考：https://mp.weixin.qq.com/s/GPDYKQlIOq6Su0Ta9ipzig
https://zhuanlan.zhihu.com/p/46305636

2.具体代码实现：

1. 单通道输入：1 × H × W
2. 批量多通道输入：B × C × H × W

import  numpy as np
import torch
from torch import nn
# #1,H,W
# src=np.array(np.arange(0,7**2).reshape(7,7))
# intH,intW=src.shape
#
# kernel = np.array([-0.2589,  0.2106, -0.1583, -0.0107,  0.1177,  0.1693, -0.1582, -0.3048, -0.1946]).reshape(3,3)
# KHeight, KWeight = kernel.shape
#
# row_num=intH-KHeight+1
# col_num=intW-KWeight+1
#
# OutScrIm2Col = np.zeros([KHeight*KWeight,row_num*col_num])
#
# ii,jj=0,0
# col_cnt,row_cnt=0,0
# for i in range(row_num):
#     for j in range(col_num):
#         ii=i
#         jj=j
#         for iii in range(KHeight):
#             for jjj in range(KHeight):
#                 OutScrIm2Col[col_cnt][row_cnt]=src[ii][jj]
#                 jj+=1
#                 col_cnt+=1
#             ii+=1
#             jj=j
#         col_cnt=0
#         row_cnt+=1
#
# im2col_kernel = kernel.reshape(-1,9)
# out = np.matmul(im2col_kernel,OutScrIm2Col)
# print(out.reshape(5,5))
#
# import torch
# from torch import nn
# import numpy as np
# torch.manual_seed(100)
#
# net = nn.Conv2d(1, 1, 3, padding=0, bias=False)
#
# scr = np.array(np.arange(0, 7**2).reshape(1, 1, 7, 7)).astype(np.float32)
# scr = torch.from_numpy(scr)
#
# print(net.weight.data) # 把这里的weight的值复制到上面numpy的代码来做验证
# print(net(scr))
# Input batch in (B, C, H, W) layout: the integers 0..1214 arranged as
# 5 images of 3 channels, each 9 x 9.
np.set_printoptions(threshold=np.inf)  # never abbreviate printed arrays with "..."
src = np.arange(9 * 9 * 3 * 5).reshape(-1, 3, 9, 9)
print(src.shape)
# kernel = np.array([[[[-0.1158,  0.0942, -0.0708],
#           [-0.0048,  0.0526,  0.0757],
#           [-0.0708, -0.1363, -0.0870]],
#
#          [[-0.1139, -0.1128,  0.0702],
#           [ 0.0631,  0.0857, -0.0244],
#           [ 0.1197,  0.1481,  0.0765]],
#
#          [[-0.0823, -0.0589, -0.0959],
#           [ 0.0966,  0.0166,  0.1422],
#           [-0.0167,  0.1335,  0.0729]],
#
#          [[-0.0032, -0.0768,  0.0597],
#           [ 0.0083, -0.0754,  0.0867],
#           [-0.0228, -0.1440, -0.0832]],
#
#          [[ 0.1352,  0.0615, -0.1005],
#           [ 0.1163,  0.0049, -0.1384],
#           [ 0.0440, -0.0468, -0.0542]]]])

# Convolution weights, nested as 3 filters x 3 input channels x 3 rows x 3 columns,
# i.e. shape (3, 3, 3, 3); the shape is printed right after the definition.
# NOTE(review): these values look like the seeded nn.Conv2d weights copied by hand
# and rounded to four decimals -- confirm against `net.weight.data`.
kernel = np.array([[[[-0.1495,  0.1216, -0.0914],
          [-0.0062,  0.0679,  0.0977],
          [-0.0913, -0.1760, -0.1124]],

         [[-0.1470, -0.1456,  0.0907],
          [ 0.0815,  0.1107, -0.0314],
          [ 0.1545,  0.1913,  0.0987]],

         [[-0.1063, -0.0761, -0.1238],
          [ 0.1246,  0.0215,  0.1836],
          [-0.0215,  0.1724,  0.0941]]],


        [[[-0.0042, -0.0991,  0.0771],
          [ 0.0107, -0.0973,  0.1120],
          [-0.0294, -0.1859, -0.1074]],

         [[ 0.1746,  0.0794, -0.1298],
          [ 0.1502,  0.0063, -0.1786],
          [ 0.0568, -0.0604, -0.0700]],

         [[ 0.0100, -0.1753,  0.0048],
          [ 0.1559,  0.0381, -0.0212],
          [ 0.0877, -0.0168, -0.0620]]],


        [[[ 0.0466,  0.0204,  0.0730],
          [-0.0505,  0.1560,  0.1292],
          [-0.0755,  0.0664,  0.0285]],

         [[ 0.1629,  0.1608,  0.0997],
          [ 0.1068,  0.0454, -0.0624],
          [-0.1089,  0.1714,  0.0814]],

         [[-0.1479,  0.0606, -0.0596],
          [-0.1750,  0.1847,  0.0211],
          [ 0.0719, -0.0031, -0.1637]]]])
# Disabled alternative: tile the kernel 12 times and reshape it to (-1, 3, 3, 3).
# kernel = np.tile(kernel, 12)
# kernel = kernel.reshape(-1, 3, 3, 3)
print(kernel.shape)

# Convolution via im2col (stride 1, no padding) on a batched input.
#
# Reads two arrays defined earlier in the script:
#   src    -- input batch, unpacked as (batch, channels, height, width)
#   kernel -- weights, unpacked as (out_channels, in_channels, kernel_H, kernel_W)
# Produces:
#   OutScrIm2Col -- (batch, in_channels*kernel_H*kernel_W, outH*outW) patch matrix
#   Out          -- (batch, out_channels, outH, outW) convolution result
scrN, srcChannel, intH, intW = src.shape
KoutChannel, KinChannel, kernel_H, kernel_W = kernel.shape
if srcChannel != KinChannel:
    # A mismatch would otherwise either index past the channel axis or
    # silently ignore the surplus input channels.
    raise ValueError(
        f"input has {srcChannel} channels but kernel expects {KinChannel}"
    )

# Every filter is flattened into one row of length in_channels*kernel_H*kernel_W.
im2col_kernel = kernel.reshape(KoutChannel, -1)
outChannel, outH, outW = KoutChannel, intH - kernel_H + 1, intW - kernel_W + 1
if outH <= 0 or outW <= 0:
    raise ValueError(
        f"kernel {kernel_H}x{kernel_W} does not fit inside input {intH}x{intW}"
    )

# One column per output position; rows are ordered (channel, kernel row, kernel col),
# which is the same order the flattened filters in im2col_kernel use.
patch_len = KinChannel * kernel_H * kernel_W
OutScrIm2Col = np.zeros([scrN, patch_len, outH * outW])

for Outim2colCol_H in range(outH):
    for Outim2colCol_W in range(outW):
        # Window under the kernel for this output position, for the whole batch at once.
        patch = src[:, :,
                    Outim2colCol_H:Outim2colCol_H + kernel_H,
                    Outim2colCol_W:Outim2colCol_W + kernel_W]
        # C-order flattening of (channels, kernel_H, kernel_W) yields the row order above.
        OutScrIm2Col[:, :, Outim2colCol_H * outW + Outim2colCol_W] = patch.reshape(scrN, patch_len)

print(OutScrIm2Col.shape)
# (out_channels, K) @ (batch, K, outH*outW) broadcasts over the batch axis; the result
# is reshaped from the computed sizes rather than hard-coded numbers.
Out = np.matmul(im2col_kernel, OutScrIm2Col).reshape(scrN, outChannel, outH, outW)
print(Out)
# Reference run: push the same 0..1214 input through PyTorch's own Conv2d so its
# printed output can be compared with the im2col result.
torch.manual_seed(100)  # fixed seed: the layer's random initial weights are repeatable

net = nn.Conv2d(3, 3, 3, padding=0, bias=False)
print(net.weight.data.shape)
# Uncomment to dump the weights themselves:
# print(net.weight.data)

# Same values and (B, C, H, W) layout as the NumPy input, as a float32 tensor.
scr = torch.from_numpy(
    np.arange(9 * 9 * 3 * 5).reshape(-1, 3, 9, 9).astype(np.float32)
)
print("data:", scr.shape)

Out = net(scr)
print("Our:", Out.shape)
print(Out)

东东就是我

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
卷积操作具体实现

1.具体结论我们都知道卷积操作就是滑动卷积核与图像点乘然后相加。但是pytorch不是这么操作的具体原理就是把图像和卷积核都缩放成一个二维矩阵，然后矩阵相乘。也就是im2col参考：https://mp.weixin.qq.com/s/GPDYKQlIOq6Su0Ta9ipzighttps://zhuanlan.zhihu.com/p/463056362.具体代码实现：1.1* H* W2.B* C* H* Wimport numpy as npimport torchfrom to
复制链接

扫一扫