深度可分离卷积-CSDN博客

原文：https://blog.csdn.net/makefish/article/details/88716534

下面这两篇文章介绍了深度可分离卷积是怎么做的：
https://towardsdatascience.com/a-basic-introduction-to-separable-convolutions-b99ec3102728
https://eli.thegreenplace.net/2018/depthwise-separable-convolutions-for-machine-learning/
本文的很多内容都是在这两篇文章的基础上整理的。

卷积基础
描述一个二维矩阵时，使用 (row, col)；三维时，使用 (channel, row, col)；四维则多了一个维度 batch，即 (batch, channel, row, col)。这四个维度的实际排列顺序则和数据格式有关，常见的有NHWC和NCHW：https://mp.weixin.qq.com/s/I4Q1Bv7yecqYXUra49o7tw?

2D卷积
2D卷积只有col row的概念。（略）

3D卷积和4D卷积
我们先看3D卷积。
假设过滤器窗口是3x3x3（其中一个3代表了in_depth）。有四个这样的窗口，用于提取同一张图片的四个属性（由out_depth指定，对应输出的Out channel 0…3）。那么，针对一个批次里面的某张图片（譬如Batch 0），其处理流程如下：

在真正的源码实现中，(i, j) 代表了输出的某个属性（即某个输出通道）上的任意位置。该位置的值，是窗口和输入做卷积得来的。

参考的源代码（Copy 自引文）：


 
 
   
   
    
    
   
   
   
   
    
    
     
     def conv2d_multi_channel(input, w):
         """Two-dimensional convolution with multiple channels.

         Uses SAME padding with 0s, a stride of 1 and no dilation. The filter
         is applied without flipping (i.e. as a cross-correlation).

         Args:
             input: input array with shape (height, width, in_depth).
             w: filter array with shape (fd, fd, in_depth, out_depth) with odd
                 fd. in_depth is the number of input channels, and has to be
                 the same as input's in_depth; out_depth is the number of
                 output channels.

         Returns:
             A float array with shape (height, width, out_depth).

         Raises:
             AssertionError: if the filter window is not square with an odd
                 side length, or if the filter's in_depth differs from the
                 input's in_depth.
         """
         assert w.shape[0] == w.shape[1] and w.shape[0] % 2 == 1, (
             "filter window must be square with an odd side length")

         # Pad only the two spatial axes so the output keeps the input's
         # height and width (SAME padding); the channel axis is left untouched.
         padw = w.shape[0] // 2
         padded_input = np.pad(input,
                               pad_width=((padw, padw), (padw, padw), (0, 0)),
                               mode='constant',
                               constant_values=0)

         height, width, in_depth = input.shape
         assert in_depth == w.shape[2], (
             "filter in_depth must match input in_depth")
         out_depth = w.shape[3]
         output = np.zeros((height, width, out_depth))

         for out_c in range(out_depth):
             # For each output channel, perform 2d convolution summed across
             # all input channels.
             for i in range(height):
                 for j in range(width):
                     output[i, j, out_c] = _conv_at(
                         padded_input, w, i, j, out_c)
         return output


     def _conv_at(padded_input, w, i, j, out_c):
         """Return the value of output channel out_c at output position (i, j)."""
         # Accumulate in float64, the dtype of the np.zeros output array that
         # the result is stored into.
         total = np.float64(0.0)
         # The window's top-left corner in the padded input is (i, j), which
         # centres the window on (i, j) of the unpadded input.
         for c in range(padded_input.shape[2]):
             for fi in range(w.shape[0]):
                 for fj in range(w.shape[1]):
                     w_element = w[fi, fj, c, out_c]
                     total += padded_input[i + fi, j + fj, c] * w_element
         return total