【yolov5s中加入DCNv2（可变形卷积v2）】

最新推荐文章于 2024-08-28 10:30:31 发布

just f

最新推荐文章于 2024-08-28 10:30:31 发布

阅读量6.2k

点赞数 14

分类专栏：深度学习　文章标签：python、深度学习、人工智能

本文链接：https://blog.csdn.net/weixin_42550418/article/details/128037557

版权

深度学习专栏收录该内容

1 篇文章 0 订阅

订阅专栏

yolov5s中加入DCNv2（可变形卷积v2）

Requirement

torch>=1.8.1
torchvision>=0.9.1

实现步骤

1.测试环境是否满足要求

import torch
import torchvision.ops
from torch import nn
import math


class DCNv2(nn.Module):
    """Modulated deformable convolution (DCNv2) layer.

    A regular convolution, ``conv_offset_mask``, predicts
    ``3 * kernel_size * kernel_size`` channels from the input. The first two
    thirds are used as sampling offsets and the last third, after a sigmoid,
    as the modulation mask. Both are passed to
    ``torchvision.ops.deform_conv2d`` together with this layer's own
    ``weight`` and ``bias``.

    The offset/mask convolution is zero-initialised, so right after
    construction every offset is 0 and every mask value is ``sigmoid(0) = 0.5``.

    Note:
        The enclosing module must import ``math``, ``torch``, ``torch.nn`` as
        ``nn`` and ``torchvision.ops``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Side length of the square kernel (an ``int``).
        stride: Convolution stride, either an ``int`` or a 2-element
            tuple/list (lists are what YAML-parsed arguments produce).
        padding: Padding forwarded unchanged to both the offset/mask
            convolution and ``deform_conv2d``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # Normalise to a flat 2-tuple. Checking only for ``tuple`` would turn a
        # list stride such as [2, 2] into the nested ([2, 2], [2, 2]).
        if isinstance(stride, (tuple, list)):
            self.stride = tuple(stride)
        else:
            self.stride = (stride, stride)
        self.padding = padding

        # Learnable kernel and bias of the deformable convolution itself;
        # allocated uninitialised here and filled in by reset_parameters().
        self.weight = nn.Parameter(
            torch.empty(out_channels, in_channels, kernel_size, kernel_size))
        self.bias = nn.Parameter(torch.empty(out_channels))

        # Predicts offsets (2 * k * k channels) and mask logits (k * k channels).
        # It shares stride and padding with the deformable convolution.
        self.conv_offset_mask = nn.Conv2d(in_channels,
                                          3 * kernel_size * kernel_size,
                                          kernel_size=kernel_size,
                                          stride=self.stride,
                                          padding=self.padding,
                                          bias=True)

        self.reset_parameters()
        self._init_weight()

    def reset_parameters(self):
        """Draw ``weight`` from U(-1/sqrt(fan_in), 1/sqrt(fan_in)) and zero ``bias``."""
        fan_in = self.in_channels * (self.kernel_size ** 2)
        bound = 1. / math.sqrt(fan_in)
        nn.init.uniform_(self.weight, -bound, bound)
        nn.init.zeros_(self.bias)

    def _init_weight(self):
        """Zero the offset/mask convolution so the initial offsets are all 0."""
        nn.init.constant_(self.conv_offset_mask.weight, 0.)
        nn.init.constant_(self.conv_offset_mask.bias, 0.)

    def forward(self, x):
        """Apply the modulated deformable convolution to ``x``."""
        offset_mask = self.conv_offset_mask(x)
        # Split into three equal channel groups: two offset groups + mask logits.
        o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)

        return torchvision.ops.deform_conv2d(input=x,
                                             offset=offset,
                                             weight=self.weight,
                                             bias=self.bias,
                                             padding=self.padding,
                                             mask=mask,
                                             stride=self.stride)

# Smoke test: stack DCNv2 layers into a small CNN and run one random batch
# through it. Reaching the print call shows that the installed torch and
# torchvision can execute deform_conv2d.
model = nn.Sequential(
    # Stem: 3 -> 32 channels, activation, then 2x2 max-pooling.
    DCNv2(3, 32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(2, 2),
    # Three stages with the same layout: one DCNv2 keeping the width, one
    # DCNv2 doubling it, activation, 2x2 max-pooling.
    *(
        layer
        for width in (32, 64, 128)
        for layer in (
            DCNv2(width, width, kernel_size=3, stride=1, padding=1),
            DCNv2(width, 2 * width, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
    ),
)
x = torch.randn(2, 3, 64, 64)
y = model(x)
print(x.size(), y.size(), sep="\n")
# Expected output:
# torch.Size([2, 3, 64, 64])
# torch.Size([2, 256, 4, 4])

如果能正常输出上面两行张量尺寸，则说明当前环境满足要求。

2.修改models/yolov5s.yaml

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# NOTE: indent and align with spaces only. YAML forbids tab indentation and
# PyYAML rejects tab characters placed before tokens or comments.
# Parameters
nc: 1  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 v6.0 backbone
# DCNv2 args below are [out_channels, kernel_size, stride]; presumably the
# input channel count is prepended by models/yolo.py - confirm against step 4.
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],                 # 0-P1/2
   [-1, 1, DCNv2, [128, 3, 2]],                  # 1-P2/4
   [-1, 3, C3, [128]],                           # 2
   [-1, 1, DCNv2, [256, 3, 2]],                  # 3-P3/8
   [-1, 6, C3, [256]],                           # 4
   [-1, 1, DCNv2, [512, 3, 2]],                  # 5-P4/16
   [-1, 9, C3, [512]],                           # 6
   [-1, 1, DCNv2, [1024, 3, 2]],                 # 7-P5/32
   [-1, 3, C3, [1024]],                          # 8
   [-1, 1, SPPF, [1024, 5]],                     # 9
  ]

# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],                   # 10
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],   # 11
   [[-1, 6], 1, Concat, [1]],                    # 12 cat backbone P4
   [-1, 3, C3, [512, False]],                    # 13

   [-1, 1, Conv, [256, 1, 1]],                   # 14
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],   # 15
   [[-1, 4], 1, Concat, [1]],                    # 16 cat backbone P3
   [-1, 3, C3, [256, False]],                    # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],                   # 18
   [[-1, 14], 1, Concat, [1]],                   # 19 cat head P4
   [-1, 3, C3, [512, False]],                    # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],                   # 21
   [[-1, 10], 1, Concat, [1]],                   # 22 cat head P5
   [-1, 3, C3, [1024, False]],                   # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],     # Detect(P3, P4, P5)
  ]

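3.修改models/common.py（将下面的 DCNv2 类加入该文件；由于 forward 中调用了 torchvision.ops.deform_conv2d，reset_parameters 中调用了 math.sqrt，需确认该文件顶部已有 import torchvision.ops 和 import math）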

# --------------------------DCNv2 start--------------------------
class DCNv2(nn.Module):
    """Modulated deformable convolution (DCNv2) layer.

    A regular convolution, ``conv_offset_mask``, predicts
    ``3 * kernel_size * kernel_size`` channels from the input. The first two
    thirds are used as sampling offsets and the last third, after a sigmoid,
    as the modulation mask. Both are passed to
    ``torchvision.ops.deform_conv2d`` together with this layer's own
    ``weight`` and ``bias``.

    The offset/mask convolution is zero-initialised, so right after
    construction every offset is 0 and every mask value is ``sigmoid(0) = 0.5``.

    Note:
        The enclosing module must import ``math``, ``torch``, ``torch.nn`` as
        ``nn`` and ``torchvision.ops``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Side length of the square kernel (an ``int``).
        stride: Convolution stride, either an ``int`` or a 2-element
            tuple/list (lists are what YAML-parsed arguments produce).
        padding: Padding forwarded unchanged to both the offset/mask
            convolution and ``deform_conv2d``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # Normalise to a flat 2-tuple. Checking only for ``tuple`` would turn a
        # list stride such as [2, 2] into the nested ([2, 2], [2, 2]).
        if isinstance(stride, (tuple, list)):
            self.stride = tuple(stride)
        else:
            self.stride = (stride, stride)
        self.padding = padding

        # Learnable kernel and bias of the deformable convolution itself;
        # allocated uninitialised here and filled in by reset_parameters().
        self.weight = nn.Parameter(
            torch.empty(out_channels, in_channels, kernel_size, kernel_size))
        self.bias = nn.Parameter(torch.empty(out_channels))

        # Predicts offsets (2 * k * k channels) and mask logits (k * k channels).
        # It shares stride and padding with the deformable convolution.
        self.conv_offset_mask = nn.Conv2d(in_channels,
                                          3 * kernel_size * kernel_size,
                                          kernel_size=kernel_size,
                                          stride=self.stride,
                                          padding=self.padding,
                                          bias=True)

        self.reset_parameters()
        self._init_weight()

    def reset_parameters(self):
        """Draw ``weight`` from U(-1/sqrt(fan_in), 1/sqrt(fan_in)) and zero ``bias``."""
        fan_in = self.in_channels * (self.kernel_size ** 2)
        bound = 1. / math.sqrt(fan_in)
        nn.init.uniform_(self.weight, -bound, bound)
        nn.init.zeros_(self.bias)

    def _init_weight(self):
        """Zero the offset/mask convolution so the initial offsets are all 0."""
        nn.init.constant_(self.conv_offset_mask.weight, 0.)
        nn.init.constant_(self.conv_offset_mask.bias, 0.)

    def forward(self, x):
        """Apply the modulated deformable convolution to ``x``."""
        offset_mask = self.conv_offset_mask(x)
        # Split into three equal channel groups: two offset groups + mask logits.
        o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)

        return torchvision.ops.deform_conv2d(input=x,
                                             offset=offset,
                                             weight=self.weight,
                                             bias=self.bias,
                                             padding=self.padding,
                                             mask=mask,
                                             stride=self.stride)
# ---------------------------DCNv2 end---------------------------

4.修改models/yolo.py

if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,BottleneckCSP, C3]:
# Add DCNv2 to the end of this list (original line above, modified line below):
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,BottleneckCSP, C3, DCNv2]: