YOLOv7 network structure code walkthrough

YOLOv7's backbone is one of its core innovations, chiefly the addition of the E-ELAN module and the auxiliary (aux) detection head; so far the new structure does appear to deliver some gains. The detailed code walkthrough below corresponds to yolo.py.
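Before going line by line, here is a minimal usage sketch of how this file is typically driven: the Model class defined in yolo.py parses a yaml config and builds the network, with Detect/IDetect as the final layer. The config path, input size, and printed shape below are assumptions based on the official repo layout; adjust them to your checkout. The annotated source follows.

```python
import torch
from models.yolo import Model  # the Model class defined in this file

# Build the network from its yaml definition (path assumed from the official yolov7 repo layout)
model = Model('cfg/training/yolov7.yaml', ch=3, nc=80)
model.eval()

# In eval mode the detection head returns (decoded predictions, raw per-scale feature maps)
img = torch.zeros(1, 3, 640, 640)
pred, feats = model(img)
print(pred.shape)  # roughly (1, 25200, 85): 3 anchors x (80^2 + 40^2 + 20^2) grid cells, 85 = 4 box + 1 obj + 80 cls
```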

import argparse
import logging
import sys
from copy import deepcopy

sys.path.append('./')  # to run '$ python *.py' files in subdirectories
logger = logging.getLogger(__name__)
import torch
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible, check_file, set_logging
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
    select_device, copy_attr
from utils.loss import SigmoidBin

try:
    import thop  # for FLOPS computation
except ImportError:
    thop = None


class Detect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export
    end2end = False
    include_nms = False 

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
                y = x[i].sigmoid()
                if not torch.onnx.is_in_onnx_export():
                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:
                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        if self.training:
            out = x
        elif self.end2end:
            out = torch.cat(z, 1)
        elif self.include_nms:
            z = self.convert(z)
            out = (z, )
        else:
            out = (torch.cat(z, 1), x)

        return out

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()

    def convert(self, z):
        z = torch.cat(z, 1)
        box = z[:, :, :4]
        conf = z[:, :, 4:5]
        score = z[:, :, 5:]
        score *= conf
        convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                                           dtype=torch.float32,
                                           device=z.device)
        box @= convert_matrix                          
        return (box, score)
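The decode in forward() follows the YOLOv5-style parameterisation: xy = (2·sigmoid − 0.5 + grid) · stride and wh = (2·sigmoid)² · anchor. The convert() helper only re-parameterises boxes from centre format (xc, yc, w, h) to corner format (x1, y1, x2, y2) for downstream NMS; a quick numeric check of its matrix (the matrix is copied from the class, the test box is made up):

```python
import torch

# (xc, yc, w, h) @ convert_matrix -> (x1, y1, x2, y2)
convert_matrix = torch.tensor([[1, 0, 1, 0],
                               [0, 1, 0, 1],
                               [-0.5, 0, 0.5, 0],
                               [0, -0.5, 0, 0.5]], dtype=torch.float32)

box_xywh = torch.tensor([[100., 50., 20., 10.]])  # centre (100, 50), width 20, height 10
print(box_xywh @ convert_matrix)                  # tensor([[ 90.,  45., 110.,  55.]])
```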


class IDetect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export
    end2end = False
    include_nms = False 

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(IDetect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers=3 
        self.na = len(anchors[0]) // 2  # number of anchors 6//2=3
        self.grid = [torch.zeros(1)] * self.nl  # init grid [tensor([0.]), tensor([0.]), tensor([0.])]
        a = torch.tensor(anchors).float().view(self.nl, -1, 2) # (3, 6)->(3, 3, 2) = [[[10,13], [16,30], [33,23]], [[30,61], [62,45], [59,119]], [[116,90], [156,198], [373,326]]]
        self.register_buffer('anchors', a)  # shape(nl,na,2)=(3,3,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)=(3,1,3,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        
        self.ia = nn.ModuleList(ImplicitA(x) for x in ch)
        self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch)

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](self.ia[i](x[i]))  # conv
            x[i] = self.im[i](x[i])
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)
    
    def fuseforward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                if not torch.onnx.is_in_onnx_export():
                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:
                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        if self.training:
            out = x
        elif self.end2end:
            out = torch.cat(z, 1)
        elif self.include_nms:
            z = self.convert(z)
            out = (z, )
        else:
            out = (torch.cat(z, 1), x)

        return out
    
    def fuse(self):
        print("IDetect.fuse")
        # fuse ImplicitA and Convolution
        for i in range(len(self.m)):
            c1,c2,_,_ = self.m[i].weight.shape
            c1_,c2_, _,_ = self.ia[i].implicit.shape
            # ImplicitA adds a constant before the 1x1 conv, so it folds into the conv bias: b += W @ implicit
            self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1, c2), self.ia[i].implicit.reshape(c2_, c1_)).squeeze(1)

        # fuse ImplicitM and Convolution (a multiplicative scale folds into both weight and bias)
        for i in range(len(self.m)):
            c1_, c2_, _, _ = self.im[i].implicit.shape
            self.m[i].bias *= self.im[i].implicit.reshape(c2_)
            self.m[i].weight *= self.im[i].implicit.transpose(0, 1)
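Compared with Detect, IDetect wraps the 1x1 prediction conv with the implicit knowledge modules from YOLOR: ImplicitA adds a learned per-channel offset before the conv and ImplicitM applies a learned per-channel scale after it, which is why fuse() can fold both into the conv's weight and bias for deployment. For reference, a slightly simplified sketch of how ImplicitA/ImplicitM are defined in models/common.py (initialisation details may differ from the repo):

```python
import torch
import torch.nn as nn

class ImplicitA(nn.Module):
    """Learned per-channel additive offset, broadcast over the spatial dims, applied before the detection conv."""
    def __init__(self, channel, mean=0., std=.02):
        super().__init__()
        self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
        nn.init.normal_(self.implicit, mean=mean, std=std)

    def forward(self, x):
        return self.implicit + x

class ImplicitM(nn.Module):
    """Learned per-channel multiplicative scale, applied after the detection conv."""
    def __init__(self, channel, mean=1., std=.02):
        super().__init__()
        self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1))
        nn.init.normal_(self.implicit, mean=mean, std=std)

    def forward(self, x):
        return self.implicit * x
```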