2.6.7 YOLOV5_yolobody网络注释

代码注释如下(注意:ConvNext和CSPdarknet网络可以根据前面几个章节的内容自己写):

import torch
import torch.nn as nn

from ConvNext import ConvNeXt_Small, ConvNeXt_Tiny
from CSPdarknet import C3, Conv, CSPDarknet


# ---------------------------------------------------#
#   yolo_body
# ---------------------------------------------------#
class YoloBody(nn.Module):
    """YOLOv5 detection network: backbone + PANet-style neck + three detection heads.

    Produces three prediction maps at strides 32/16/8 (20x20, 40x40, 80x80 for a
    640x640 input), each with `len(anchors) * (5 + num_classes)` channels.
    """

    def __init__(self, anchors_mask=None, num_classes=10, phi="s", backbone='cspdarknet', pretrained=False, input_shape=None):
        """
        Args:
            anchors_mask: per-head anchor index groups; defaults to
                [[6, 7, 8], [3, 4, 5], [0, 1, 2]] (P5, P4, P3 order).
            num_classes: number of object classes.
            phi: model size variant, one of 's' / 'm' / 'l' / 'x'.
            backbone: 'cspdarknet', 'convnext_tiny' or 'convnext_small'.
            pretrained: whether the backbone loads pretrained weights.
            input_shape: expected input spatial size; defaults to [640, 640].
        """
        super(YoloBody, self).__init__()
        # NOTE: use None sentinels instead of mutable default arguments
        # (lists as defaults are shared across calls).
        if anchors_mask is None:
            anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        if input_shape is None:
            input_shape = [640, 640]

        depth_dict = {'s': 0.33, 'm': 0.67, 'l': 1.00, 'x': 1.33, }
        width_dict = {'s': 0.50, 'm': 0.75, 'l': 1.00, 'x': 1.25, }
        dep_mul, wid_mul = depth_dict[phi], width_dict[phi]

        base_channels = int(wid_mul * 64)  # 64 for phi='l'
        base_depth = max(round(dep_mul * 3), 1)  # 3 for phi='l'
        # -----------------------------------------------#
        #   Input image: 640 x 640 x 3
        #   Base channel count: 64 (scaled by wid_mul)
        # -----------------------------------------------#
        self.backbone_name = backbone
        if backbone == "cspdarknet":
            # ---------------------------------------------------#
            #   Build the CSPDarknet backbone.
            #   It yields three feature maps with shapes:
            #   80, 80, 256
            #   40, 40, 512
            #   20, 20, 1024
            # ---------------------------------------------------#
            self.backbone = CSPDarknet(base_channels, base_depth, phi, pretrained)
        else:
            # ---------------------------------------------------#
            #   For non-cspdarknet backbones, project the feature
            #   channels with 1x1 convs so they match the channel
            #   layout YOLOv5's neck expects.
            # ---------------------------------------------------#
            self.backbone = {
                'convnext_tiny': ConvNeXt_Tiny,
                'convnext_small': ConvNeXt_Small,
                #'swin_transfomer_tiny': Swin_transformer_Tiny,
            }[backbone](pretrained=pretrained, input_shape=input_shape)
            in_channels = {
                'convnext_tiny': [192, 384, 768],
                'convnext_small': [192, 384, 768],
                #'swin_transfomer_tiny': [192, 384, 768],
            }[backbone]
            feat1_c, feat2_c, feat3_c = in_channels
            self.conv_1x1_feat1 = Conv(feat1_c, base_channels * 4, 1, 1)
            self.conv_1x1_feat2 = Conv(feat2_c, base_channels * 8, 1, 1)
            self.conv_1x1_feat3 = Conv(feat3_c, base_channels * 16, 1, 1)

        # Nearest-neighbour upsampling: doubles the spatial size, channels unchanged.
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

        # Top-down path, step 1: reduce 16*base -> 8*base with a 1x1 Conv,
        # then fuse with a C3 block after concatenation (channels halve on the way up).
        self.conv_for_feat3 = Conv(base_channels * 16, base_channels * 8, 1, 1)
        self.conv3_for_upsample1 = C3(base_channels * 16, base_channels * 8, base_depth, shortcut=False)

        # Top-down path, step 2: reduce 8*base -> 4*base, then fuse with a C3 block.
        self.conv_for_feat2 = Conv(base_channels * 8, base_channels * 4, 1, 1)
        self.conv3_for_upsample2 = C3(base_channels * 8, base_channels * 4, base_depth, shortcut=False)

        # Bottom-up path, step 1: stride-2 3x3 conv halves the spatial size
        # (channels unchanged), C3 fuses the concatenated features.
        self.down_sample1 = Conv(base_channels * 4, base_channels * 4, 3, 2)
        self.conv3_for_downsample1 = C3(base_channels * 8, base_channels * 8, base_depth, shortcut=False)

        # Bottom-up path, step 2: another stride-2 downsample plus C3 fusion.
        self.down_sample2 = Conv(base_channels * 8, base_channels * 8, 3, 2)
        self.conv3_for_downsample2 = C3(base_channels * 16, base_channels * 16, base_depth, shortcut=False)

        # YOLOv5 heads: one 1x1 conv per scale, emitting
        # len(anchors) * (4 box + 1 objectness + num_classes) channels.
        # 80, 80, 256 => 80, 80, 3 * (5 + num_classes) => 80, 80, 3 * (4 + 1 + num_classes)
        self.yolo_head_P3 = nn.Conv2d(base_channels * 4, len(anchors_mask[2]) * (5 + num_classes), 1)
        # 40, 40, 512 => 40, 40, 3 * (5 + num_classes) => 40, 40, 3 * (4 + 1 + num_classes)
        self.yolo_head_P4 = nn.Conv2d(base_channels * 8, len(anchors_mask[1]) * (5 + num_classes), 1)
        # 20, 20, 1024 => 20, 20, 3 * (5 + num_classes) => 20, 20, 3 * (4 + 1 + num_classes)
        self.yolo_head_P5 = nn.Conv2d(base_channels * 16, len(anchors_mask[0]) * (5 + num_classes), 1)

    def forward(self, x):
        """Run backbone + neck + heads; returns (out0, out1, out2) for the
        20x20, 40x40 and 80x80 scales respectively."""
        # Backbone: three feature maps at strides 8 / 16 / 32.
        feat1, feat2, feat3 = self.backbone(x)
        if self.backbone_name != "cspdarknet":
            # Project non-cspdarknet features to the expected channel counts.
            feat1 = self.conv_1x1_feat1(feat1)
            feat2 = self.conv_1x1_feat2(feat2)
            feat3 = self.conv_1x1_feat3(feat3)

        # 20, 20, 1024 -> 20, 20, 512
        P5 = self.conv_for_feat3(feat3)  # 1x1 Conv reduces channels: 1024 -> 512
        # 20, 20, 512 -> 40, 40, 512 (upsample)
        P5_upsample = self.upsample(P5)  # channels unchanged, spatial size doubled
        # 40, 40, 512 -> 40, 40, 1024
        P4 = torch.cat([P5_upsample, feat2], 1)  # concat along channels: 512 + 512 = 1024
        # 40, 40, 1024 -> 40, 40, 512
        P4 = self.conv3_for_upsample1(P4)  # C3 fuses and reduces channels: 1024 -> 512

        # 40, 40, 512 -> 40, 40, 256
        P4 = self.conv_for_feat2(P4)  # 1x1 Conv reduces channels: 512 -> 256
        # 40, 40, 256 -> 80, 80, 256 (upsample)
        P4_upsample = self.upsample(P4)  # channels unchanged, spatial size doubled
        # 80, 80, 256 cat 80, 80, 256 -> 80, 80, 512
        P3 = torch.cat([P4_upsample, feat1], 1)  # concat along channels: 256 + 256 = 512
        # 80, 80, 512 -> 80, 80, 256
        P3 = self.conv3_for_upsample2(P3)  # C3 fuses and reduces channels: 512 -> 256

        # 80, 80, 256 -> 40, 40, 256
        P3_downsample = self.down_sample1(P3)  # stride-2 conv halves spatial size
        # 40, 40, 256 cat 40, 40, 256 -> 40, 40, 512
        P4 = torch.cat([P3_downsample, P4], 1)  # concat along channels: 256 + 256 = 512
        # 40, 40, 512 -> 40, 40, 512
        P4 = self.conv3_for_downsample1(P4)  # C3 fusion, channel count preserved

        # 40, 40, 512 -> 20, 20, 512
        P4_downsample = self.down_sample2(P4)
        # 20, 20, 512 cat 20, 20, 512 -> 20, 20, 1024
        P5 = torch.cat([P4_downsample, P5], 1)
        # 20, 20, 1024 -> 20, 20, 1024
        P5 = self.conv3_for_downsample2(P5)

        # ---------------------------------------------------#
        #   Third head (finest scale)
        #   y3 = (batch_size, 75, 80, 80)
        # ---------------------------------------------------#
        out2 = self.yolo_head_P3(P3)
        # ---------------------------------------------------#
        #   Second head
        #   y2 = (batch_size, 75, 40, 40)
        # ---------------------------------------------------#
        out1 = self.yolo_head_P4(P4)
        # ---------------------------------------------------#
        #   First head (coarsest scale)
        #   y1 = (batch_size, 75, 20, 20)
        # ---------------------------------------------------#
        out0 = self.yolo_head_P5(P5)
        return out0, out1, out2

if __name__ == "__main__":

    # Dummy batch: 4 RGB images of size 640 x 640 -> tensor of shape (4, 3, 640, 640).
    # (Renamed from `input`, which shadows the builtin.)
    x = torch.randn(4, 3, 640, 640)

    # Build the default YOLOv5-s model (cspdarknet backbone).
    model = YoloBody()

    # Three output feature maps, corresponding to head1 / head2 / head3.
    out0, out1, out2 = model(x)

    print(out0.shape, out1.shape, out2.shape)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值