YOLOv5：commons.yaml文件解读

ariesoary

已于 2022-12-19 14:12:26 修改

阅读量425

点赞数 1

分类专栏： YOLOv5白皮书文章标签：深度学习人工智能

于 2022-12-19 14:09:09 首次发布

本文链接：https://blog.csdn.net/weixin_56118617/article/details/128366711

版权

YOLOv5白皮书专栏收录该内容

8 篇文章 1 订阅

订阅专栏

🍨 本文为🔗365天深度学习训练营中的学习记录博客
🍦 参考文章：[365天深度学习训练营-第Y4周：common.py文件解读

🏡 我的环境：
● 语言环境：Python 3.8
● 数据集：coco128
● 深度学习环境：Pytorch

基础包和配置：

import ast # 抽象语法树 
import contextlib  #为with语句分配资源的实用程序
import json #用于json和Python数据之间的相互转换
import math  # 数学函数模块
import platform #获取操作系统的信息
import warnings #警告程序员关于语言或库功能的变化的方法
import zipfile #zip格式编码的压缩和解压缩
from collections import OrderedDict, namedtuple #集合模块
from copy import copy   # 数据拷贝模块 分浅拷贝和深拷贝
from pathlib import Path  # Path将str转换为Path对象 使字符串路径易于操作的模块
from urllib.parse import urlparse  #urllib为http请求库，parse 用来编码和解码


import cv2   # opencv-python
import numpy as np   # numpy数组操作模块
import pandas as pd   # panda数组操作模块
import requests # Python的HTTP客户端库
import torch  # pytorch深度学习框架
import torch.nn as nn   ## 专门为神经网络设计的模块化接口
from IPython.display import display 
from PIL import Image   # 图像基础操作模块
from torch.cuda import amp # 混合精度训练模块

from utils import TryExcept  
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
                           increment_path, is_notebook, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
                           xyxy2xywh, yaml_load)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, smart_inference_mode

一.基本组件：

1.1、autopad

根据输入的卷积核计算该卷积模块所需的pad值。

def autopad(k, p=None, d=1):  # kernel, padding, dilation
    # Pad to 'same' shape outputs
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

1.2、Conv

最基础的模块: 卷积层 + BN层 + 激活函数。

class Conv(nn.Module):
    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))

1.3 Focus

将输入图像切为4份（即宽高各减半），再聚合到通道处。

class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
        # return self.conv(self.contract(x))

1.4、Bottleneck

由1x1conv、3x3conv、残差块组成。

class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))

1.5、BottleneckCSP

由Bottleneck模块和CSP结构组成。

class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))

1.6 C3

轻量级的BottleCSP, 由三个卷积块和N个Bottleneck组成。

class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))

1.7 SPP

空间金字塔池化，将更多不同分辨率的特征进行融合，得到更多的信息。

class SPP(nn.Module):
    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))

1.8、Concat

按照某个维度进行concat，常用来合并前后两个feature map

class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)

1.9、Contract、Expand

用于改变feature map的维度，Contract将feature map的w和h维度(缩小)的数据收缩到channel维度上(放大)，Expand将channel维度(变小)的数据扩展到W和H维度(变大)。

class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)

2.关键类

2.1 NMS

非极大抑制，保留哪些框.

class NMS(nn.Module):
    """在yolo.py中Model类的nms函数中使用
    NMS非极大值抑制 Non-Maximum Suppression (NMS) module
    给模型model封装nms  增加模型的扩展功能  但是我们一般不用 一般是在前向推理结束后再调用non_max_suppression函数
    """
    conf = 0.25     # 置信度阈值              confidence threshold
    iou = 0.45      # iou阈值                IoU threshold
    classes = None  # 是否nms后只保留特定的类别 (optional list) filter by class
    max_det = 1000  # 每张图片的最大目标个数    maximum number of detections per image

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        """
        :params x[0]: [batch, num_anchors(3个yolo预测层), (x+y+w+h+1+num_classes)]
        直接调用的是general.py中的non_max_suppression函数给model扩展nms功能
        """
        return non_max_suppression(x[0], self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)

3.2、AutoShape

前处理、推理、后处理的模块(预处理 + 推理 + nms), 代码太长感兴趣的可以看看。

class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference
.............

2.3 Detections

对推理结果进行处理，代码太长了。

2.4 Classify

二级分类，比如要做识别人脸面部表情，先要识别出人脸，如果想识别出人的面部表情，就需要二级分类进一步检测。

class Classify(nn.Module):
    # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=0.0, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        if isinstance(x, list):
            x = torch.cat(x, 1)
        return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))