0.导入需要的包和基本配置
import ast
import contextlib
import json
import math # 数学函数模块
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy # 数据拷贝模块,分浅拷贝和深拷贝
from pathlib import Path # Path将str转换为Path对象,使字符串路径易于操作的模块
from urllib.parse import urlparse
import cv2
import numpy as np # numpy数组操作模块
import pandas as pd # pandas数组操作模块
import requests # Python的HTTP客户端库
import torch # pytorch深度学习框架
import torch.nn as nn # 专门为神经网络设计的模块化接口
from IPython.display import display
from PIL import Image # 图像基础操作模块
from torch.cuda import amp # 混合精度训练模块
from utils import TryExcept
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
increment_path, is_notebook, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
xyxy2xywh, yaml_load)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, smart_inference_mode
1.Conv
class Conv(nn.Module):
    """Standard convolution unit: Conv2d -> BatchNorm2d -> activation.

    Constructor args: (ch_in, ch_out, kernel, stride, padding, groups, dilation, activation).
    """

    default_act = nn.SiLU()  # single activation object used whenever act is True

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Build the conv, batch-norm and activation layers.

        Used by Bottleneck and C3 in this file (the original notes also list
        Focus, BottleneckCSP, SPP, DWConv and TransformerBlock as callers;
        those are not visible here).

        :params c1: input channel count of the convolution
        :params c2: output channel count (also the BatchNorm2d feature count)
        :params k: kernel_size of the convolution
        :params s: stride of the convolution
        :params p: padding hint forwarded to autopad(k, p, d); defaults to None
                   (autopad is defined elsewhere and is expected to derive the
                   padding in that case -- confirm against its definition)
        :params g: groups argument of nn.Conv2d
        :params d: dilation argument of nn.Conv2d
        :params act: True selects default_act (SiLU); an nn.Module instance is
                     used as given; any other value disables the activation
                     (nn.Identity)
        """
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply conv, then batch norm, then the activation."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Apply conv and activation only, skipping the batch-norm layer.

        Per the original note this is the forward used by the Model class's
        fuse function once Conv + BN have been merged to speed up inference
        (typically for test/validation).
        """
        convolved = self.conv(x)
        return self.act(convolved)
2.Bottleneck
class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 Conv -> 3x3 Conv, plus an optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        """Create the two convolutions and decide whether the residual add applies.

        Used by C3 in this file (the original notes also mention BottleneckCSP
        and the parse_model function in yolo.py as callers).

        :params c1: input channel count of the first convolution
        :params c2: output channel count of the second convolution
        :params shortcut: whether a shortcut (residual) connection is requested, default True
        :params g: groups value passed to the 3x3 convolution
        :params e: expansion ratio; int(c2 * e) is the output channel count of
                   the first convolution and the input channel count of the second
        """
        super().__init__()
        hidden = int(c2 * e)  # channels between the two convolutions
        self.cv1 = Conv(c1, hidden, 1, 1)  # 1x1
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)  # 3x3
        # The residual add is only used when requested and input/output channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Run both convolutions and add the input back when self.add is truthy."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
3.BottleneckCSP
C3
class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        """Create the three 1x1 convolutions and the stack of n Bottleneck modules.

        Per the original notes this is called from the C3TR module (written
        "C3RT" there) and from the parse_model function in yolo.py; neither is
        visible here.

        :params c1: input channel count of the whole C3 module
        :params c2: output channel count of the whole C3 module
        :params n: number of stacked Bottleneck sub-modules
        :params shortcut: forwarded to each Bottleneck as its shortcut flag, default True
        :params g: forwarded to each Bottleneck as its groups value
        :params e: expansion ratio; int(c2 * e) is the channel count used by
                   both branches and by every Bottleneck in the stack
        """
        super().__init__()
        hidden = int(c2 * e)  # channel count shared by both branches
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        # Bottlenecks keep the channel count unchanged (e=1.0).
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)
        # Experimental alternative noted by the original author: build the stack from
        # CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) instead of Bottleneck.

    def forward(self, x):
        """Concatenate the bottleneck branch and the plain branch on dim 1, then apply cv3."""
        deep_branch = self.m(self.cv1(x))
        plain_branch = self.cv2(x)
        merged = torch.cat((deep_branch, plain_branch), 1)
        return self.cv3(merged)