Conv
Conv is the core building block of YOLOv5. The code is as follows:
# Auto-pad for 'same' convolution or pooling: to keep the feature-map size unchanged
# after the conv/pool, the input must be zero-padded; this function computes how much.
def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        # if k is an int, floor-divide by 2; if it is a sequence of ints, floor-divide each element
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
class Conv(nn.Module):
    # Standard convolution: conv + BN + Hardswish
    # c1: input channels, c2: output channels, k: kernel, s: stride, p: padding, g: groups
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.Hardswish() if act else nn.Identity()  # if act=False, no activation is applied

    def forward(self, x):  # the execution order of the network is determined by forward
        return self.act(self.bn(self.conv(x)))  # conv -> bn -> hardswish

    def fuseforward(self, x):  # no BN: only convolution and activation
        return self.act(self.conv(x))
When p is not specified, Conv2d's padding is computed as k // 2 (floor division) by the autopad function.
There are two cases:
- when s=1, the feature map keeps its size after Conv2d;
- when s=2, the feature map is halved after Conv2d.
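A minimal sketch verifying both cases (channel counts and sizes here are illustrative, not from the original):
import torch
import torch.nn as nn

# k=3 -> autopad gives p = 3 // 2 = 1, so with s=1 the 20x20 map keeps its size
same = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=3 // 2)
# with s=2 the spatial size is halved
down = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=3 // 2)

x = torch.zeros(1, 64, 20, 20)
print(same(x).shape)  # torch.Size([1, 64, 20, 20])
print(down(x).shape)  # torch.Size([1, 128, 10, 10])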
Conv is Conv2d + BatchNorm2d + Hardswish. bias defaults to False, i.e. the convolution carries no bias: when Conv2d and BatchNorm2d are later fused, the convolution's bias term would be absorbed by the fusion anyway, so it is dropped up front.
The fuse_conv_and_bn function merges the Conv2d and BatchNorm2d layers. Once training is finished, the code fuses the conv and BN layers during inference and model export. For visualization you can disable fuse() in models/yolo.py:
def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
    print('Fusing layers... ')
    for m in self.model.modules():
        if type(m) is Conv and hasattr(m, 'bn'):
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
            m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
            delattr(m, 'bn')  # remove batchnorm
            m.forward = m.fuseforward  # update forward
    self.info()
    return self
With fuse() disabled, visualizing yolov5s.torchscript.pt with Netron again shows the BN layers.
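For reference, fusion folds the BN statistics into the convolution weights. A minimal sketch of the idea, assuming a bias-free conv as above (an illustrative re-implementation, not the exact fuse_conv_and_bn from utils):
import torch
import torch.nn as nn

def fuse_conv_bn_sketch(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    # BN(conv(x)) = gamma * (conv(x) - mean) / sqrt(var + eps) + beta, which folds into a
    # single conv with w' = w * gamma / sqrt(var + eps), b' = beta - mean * gamma / sqrt(var + eps)
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size,
                      conv.stride, conv.padding, groups=conv.groups, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    fused.weight.data = conv.weight.data * scale.reshape(-1, 1, 1, 1)
    fused.bias.data = bn.bias.data - bn.running_mean * scale
    return fused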
For anything that consumes a shape or size return value, e.g. tensor.view(tensor.size(0), -1), avoid using tensor.size's return value directly; wrap it with int(), as in tensor.view(int(tensor.size(0)), -1). This stops PyTorch from tracing the size computation during ONNX export; tracing it would generate Shape and Gather nodes.
Modified code in models/yolo.py:
# bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
bs, _, ny, nx = map(int, x[i].shape)
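A minimal sketch of the effect, using a toy module (the module name and shapes are illustrative):
import torch
import torch.nn as nn

class Traced(nn.Module):
    def forward(self, x):
        bs, c, ny, nx = map(int, x.shape)  # plain Python ints: baked into the graph as constants
        # bs, c, ny, nx = x.shape          # traced values: exported as Shape/Gather nodes
        return x.view(bs, c, ny * nx)

torch.onnx.export(Traced(), torch.zeros(1, 3, 20, 20), 'traced.onnx')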
Focus
To make the graph display the output shapes, modify export.py as below and re-export the ONNX model.
# Checks (requires `from onnx import shape_inference`)
onnx.save(shape_inference.infer_shapes(onnx_model), f)
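Shape inference can also be run on an already-exported model; a minimal sketch (file names are illustrative):
import onnx
from onnx import shape_inference

model = onnx.load('yolov5s.onnx')            # load the exported graph
model = shape_inference.infer_shapes(model)  # annotate every tensor with its inferred shape
onnx.save(model, 'yolov5s_shapes.onnx')      # Netron will now show shapes on the edges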
Slicing
x[..., ::2, ::2]: red patch; w and h start at 0, taking every other element
x[..., 1::2, ::2]: yellow patch; w starts at 1, h starts at 0, taking every other element
x[..., ::2, 1::2]: green patch; w starts at 0, h starts at 1, taking every other element
x[..., 1::2, 1::2]: blue patch; w and h start at 1, taking every other element
Concat
The four slices are concatenated along the channel dimension: for a (1,3,640,640) input this gives (1,12,320,320).
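A small numeric sketch of the four slices (toy 4x4 single-channel input; values illustrative):
import torch

x = torch.arange(16).reshape(1, 1, 4, 4)      # one 4x4 channel
print(x[..., ::2, ::2].flatten().tolist())    # [0, 2, 8, 10]
print(x[..., 1::2, ::2].flatten().tolist())   # [4, 6, 12, 14]
print(x[..., ::2, 1::2].flatten().tolist())   # [1, 3, 9, 11]
print(x[..., 1::2, 1::2].flatten().tolist())  # [5, 7, 13, 15]
y = torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
print(y.shape)  # torch.Size([1, 4, 2, 2]): 4x the channels, half the spatial size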
# Focus: aggregates width (w) and height (h) information into channel space
class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2): acts as a downsampling step
        # slice the input, concatenate the four slices, then convolve
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
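A minimal usage sketch, assuming the Conv class above is in scope (sizes follow yolov5s's first layer):
import torch

focus = Focus(c1=3, c2=32, k=3)
x = torch.zeros(1, 3, 640, 640)
print(focus(x).shape)  # torch.Size([1, 32, 320, 320]): slicing gives (1,12,320,320), then conv maps 12 -> 32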
Note:
[-1]: the last item of a list
a = [1, 2, 3, 4, 5]
print(a[-1])  # 5
[:-1]: from the first item (included) to the last item (excluded)
The general form is [n : m]: a half-open interval with zero-based indices; the first index n is included, the second index m is excluded.
a = [1, 2, 3, 4, 5]
print(a[0:-1])  # [1, 2, 3, 4]
print(a[:-1])   # [1, 2, 3, 4]
print(a[1:2])   # [2]
[::-1]: the whole list in reverse order
The general form is [n : m : k]: take elements starting at index n (which is included) and stopping before index m (which is excluded), stepping by k. The step k can be positive or negative: positive steps walk forward, negative steps walk backward.
Note that when k is positive the start index must be smaller than the end index; when k is negative the start index must be larger than the end index.
a = [1, 2, 3, 4, 5]
print(a[4:2:-1])  # [5, 4]
print(a[3::1])    # [4, 5]
print(a[::1])     # [1, 2, 3, 4, 5]
print(a[1::2])    # [2, 4]
print(a[3::-1])   # [4, 3, 2, 1]
[:, :, 0]: indexing for multi-dimensional data; select everything along the first two dimensions and take index 0 along the last.
The general form is [n:m, j:k, g:h] and extends to any number of dimensions; in image processing three are typical. n:m selects from the outermost list (half-open), j:k selects within the elements chosen by n:m (also half-open), and g:h selects within those; any pair can be replaced by a single integer to pick one index directly. This is how nested lists are indexed.
a = np.array([[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
              [[17, 18, 19, 20], [21, 22, 23, 24], [25, 26, 27, 28], [29, 30, 31, 32]],
              [[33, 34, 35, 36], [37, 38, 39, 40], [41, 42, 43, 44], [45, 46, 47, 48]]])
print(a[:, :, 0])        # [[ 1  5  9 13] [17 21 25 29] [33 37 41 45]]
print(a[1:2, 1:3, 0])    # [[21 25]]
print(a[1:2, 1:3, 0:2])  # [[[21 22] [25 26]]]
[..., 0]: takes every 0th element of the innermost dimension; ... is shorthand for the omitted :, :, :,. Only one ellipsis is allowed per index ([..., ...] is not valid), and using it alone is just a; here [..., 0] is equivalent to [:, :, 0].
a = np.array([[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
              [[17, 18, 19, 20], [21, 22, 23, 24], [25, 26, 27, 28], [29, 30, 31, 32]],
              [[33, 34, 35, 36], [37, 38, 39, 40], [41, 42, 43, 44], [45, 46, 47, 48]]])
print(a[..., 0])    # [[ 1  5  9 13] [17 21 25 29] [33 37 41 45]]
print(a[..., 0:2])  # [[[ 1  2] [ 5  6] [ 9 10] [13 14]]
                    #  [[17 18] [21 22] [25 26] [29 30]]
                    #  [[33 34] [37 38] [41 42] [45 46]]]
print(a[0, ...])    # [[ 1  2  3  4] [ 5  6  7  8] [ 9 10 11 12] [13 14 15 16]]
[..., ::-1]: reverses the innermost dimension
a = np.array([[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
              [[17, 18, 19, 20], [21, 22, 23, 24], [25, 26, 27, 28], [29, 30, 31, 32]],
              [[33, 34, 35, 36], [37, 38, 39, 40], [41, 42, 43, 44], [45, 46, 47, 48]]])
print(a[..., ::-1])   # [[[ 4  3  2  1] [ 8  7  6  5] [12 11 10  9] [16 15 14 13]]
                      #  [[20 19 18 17] [24 23 22 21] [28 27 26 25] [32 31 30 29]]
                      #  [[36 35 34 33] [40 39 38 37] [44 43 42 41] [48 47 46 45]]]
print(a[:, ::-1, :])  # [[[13 14 15 16] [ 9 10 11 12] [ 5  6  7  8] [ 1  2  3  4]]
                      #  [[29 30 31 32] [25 26 27 28] [21 22 23 24] [17 18 19 20]]
                      #  [[45 46 47 48] [41 42 43 44] [37 38 39 40] [33 34 35 36]]]
BCSPn
class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # 1x1 convolution
        self.cv2 = Conv(c_, c2, 3, 1, g=g)  # 3x3 convolution
        self.add = shortcut and c1 == c2  # the shortcut is only possible when c1 equals c2

    def forward(self, x):
        # self.add decides whether the shortcut is used
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
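A quick shape check, assuming the Conv class above (sizes are illustrative):
import torch

m = Bottleneck(64, 64)  # c1 == c2 and shortcut=True, so the residual add is active
x = torch.zeros(1, 64, 40, 40)
print(m(x).shape)       # torch.Size([1, 64, 40, 40]): 1x1 conv to 32 channels, 3x3 back to 64, plus x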
class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        # the * operator unpacks the list into individual arguments for Sequential,
        # so m applies the Bottleneck block n times in a row
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
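A quick shape check (illustrative sizes):
import torch

m = BottleneckCSP(64, 128, n=3)  # cv1->m->cv3 branch and cv2 branch, concatenated, then fused by cv4
x = torch.zeros(1, 64, 40, 40)
print(m(x).shape)                # torch.Size([1, 128, 40, 40])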
SPP
class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
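The max-pools use stride 1 and padding k // 2, so the spatial size never changes; only the channel count grows to c_ * (len(k) + 1) before cv2 projects it to c2. A quick check (illustrative sizes):
import torch

m = SPP(512, 512)
x = torch.zeros(1, 512, 20, 20)
print(m(x).shape)  # torch.Size([1, 512, 20, 20]); internally the cat yields 4 * 256 = 1024 channels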
Concat
class Concat(nn.Module):
    # Concatenate a list of tensors along a dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        # dimension selects the axis along which to concatenate
        self.d = dimension  # the dimension over which the tensors are concatenated

    def forward(self, x):
        # x is a list of tensors; self.d is the chosen dimension
        return torch.cat(x, self.d)
common.py code walkthrough
# common.py
# This file contains modules common to various models
import math
import numpy as np
import torch
import torch.nn as nn

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords

# Auto-pad for 'same' convolution or pooling: to keep the feature-map size unchanged
# after the conv/pool, the input must be zero-padded; this function computes how much.
def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        # if k is an int, floor-divide by 2; if it is a sequence of ints, floor-divide each element
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
# Depthwise-separable convolution; not actually used in yolov5
def DWConv(c1, c2, k=1, s=1, act=True):  # c1, c2: input/output channels; k: kernel; s: stride
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)  # math.gcd() returns the greatest common divisor; Conv is defined below
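Why gcd: the group count must divide both c1 and c2, and math.gcd(c1, c2) is the largest valid choice; when c1 == c2 it reduces to a true depthwise convolution. A quick sketch (illustrative values):
import math

print(math.gcd(64, 64))   # 64: one filter group per channel, i.e. genuine depthwise conv
print(math.gcd(64, 128))  # 64: grouped convolution with 64 groups
print(math.gcd(64, 96))   # 32: falls back to 32 groups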
class Conv(nn.Module):
    # Standard convolution: conv + BN + Hardswish
    # c1: input channels, c2: output channels, k: kernel, s: stride, p: padding, g: groups
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super(Conv, self).__init__()
        # bias=False: no bias is needed, because when Conv2d and BatchNorm2d are fused
        # the convolution's bias term is absorbed by the fusion anyway
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.Hardswish() if act else nn.Identity()  # if act=False, no activation is applied

    def forward(self, x):  # the execution order of the network is determined by forward
        return self.act(self.bn(self.conv(x)))  # conv -> bn -> hardswish

    def fuseforward(self, x):  # no BN: only convolution and activation
        return self.act(self.conv(x))
class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)  # 1x1 convolution
        self.cv2 = Conv(c_, c2, 3, 1, g=g)  # 3x3 convolution
        self.add = shortcut and c1 == c2  # the shortcut is only possible when c1 equals c2

    def forward(self, x):
        # self.add decides whether the shortcut is used
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        # the * operator unpacks the list into individual arguments for Sequential,
        # so m applies the Bottleneck block n times in a row
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
# Spatial pyramid pooling
class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
# Focus: aggregates width (w) and height (h) information into channel space
class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2): acts as a downsampling step
        # slice the input, concatenate the four slices, then convolve
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
class Concat(nn.Module):
    # Concatenate a list of tensors along a dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        # dimension selects the axis along which to concatenate
        self.d = dimension  # the dimension over which the tensors are concatenated

    def forward(self, x):
        # x is a list of tensors; self.d is the chosen dimension
        return torch.cat(x, self.d)
# Non-maximum suppression
class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
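For intuition about what the suppression step does with the raw predictions: keep the highest-scoring box and drop overlapping boxes above the IoU threshold. A self-contained sketch using torchvision.ops.nms (a plain NMS, not yolov5's non_max_suppression, which additionally applies the confidence threshold and class filtering):
import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],     # box A
                      [1., 1., 11., 11.],     # box B, IoU with A is about 0.68
                      [20., 20., 30., 30.]])  # box C, disjoint from A and B
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.45)
print(keep)  # tensor([0, 2]): B is suppressed by the higher-scoring A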
# Input-robust wrapper that auto-adjusts shapes; essentially unused in this codebase
class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    img_size = 640  # inference size (pixels)
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model

    def forward(self, x, size=640, augment=False, profile=False):
        # supports inference from various sources. For height=720, width=1280, RGB images example inputs are:
        #   opencv:   x = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        #   PIL:      x = Image.open('image.jpg')  # HWC x(720,1280,3)
        #   numpy:    x = np.zeros((720,1280,3))  # HWC
        #   torch:    x = torch.zeros(16,3,720,1280)  # BCHW
        #   multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        p = next(self.model.parameters())  # for device and type
        if isinstance(x, torch.Tensor):  # torch
            return self.model(x.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        if not isinstance(x, list):
            x = [x]
        shape0, shape1 = [], []  # image and inference shapes
        batch = range(len(x))  # batch size
        for i in batch:
            x[i] = np.array(x[i])[:, :, :3]  # up to 3 channels if png
            s = x[i].shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(x[i], new_shape=shape1, auto=False)[0] for i in batch]  # pad
        x = np.stack(x, 0) if batch[-1] else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32

        # Inference
        x = self.model(x, augment, profile)  # forward
        x = non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS

        # Post-process
        for i in batch:
            if x[i] is not None:
                x[i][:, :4] = scale_coords(shape1, x[i][:, :4], shape0[i])
        return x
# Flatten
class Flatten(nn.Module):
    # Use after nn.AdaptiveAvgPool2d(1) to remove the last 2 dimensions
    @staticmethod
    def forward(x):
        return x.view(x.size(0), -1)  # flatten the tensor
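A quick check (illustrative sizes):
import torch
import torch.nn as nn

x = torch.zeros(2, 512, 20, 20)
pooled = nn.AdaptiveAvgPool2d(1)(x)  # (2, 512, 1, 1)
print(Flatten()(pooled).shape)       # torch.Size([2, 512])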
# Second-stage classification head: re-classifies detector outputs; simple and easy to adapt
class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # adaptive average pooling to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)  # to x(b,c2,1,1)
        self.flat = Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
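A minimal usage sketch (illustrative sizes):
import torch

head = Classify(c1=512, c2=10)  # e.g. re-classify detected crops into 10 classes
x = torch.zeros(4, 512, 20, 20)
print(head(x).shape)            # torch.Size([4, 10]): pool to 1x1, 1x1 conv to 10 channels, flatten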