import torch
import torch.nn as nn
from torch.nn import functional as F
from torch import Tensor
import math
from torchsummary import summary
BN_MOMENTUM = 0.1
# 3x3 convolution helper (ResNet-style wrapper around nn.Conv2d).
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution; padding=1 keeps the spatial size at stride 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        padding_mode='zeros',
    )
# 1x1 convolution helper (pure channel projection, no padding needed).
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution (bias-free, as it is always followed by BatchNorm)."""
    projection = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                           stride=stride, bias=False)
    return projection
# Mish activation function.
class Mish(nn.Module):
    """Mish: x * tanh(softplus(x)) — a smooth, self-gated activation."""

    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x: Tensor) -> Tensor:
        # softplus(x) = ln(1 + e^x); the tanh of it acts as a soft gate on x.
        gate = torch.tanh(F.softplus(x))
        return x * gate
# Basic "conv block": Conv2d + BatchNorm2d + Mish.
# CSPDarknet stacks these the way Darknet stacks conv + conv + residual.
class ConvBNMish(nn.Module):
    expansion = 1  # kept for ResNet-style compatibility; not used by this block

    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
        """padding = kernel_size // 2 preserves the spatial size at stride 1."""
        super(ConvBNMish, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              stride, kernel_size // 2, bias=False)
        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
        self.activation = Mish()

    def forward(self, x):
        # Activation after BN (could also be tried before BN).
        return self.activation(self.bn(self.conv(x)))
# Component of the CSPDarknet structural block:
# the residual unit stacked inside each stage, H(x) = x + F(x).
class ResBlock(nn.Module):
    def __init__(self, in_channels, hidden_channels=None):
        """F(x) is a 1x1 bottleneck followed by a 3x3 conv back to in_channels.

        hidden_channels is the hidden (bottleneck) width:
        in_channels -> hidden_channels -> in_channels.
        """
        super(ResBlock, self).__init__()
        # Default the bottleneck width to the input width.
        if hidden_channels is None:
            hidden_channels = in_channels
        # Two "conv blocks" per residual unit (BasicBlock style).
        self.block = nn.Sequential(
            ConvBNMish(in_channels, hidden_channels, kernel_size=1),
            ConvBNMish(hidden_channels, in_channels, kernel_size=3),
        )

    def forward(self, x):
        # Residual connection: H(x) = F(x) + x.
        residual = self.block(x)
        return x + residual
# One CSP stage of CSPDarknet: a large shortcut edge bypasses the stack of
# residual units, then the two paths are concatenated and fused.
# The first stage differs from the rest (single residual block, full-width
# paths) — see the YOLO papers.
class ResBlockBody(nn.Module):
    def __init__(self, in_channels, out_channels, num_block, first):
        """first=True: both CSP paths keep out_channels and a single residual
        block with a halved hidden width is used; later stages halve each
        path's width and stack `num_block` residual blocks."""
        super(ResBlockBody, self).__init__()
        # Stride-2 3x3 conv halves the spatial resolution.
        self.downsample = ConvBNMish(in_channels, out_channels, kernel_size=3, stride=2)
        # Width of each CSP path after the split.
        part_channels = out_channels if first else out_channels // 2
        # CSP part 1 (shortcut path) and part 2 (main path) entry convolutions.
        self.split_conv0 = ConvBNMish(out_channels, part_channels, kernel_size=1)
        self.split_conv1 = ConvBNMish(out_channels, part_channels, kernel_size=1)
        if first:
            residual_units = [ResBlock(in_channels=out_channels,
                                       hidden_channels=out_channels // 2)]
        else:
            residual_units = [ResBlock(part_channels) for _ in range(num_block)]
        # Main path: stacked residual units followed by a 1x1 transition conv.
        self.blocks_conv = nn.Sequential(
            *residual_units,
            ConvBNMish(part_channels, part_channels, kernel_size=1),
        )
        # After concatenation the channel count is 2 * part_channels.
        self.concat_conv = ConvBNMish(part_channels * 2, out_channels, 1)

    def forward(self, x):
        # Downsample first, then split into the two CSP paths.
        x = self.downsample(x)
        shortcut = self.split_conv0(x)
        main = self.blocks_conv(self.split_conv1(x))
        # Concatenate along channels and fuse with a final 1x1 conv.
        return self.concat_conv(torch.cat([shortcut, main], dim=1))
# CSPDarknet53: the complete network (backbone + classification head).
class CSPDarknet53(nn.Module):
    def __init__(self, layer_num, num_classes):
        """Build the backbone.

        Args:
            layer_num: residual-block count per stage, e.g. [1, 2, 8, 8, 4].
            num_classes: number of output classes for the final FC layer.
        """
        super(CSPDarknet53, self).__init__()
        # Stem input width: RGB (3 channels) -> 32 channels, stride 1.
        self.in_channels = 32
        self.conv1 = ConvBNMish(3, self.in_channels, kernel_size=3, stride=1)
        # Output width of each of the five CSP stages.
        filters = [64, 128, 256, 512, 1024]
        self.stages = nn.ModuleList([
            ResBlockBody(self.in_channels, filters[0], layer_num[0], first=True),
            ResBlockBody(filters[0], filters[1], layer_num[1], first=False),
            ResBlockBody(filters[1], filters[2], layer_num[2], first=False),
            ResBlockBody(filters[2], filters[3], layer_num[3], first=False),
            ResBlockBody(filters[3], filters[4], layer_num[4], first=False)
        ])
        self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
        # BUG FIX: the classifier must be a fully-connected layer.
        # The original `nn.LeakyReLU(filters[4], num_classes)` constructed a
        # LeakyReLU with negative_slope=1024 (and inplace=num_classes), so the
        # network could never produce `num_classes` logits.
        self.fc = nn.Linear(filters[4], num_classes)
        # Weight initialization (DenseNet style): He-normal for conv weights,
        # unit scale / zero shift for BN, zero bias for linear layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.stages[0](out)
        out = self.stages[1](out)
        # out3/out4/out5 are the multi-scale features a YOLO neck would tap;
        # only the deepest map is used for classification here.
        out3 = self.stages[2](out)
        out4 = self.stages[3](out3)
        out5 = self.stages[4](out4)
        out = self.global_pooling(out5)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        # NOTE(review): returns softmax probabilities; if training with
        # nn.CrossEntropyLoss this should return raw logits instead — confirm.
        return F.softmax(out, dim=1)
def darknet_53(num_classes=2):
    """Build CSPDarknet53 with the standard [1, 2, 8, 8, 4] stage depths."""
    stage_depths = [1, 2, 8, 8, 4]
    return CSPDarknet53(stage_depths, num_classes)
# Guard the demo so that importing this module stays side-effect free:
# previously the model was built and summarized on every import.
if __name__ == "__main__":
    net = darknet_53()
    summary(net, (3, 256, 256))
笔记,把这几天的学习记录一下。
- 1
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size // 2, bias=False)
这里padding = kernel_size // 2,根据卷积核的大小变化。
参考这里
在pytorch中,如果不设置padding,默认为valid,在Tensorflow中valid为不补零,这样输出和输入会不一致。
参考这里
有一个参数是dilation,用来设置空洞卷积的扩张率。默认为1(即普通卷积,不是0)。这里不使用。
- 2
self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
num_features 实际上是输入的通道数(channel),而不是 batch_size * width * height
参考这里
bn设置的参数实际上是channel的参数,但其实代码中是输入的通道数
详细看这里
如果不设置,momentum默认为0.1。
参考这里
- 3
需要安装torchsummary包,可以使用summary。非常好的工具。
- 4
还有一个expansion参数没用上,原版的resnet中需要,是对通道的倍乘。因为进来的时候对输入通道进行了降维,需要恢复。等之后用到再细细研究。
给个很棒的讲解。这里
resnet,基础版的不同之处只在于这里是三个卷积,分别是1x1,3x3,1x1,分别用来压缩维度,卷积处理,恢复维度,inplane是输入的通道数,plane是输出的通道数,expansion是对输出通道数的倍乘,在basic中expansion是1,此时完全忽略expansion这个东东,输出的通道数就是plane,然而bottleneck就是不走寻常路,它的任务就是要对通道数进行压缩,再放大,于是,plane不再代表输出的通道数,而是block内部压缩后的通道数,输出通道数变为 plane*expansion。接着就是网络主体了。