Note: the annotations in this post are my own, based on material I looked up, and are for reference only 😁
Inception-v4 is excellent study material for image classification. We can call many of Google's open-source pre-built models directly through the pretrainedmodels module, or we can read the code itself; the former is more useful for practical applications, while the latter is better for understanding the model.
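For the practical route, loading the model takes only a few lines. This is a minimal sketch, assuming pretrainedmodels has been installed via pip; the num_classes and pretrained arguments follow the library's documented interface:
import pretrainedmodels
import torch
# Load pretrained Inception-v4 (the weights are downloaded on first use)
model = pretrainedmodels.__dict__['inceptionv4'](num_classes=1000, pretrained='imagenet')
model.eval()
# Dummy 299x299 RGB batch, the input size this model expects
x = torch.randn(1, 3, 299, 299)
with torch.no_grad():
    logits = model(x)  # shape: (1, 1000)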
----The Inception-v4 architecture
First of all, the main body of Inception-v4 is built from five kinds of modules:
Inception-A Inception-B Inception-C Reduction-A Reduction-B
This is how the source code writes it:
def __init__(self, num_classes=1001):
super(InceptionV4, self).__init__()
        # Special attributes
self.input_space = None
self.input_size = (299, 299, 3)
self.mean = None
self.std = None
# Modules
self.features = nn.Sequential(
BasicConv2d(3, 32, kernel_size=3, stride=2),
BasicConv2d(32, 32, kernel_size=3, stride=1),
BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
Mixed_3a(),
Mixed_4a(),
Mixed_5a(),
Inception_A(),
Inception_A(),
Inception_A(),
Inception_A(),
Reduction_A(), # Mixed_6a
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Reduction_B(), # Mixed_7a
Inception_C(),
Inception_C(),
Inception_C()
)
self.last_linear = nn.Linear(1536, num_classes)
-------So complex...
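Before diving into each module, it helps to check the tensor shapes end to end. The forward/logits methods of the class are not shown above, so the snippet below is only my own shape sanity check on self.features and self.last_linear (it assumes the full class definition from the source file has been imported; the 8x8 spatial size for a 299x299 input follows from the strides and paddings walked through below):
import torch
net = InceptionV4(num_classes=1001)
x = torch.randn(1, 3, 299, 299)
feats = net.features(x)            # (1, 1536, 8, 8) for a 299x299 input
pooled = feats.mean(dim=(2, 3))    # global average pooling -> (1, 1536)
logits = net.last_linear(pooled)   # (1, 1001)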
----Building the basic convolution block
First let's look at the most basic convolution block, which every module below relies on.
The meaning of each parameter is explained in the comments (there are quite a few here; later blocks have fewer) 🤢🤢
class BasicConv2d(nn.Module):
    # Parameters of the conv block: input channels, output channels, kernel size, stride; padding=0 means no padding
    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        # Call the parent class nn.Module's initializer to initialize BasicConv2d
        # Note: super() lets a subclass call functions of its parent class; calling __init__ this way
        # gives the subclass the parent's attributes and methods
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_planes, out_planes,
                              kernel_size=kernel_size, stride=stride,
                              padding=padding, bias=False)  # bias=False: no bias term (the BatchNorm below supplies the shift)
        # Batch Normalization layer: speeds up training and makes the model more robust
        # eps: a small constant added to the denominator when normalizing a channel,
        # so the division can never be by zero
        # momentum: the factor used to update the running mean and variance estimates
        # affine: whether to apply a learnable affine transform after normalization
        # Note: the affine transform here is a per-channel scale (weight) and shift (bias):
        # output = normalized_input * weight + bias
        self.bn = nn.BatchNorm2d(out_planes,
                                 eps=0.001,  # value found in tensorflow
                                 momentum=0.1,  # default pytorch value
                                 affine=True)
        # ReLU activation
        self.relu = nn.ReLU(inplace=True)
    # Forward pass
    def forward(self, x):
        # The input tensor x first goes through the convolution
        x = self.conv(x)
        # The convolved tensor is then batch-normalized
        x = self.bn(x)
        # The normalized result goes through the activation
        x = self.relu(x)
        # Return the tensor after conv -> BN -> ReLU
        return x
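As a quick standalone check (my own snippet, not from the source file): the very first stem layer BasicConv2d(3, 32, kernel_size=3, stride=2) shrinks a 299x299 input to 149x149, since a 3x3 kernel with stride 2 and no padding gives floor((299 - 3) / 2) + 1 = 149:
import torch
layer = BasicConv2d(3, 32, kernel_size=3, stride=2)
x = torch.randn(1, 3, 299, 299)
print(layer(x).shape)  # torch.Size([1, 32, 149, 149])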
----Mixed_3a, Mixed_4a, Mixed_5a
class Mixed_3a(nn.Module):
    def __init__(self):
        # Initialize via the parent class nn.Module
        super(Mixed_3a, self).__init__()
        # Max-pooling layer with a 3x3 window and stride 2
        self.maxpool = nn.MaxPool2d(3, stride=2)
        # Conv layer: 64 input channels, 96 output channels, 3x3 kernel, stride 2
        self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2)
    # Forward pass: the max-pool branch and the conv branch run on the same input in parallel
    def forward(self, x):
        x0 = self.maxpool(x)
        x1 = self.conv(x)
        # Concatenate the max-pool result and the conv result along the channel dimension
        out = torch.cat((x0, x1), 1)
        return out
class Mixed_4a(nn.Module):
    def __init__(self):
        # Again initialized via the parent class nn.Module
        super(Mixed_4a, self).__init__()
        # Branch 0: two conv layers
        self.branch0 = nn.Sequential(
            BasicConv2d(160, 64, kernel_size=1, stride=1),  # first a 1x1 conv reduces the 160 channels to 64, also known as dimensionality reduction
            BasicConv2d(64, 96, kernel_size=3, stride=1)  # then a 3x3 conv
        )
        # The rest follows the same pattern; note that BasicConv2d is the class defined above, not a library API
        self.branch1 = nn.Sequential(
            BasicConv2d(160, 64, kernel_size=1, stride=1),
            # padding=(0, 3): no padding along the height, 3 columns of zeros along the width
            BasicConv2d(64, 64, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            # padding=(3, 0): 3 rows of zeros along the height, no padding along the width
            BasicConv2d(64, 64, kernel_size=(7, 1), stride=1, padding=(3, 0)),
            BasicConv2d(64, 96, kernel_size=(3, 3), stride=1)
        )
    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        out = torch.cat((x0, x1), 1)
        return out
class Mixed_5a(nn.Module):
def __init__(self):
super(Mixed_5a, self).__init__()
self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)
self.maxpool = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.conv(x)
x1 = self.maxpool(x)
out = torch.cat((x0, x1), 1)
return out
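Chaining the three stem convolutions with these three mixed blocks turns the 3x299x299 image into the 384x35x35 tensor that Inception_A expects. A small sketch to verify this (my own snippet, assuming the classes defined above are in scope):
import torch
import torch.nn as nn
stem = nn.Sequential(
    BasicConv2d(3, 32, kernel_size=3, stride=2),
    BasicConv2d(32, 32, kernel_size=3, stride=1),
    BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
    Mixed_3a(),  # -> 160 channels (64 pooled + 96 conv), 73x73
    Mixed_4a(),  # -> 192 channels (96 + 96), 71x71
    Mixed_5a(),  # -> 384 channels (192 + 192), 35x35
)
print(stem(torch.randn(1, 3, 299, 299)).shape)  # torch.Size([1, 384, 35, 35])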
----OK, you now understand every building block that Inception-v4 calls 😍
----Inception-A Inception-B Inception-C
# The Inception_A module; only a few details are annotated, since most of it matches the code above
class Inception_A(nn.Module):
def __init__(self):
super(Inception_A, self).__init__()
self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)
)
        # padding=1 pads a ring of zeros around all four sides
self.branch2 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
)
self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),  # average pooling over a 3x3 window;
            # count_include_pad=False means the zero padding is excluded from the average near the edges
BasicConv2d(384, 96, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
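The four branches output 96 channels each, so the concatenated result has 96 * 4 = 384 channels, exactly matching the input, and the stride-1 convolutions with padding preserve the 35x35 spatial size. That is what lets four Inception_A blocks be stacked back to back in features. A quick check (my own snippet):
import torch
blk = Inception_A()
y = blk(torch.randn(1, 384, 35, 35))
print(y.shape)  # torch.Size([1, 384, 35, 35])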
class Inception_B(nn.Module):
def __init__(self):
super(Inception_B, self).__init__()
self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)),
BasicConv2d(224, 256, kernel_size=(7, 1), stride=1, padding=(3, 0))
)
self.branch2 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)),
BasicConv2d(192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)),
BasicConv2d(224, 224, kernel_size=(7, 1), stride=1, padding=(3, 0)),
BasicConv2d(224, 256, kernel_size=(1, 7), stride=1, padding=(0, 3))
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1024, 128, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
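The (1, 7) / (7, 1) pairs in branch1 and branch2 are factorized 7x7 convolutions: two one-dimensional kernels cover the same 7x7 receptive field with far fewer weights. A rough back-of-the-envelope comparison for branch1 (my own numbers, not from the source):
# a single 7x7 conv, 192 -> 256 channels
full_7x7 = 7 * 7 * 192 * 256                 # 2,408,448 weights
# the factorized 1x7 (192 -> 224) followed by 7x1 (224 -> 256), as in branch1
factorized = 7 * 192 * 224 + 7 * 224 * 256   # 702,464 weights, roughly 3.4x fewer
print(full_7x7, factorized)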
class Inception_C(nn.Module):
def __init__(self):
super(Inception_C, self).__init__()
self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)
self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch1_1a = BasicConv2d(384, 256, kernel_size=(1, 3), stride=1, padding=(0, 1))
self.branch1_1b = BasicConv2d(384, 256, kernel_size=(3, 1), stride=1, padding=(1, 0))
self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch2_1 = BasicConv2d(384, 448, kernel_size=(3, 1), stride=1, padding=(1, 0))
self.branch2_2 = BasicConv2d(448, 512, kernel_size=(1, 3), stride=1, padding=(0, 1))
self.branch2_3a = BasicConv2d(512, 256, kernel_size=(1, 3), stride=1, padding=(0, 1))
self.branch2_3b = BasicConv2d(512, 256, kernel_size=(3, 1), stride=1, padding=(1, 0))
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1536, 256, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1_0 = self.branch1_0(x)
x1_1a = self.branch1_1a(x1_0)
x1_1b = self.branch1_1b(x1_0)
x1 = torch.cat((x1_1a, x1_1b), 1)
x2_0 = self.branch2_0(x)
x2_1 = self.branch2_1(x2_0)
x2_2 = self.branch2_2(x2_1)
x2_3a = self.branch2_3a(x2_2)
x2_3b = self.branch2_3b(x2_2)
x2 = torch.cat((x2_3a, x2_3b), 1)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
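Note that branch1 and branch2 each end by applying a (1, 3) and a (3, 1) convolution in parallel to the same tensor and concatenating the halves, so each of those branches contributes 256 + 256 = 512 channels. The total output is 256 + 512 + 512 + 256 = 1536 channels, exactly what self.last_linear expects. A quick check (my own snippet):
import torch
blk = Inception_C()
y = blk(torch.randn(1, 1536, 8, 8))
print(y.shape)  # torch.Size([1, 1536, 8, 8])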
----Reduction-A Reduction-B
class Reduction_A(nn.Module):
def __init__(self):
super(Reduction_A, self).__init__()
self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)
self.branch1 = nn.Sequential(
BasicConv2d(384, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),
BasicConv2d(224, 256, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Reduction_B(nn.Module):
def __init__(self):
super(Reduction_B, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=3, stride=2)
)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 256, kernel_size=1, stride=1),
BasicConv2d(256, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)),
BasicConv2d(256, 320, kernel_size=(7, 1), stride=1, padding=(3, 0)),
BasicConv2d(320, 320, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
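Both reduction modules use stride-2, no-padding branches (including the max-pool branch) to halve the spatial size while growing the channel count: Reduction_A maps 384 channels to 384 + 256 + 384 = 1024, and Reduction_B maps 1024 to 192 + 320 + 1024 = 1536. A quick check (my own snippet):
import torch
ra, rb = Reduction_A(), Reduction_B()
print(ra(torch.randn(1, 384, 35, 35)).shape)   # torch.Size([1, 1024, 17, 17])
print(rb(torch.randn(1, 1024, 17, 17)).shape)  # torch.Size([1, 1536, 8, 8])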
OK, you now understand Inception-v4!
Note: the code open-sourced on GitHub needs to download pretrained weights.
👇 That is the block below: the url field points to the pretrained weights.
# Pretrained-weight settings for the Inception-v4 model
pretrained_settings = {
'inceptionv4': {
'imagenet': {
            # Download URL for the pretrained weights; you can download the file in advance and point to its local path when loading
'url': 'https://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
            # The input image size is 3 x 299 x 299
'input_size': [3, 299, 299],
            # Pixel value range of the input. Raw images usually store each colour channel as 0-255;
            # the range here means that after scaling, each pixel's RGB values are stored as floats in 0~1
'input_range': [0, 1],
            # Per-channel mean of the input images
            'mean': [0.5, 0.5, 0.5],
            # Per-channel standard deviation of the input images
            'std': [0.5, 0.5, 0.5],
            # Number of output classes
'num_classes': 1000
},
        # This entry also treats the background as a class, using pretrained weights that include it
'imagenet+background': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1001
}
}
}
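These settings translate directly into a standard preprocessing pipeline. The sketch below uses torchvision transforms to apply the input_size, input_range, mean and std above (the transform choices are mine, assuming torchvision is available; they are not part of this file):
from torchvision import transforms
preprocess = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),                      # scales pixels into input_range [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5],  # mean from pretrained_settings
                         std=[0.5, 0.5, 0.5]),  # std from pretrained_settings
])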
Reading each module alongside the Inception-v4 architecture diagrams in the explainer videos on Bilibili makes everything much quicker to understand!