# basicconv: Conv2d + BN + ReLU (conv3x3, conv1x1)
# Residual Unit, Bottleneck
Architecture diagram
# Import the required libraries
import torch
import torch.nn as nn
from typing import Type, Union, List, Optional
from torchinfo import summary
Define the 3x3 convolution layer
def conv3x3(in_, out_, stride=1, initialzero=False):
    bn = nn.BatchNorm2d(out_)
    # Do we need to zero-initialize this BN layer?
    # Only the last BN in a block is zero-initialized; otherwise gamma and beta keep their defaults.
    # initialzero=True requests the zero initialization.
    if initialzero == True:
        # Only gamma needs changing; beta is already 0.
        nn.init.constant_(bn.weight, 0)
    return nn.Sequential(
        nn.Conv2d(in_, out_, kernel_size=3, padding=1, stride=stride, bias=False),
        bn)
# Quick test
conv3x3(2, 10)
Sequential(
(0): Conv2d(2, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
Define the 1x1 convolution layer
def conv1x1(in_, out_, stride=1, initialzero=False):
    bn = nn.BatchNorm2d(out_)
    # Do we need to zero-initialize this BN layer?
    # Only the last BN in a block is zero-initialized; otherwise gamma and beta keep their defaults.
    if initialzero == True:
        # Only gamma needs changing; beta is already 0.
        nn.init.constant_(bn.weight, 0)
    return nn.Sequential(
        nn.Conv2d(in_, out_, kernel_size=1, padding=0, stride=stride, bias=False),
        bn)
# Test
conv1x1(2, 10, 1, True)
Sequential(
(0): Conv2d(2, 10, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
# With zero-initialization, the BN layer's gamma parameters all start at 0
conv1x1(2, 10, 1, True)[1].weight
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
# Without zero-initialization, the BN layer's gamma parameters all start at 1
conv1x1(2, 10, 1)[1].weight
Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True)
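A minimal sanity check (my own addition, not in the original): with gamma zeroed, the whole conv+BN block outputs exactly zero, so a residual branch ending in a zero-initialized BN contributes nothing at initialization and the unit starts out close to an identity mapping.
# Illustration (assumed shapes): zero gamma -> the block's output is identically zero
block = conv3x3(2, 10, initialzero=True)
x = torch.randn(5, 2, 8, 8)
block(x).abs().max()  # tensor(0., grad_fn=<MaxBackward1>)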
Define the residual unit
# ResidualUnit
# The residual unit class
class ResidualUnit(nn.Module):
    # Does stride1 equal 2? If so, the feature-map size changes, and a 1x1
    # convolution must be added on the skip connection to match the new size.
    # If stride1 equals 1, nothing extra is needed.
    def __init__(self, out_: int,
                 stride1: int = 1,  # the default stride is 1
                 in_: Optional[int] = None):
        super().__init__()
        self.stride1 = stride1
        # When the feature map is downsampled, the number of output channels
        # doubles, so in_ = out_/2; otherwise out_ == in_.
        if in_ is None:
            if stride1 != 1:
                in_ = int(out_ / 2)
            else:
                in_ = out_
        # Fitting branch, outputs F(x)
        self.fit_ = nn.Sequential(
            # stride1 is the stride of the first convolution layer
            conv3x3(in_, out_, stride=stride1),
            nn.ReLU(inplace=True),
            # the BN of the last convolution layer is zero-initialized
            conv3x3(out_, out_, initialzero=True)
            # no ReLU here: the branches have not been summed yet
        )
        # Skip connection: outputs x (after a 1x1 convolution, when downsampling)
        self.skipconv = conv1x1(in_, out_, stride=stride1)
        # The ReLU applied after H(x) = F(x) + x
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # fitting result
        fx = self.fit_(x)
        if self.stride1 == 2:
            # skip connection with downsampling
            x = self.skipconv(x)
        hx = self.relu(fx + x)
        return hx
# Test
data = torch.ones(10, 64, 56, 56)
conv3_x_18_0 = ResidualUnit(out_=128, stride1=2)
# This simulates the first unit of conv3_x:
# unit 0 halves the feature map and doubles the channel count
conv3_x_18_0(data).shape
torch.Size([10, 128, 28, 28])
# Unit 0 of conv2_x:
# no halving needed here, since the max-pooling layer before it has already halved the feature map
conv2_x_18_0 = ResidualUnit(out_=64)
conv2_x_18_0(data).shape
torch.Size([10, 64, 56, 56])
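To see this on a whole unit (an illustrative check I am adding): since the last BN in fit_ is zero-initialized, F(x) = 0 at initialization, so a stride-1 residual unit initially computes H(x) = ReLU(0 + x) = ReLU(x).
# Illustration: at initialization, the stride-1 unit is just ReLU(x)
unit = ResidualUnit(out_=64)
x = torch.randn(2, 64, 56, 56)
torch.allclose(unit(x), torch.relu(x))  # True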
Define the bottleneck block
class Bottleneck(nn.Module):
    def __init__(self, middle_out, stride1: int = 1, in_: Optional[int] = None):
        super().__init__()
        out_ = 4 * middle_out
        # The optional parameter in_ marks whether this block is the
        # bottleneck that directly follows conv1.
        # If it is not, leave in_ unset.
        if in_ is None:
            # At the transitions conv2_x -> conv3_x -> conv4_x -> conv5_x
            # the feature map is halved each time, and middle_out = in_/2.
            if stride1 != 1:
                in_ = middle_out * 2
            else:
                # No downsampling: this bottleneck is not the first one
                # of its layer, so in_ = middle_out * 4.
                in_ = middle_out * 4
        self.fit_ = nn.Sequential(
            conv1x1(in_, middle_out, stride=stride1),
            nn.ReLU(inplace=True),
            conv3x3(middle_out, middle_out),
            nn.ReLU(inplace=True),
            conv1x1(middle_out, out_, initialzero=True)
        )
        self.skipconv = conv1x1(in_, out_, stride=stride1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        fx = self.fit_(x)
        # A bottleneck's input and output channel counts always differ,
        # so the skip connection is always a convolution here.
        x = self.skipconv(x)
        hx = self.relu(fx + x)
        return hx
# Test
data1 = torch.ones(10, 64, 56, 56)  # input to conv2_x
# Suppose this is the first bottleneck right after conv1
conv2_x_101_0 = Bottleneck(in_=64, middle_out=64)
conv2_x_101_0(data1).shape
torch.Size([10, 256, 56, 56])
# Not the first bottleneck after conv1, but the feature map must be halved
data2 = torch.ones(10, 256, 56, 56)
conv3_x_101_0 = Bottleneck(middle_out=128, stride1=2)
conv3_x_101_0(data2).shape
torch.Size([10, 512, 28, 28])
# Neither the first bottleneck after conv1 nor a downsampling one
data3 = torch.ones(10, 512, 28, 28)
conv3_x_101_1 = Bottleneck(middle_out=128)
conv3_x_101_1(data3).shape
torch.Size([10, 512, 28, 28])
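A quick check of the channel bookkeeping (my own addition): the inferred in_ can be read off the skip connection's 1x1 convolution and matches the inputs fed in above.
# stride1=2 infers in_ = middle_out*2; stride1=1 infers in_ = middle_out*4
print(Bottleneck(middle_out=128, stride1=2).skipconv[0].in_channels)  # 256
print(Bottleneck(middle_out=128).skipconv[0].in_channels)             # 512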
Define a function to build each layer
# num_blocks is the number of residual units / bottlenecks in a single layer
layers = []
num_blocks = 6
# The first block
afterconv1 = True  # True: this is the first block after conv1; False: it is not
if afterconv1 == True:
    layers.append(Bottleneck(middle_out=64, in_=64))
else:
    layers.append(Bottleneck(middle_out=128, stride1=2))
# The remaining blocks are added in a loop;
# middle_out must match the first block (64 here, since afterconv1 is True)
for i in range(num_blocks - 1):
    layers.append(Bottleneck(middle_out=64))
len(layers)
6
layers = []
num_blocks = 6
# The first block
afterconv1 = True  # True: this is the first block after conv1; False: it is not
if afterconv1 == True:
    layers.append(ResidualUnit(out_=64, in_=64))
else:
    layers.append(ResidualUnit(out_=128, stride1=2))
# The remaining blocks are added in a loop;
# out_ must match the first block (64 here, since afterconv1 is True)
for i in range(num_blocks - 1):
    layers.append(ResidualUnit(out_=64))
len(layers)
6
# Works for both residual units and bottleneck blocks.
# Package the logic above into a function that generates one layer.
# block may only be ResidualUnit or Bottleneck.
def make_layers(
        block: Type[Union[ResidualUnit, Bottleneck]],
        middle_out: int,
        num_blocks: int,
        # parameters with defaults must come after those without
        afterconv1: bool = False):
    layers = []
    if afterconv1 == True:
        layers.append(block(middle_out, in_=64))
    else:
        layers.append(block(middle_out, stride1=2))
    # The remaining blocks are added in a loop
    for i in range(num_blocks - 1):
        layers.append(block(middle_out))
    return nn.Sequential(*layers)
layer_34_conv4_x = make_layers(ResidualUnit, 256, 6, False)
len(layer_34_conv4_x)
6
# * unpacks the container's elements into nn.Sequential
# (make_layers already returns an nn.Sequential; this just demonstrates the unpacking)
nn.Sequential(*layer_34_conv4_x)
Test output:
Sequential(
(0): ResidualUnit(
(fit_): Sequential(
(0): Sequential(
(0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(skipconv): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(1): ResidualUnit(
(fit_): Sequential(
(0): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(skipconv): Sequential(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(2): ResidualUnit(
(fit_): Sequential(
(0): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(skipconv): Sequential(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(3): ResidualUnit(
(fit_): Sequential(
(0): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(skipconv): Sequential(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(4): ResidualUnit(
(fit_): Sequential(
(0): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(skipconv): Sequential(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(5): ResidualUnit(
(fit_): Sequential(
(0): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(skipconv): Sequential(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
)
# 34-layer network, conv2_x, the first layer right after conv1:
# no downsampling; every unit outputs 64 channels; 3 blocks
conv2_x_34 = make_layers(ResidualUnit, 64, 3, afterconv1=True)
datashape = (10, 64, 56, 56)
summary(conv2_x_34, datashape, depth=1, device="cpu")
Test output:
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
Sequential [10, 64, 56, 56] --
├─ResidualUnit: 1-1 [10, 64, 56, 56] 78,208
├─ResidualUnit: 1-2 [10, 64, 56, 56] 78,208
├─ResidualUnit: 1-3 [10, 64, 56, 56] 78,208
==========================================================================================
Total params: 234,624
Trainable params: 234,624
Non-trainable params: 0
Total mult-adds (G): 6.94
==========================================================================================
Input size (MB): 8.03
Forward/backward pass size (MB): 192.68
Params size (MB): 0.89
Estimated Total Size (MB): 201.59
==========================================================================================
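As a cross-check of the 78,208 figure (illustrative arithmetic, not from the original): each unit holds two 3x3 conv+BN blocks at 64 channels plus the 1x1 skip conv+BN; the skip parameters are counted even though the stride-1 forward path never uses skipconv.
# 2 x (3x3 conv weights + BN gamma/beta) + (1x1 skip conv weights + BN gamma/beta)
two_convs = 2 * (64 * 64 * 3 * 3 + 2 * 64)  # 73,984
skip = 64 * 64 * 1 * 1 + 2 * 64             # 4,224
print(two_convs + skip)                     # 78208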
conv2_x_101 = make_layers(Bottleneck, 64, 3, afterconv1=True)
datashape = (10, 64, 56, 56)
summary(conv2_x_101, datashape, depth=3, device="cpu")
Test output:
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
Sequential [10, 256, 56, 56] --
├─Bottleneck: 1-1 [10, 256, 56, 56] --
│ └─Sequential: 2-1 [10, 256, 56, 56] --
│ │ └─Sequential: 3-1 [10, 64, 56, 56] 4,224
│ │ └─ReLU: 3-2 [10, 64, 56, 56] --
│ │ └─Sequential: 3-3 [10, 64, 56, 56] 36,992
│ │ └─ReLU: 3-4 [10, 64, 56, 56] --
│ │ └─Sequential: 3-5 [10, 256, 56, 56] 16,896
│ └─Sequential: 2-2 [10, 256, 56, 56] --
│ │ └─Conv2d: 3-6 [10, 256, 56, 56] 16,384
│ │ └─BatchNorm2d: 3-7 [10, 256, 56, 56] 512
│ └─ReLU: 2-3 [10, 256, 56, 56] --
├─Bottleneck: 1-2 [10, 256, 56, 56] --
│ └─Sequential: 2-4 [10, 256, 56, 56] --
│ │ └─Sequential: 3-8 [10, 64, 56, 56] 16,512
│ │ └─ReLU: 3-9 [10, 64, 56, 56] --
│ │ └─Sequential: 3-10 [10, 64, 56, 56] 36,992
│ │ └─ReLU: 3-11 [10, 64, 56, 56] --
│ │ └─Sequential: 3-12 [10, 256, 56, 56] 16,896
│ └─Sequential: 2-5 [10, 256, 56, 56] --
│ │ └─Conv2d: 3-13 [10, 256, 56, 56] 65,536
│ │ └─BatchNorm2d: 3-14 [10, 256, 56, 56] 512
│ └─ReLU: 2-6 [10, 256, 56, 56] --
├─Bottleneck: 1-3 [10, 256, 56, 56] --
│ └─Sequential: 2-7 [10, 256, 56, 56] --
│ │ └─Sequential: 3-15 [10, 64, 56, 56] 16,512
│ │ └─ReLU: 3-16 [10, 64, 56, 56] --
│ │ └─Sequential: 3-17 [10, 64, 56, 56] 36,992
│ │ └─ReLU: 3-18 [10, 64, 56, 56] --
│ │ └─Sequential: 3-19 [10, 256, 56, 56] 16,896
│ └─Sequential: 2-8 [10, 256, 56, 56] --
│ │ └─Conv2d: 3-20 [10, 256, 56, 56] 65,536
│ │ └─BatchNorm2d: 3-21 [10, 256, 56, 56] 512
│ └─ReLU: 2-9 [10, 256, 56, 56] --
==========================================================================================
Total params: 347,904
Trainable params: 347,904
Non-trainable params: 0
Total mult-adds (G): 10.79
==========================================================================================
Input size (MB): 8.03
Forward/backward pass size (MB): 963.38
Params size (MB): 1.39
Estimated Total Size (MB): 972.80
==========================================================================================
conv4_x_101 = make_layers(Bottleneck, 256, 23)
datashape = (10, 512, 28, 28)
summary(conv4_x_101, datashape, depth=1, device="cpu")
Test output:
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
Sequential [10, 1024, 14, 14] --
├─Bottleneck: 1-1 [10, 1024, 14, 14] 1,512,448
├─Bottleneck: 1-2 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-3 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-4 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-5 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-6 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-7 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-8 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-9 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-10 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-11 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-12 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-13 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-14 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-15 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-16 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-17 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-18 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-19 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-20 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-21 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-22 [10, 1024, 14, 14] 2,167,808
├─Bottleneck: 1-23 [10, 1024, 14, 14] 2,167,808
==========================================================================================
Total params: 49,204,224
Trainable params: 49,204,224
Non-trainable params: 0
Total mult-adds (G): 96.21
==========================================================================================
Input size (MB): 16.06
Forward/backward pass size (MB): 1846.48
Params size (MB): 196.82
Estimated Total Size (MB): 2059.35
==========================================================================================
Define the full architecture
# Build the full residual network
class ResNet(nn.Module):
    def __init__(self, block: Type[Union[ResidualUnit, Bottleneck]],
                 layers: List[int], num_classes: int):
        super().__init__()
        # layer1: convolution + pooling
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))
        # layer2 - layer5
        self.layer2_x = make_layers(block, 64, layers[0], afterconv1=True)
        self.layer3_x = make_layers(block, 128, layers[1])
        self.layer4_x = make_layers(block, 256, layers[2])
        self.layer5_x = make_layers(block, 512, layers[3])
        # global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # classification head
        if block == ResidualUnit:
            self.fc = nn.Linear(512, num_classes)
        else:
            self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer5_x(self.layer4_x(self.layer3_x(self.layer2_x(x))))
        x = self.avgpool(x)
        # flatten x to (batch, features)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
# Test
datashape = (10, 3, 224, 224)
res34 = ResNet(ResidualUnit, [3, 4, 6, 3], num_classes=1000)
res101 = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=1000)
summary(res34, datashape, depth=1, device="cpu")
Test output:
===============================================================================================
Layer (type:depth-idx) Output Shape Param #
===============================================================================================
ResNet -- --
├─Sequential: 1-1 [10, 64, 56, 56] 9,536
├─Sequential: 1-2 [10, 64, 56, 56] 234,624
├─Sequential: 1-3 [10, 128, 28, 28] 1,166,336
├─Sequential: 1-4 [10, 256, 14, 14] 7,152,640
├─Sequential: 1-5 [10, 512, 7, 7] 13,640,704
├─AdaptiveAvgPool2d: 1-6 [10, 512, 1, 1] --
├─Linear: 1-7 [10, 1000] 513,000
===============================================================================================
Total params: 22,716,840
Trainable params: 22,716,840
Non-trainable params: 0
Total mult-adds (G): 36.64
===============================================================================================
Input size (MB): 6.02
Forward/backward pass size (MB): 598.18
Params size (MB): 87.19
Estimated Total Size (MB): 691.39
===============================================================================================
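A final smoke test (my own addition; the weights are random, so this only verifies shapes): both networks should map a batch of images to num_classes logits.
x = torch.randn(2, 3, 224, 224)
print(res34(x).shape)   # torch.Size([2, 1000])
print(res101(x).shape)  # torch.Size([2, 1000])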