1. LeNet
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
def __init__(self):
super(LeNet,self).__init__()
self.conv1=nn.Conv2d(3,6,5)
self.conv2=nn.Conv2d(6,16,5)
self.fc1=nn.Linear(16*5*5,120)
self.fc2=nn.Linear(120,80)
self.fc3=nn.Linear(80,10)
def forward(self,x):
x=F.max_pool2d(F.relu(self.conv1(x)),2)
x=F.max_pool2d(F.relu(self.conv2(x)),2)
x=x.view(x.shape[0],-1)
x=F.relu(self.fc1(x))
x=F.relu(self.fc2(x))
x=self.fc3(x)
return x
Notes:
LeNet structure
32*32*3 → conv, 5*5 → 28*28*6 → max pool → 14*14*6
→ conv, 5*5 → 10*10*16 → max pool → 5*5*16 →
flatten, 16*5*5=400
→ FC → (400,120)
→ FC → (120,80)
→ FC → (80,10)
→ softmax
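As a quick sanity check (a sketch of my own, not part of the original notes), a dummy 32*32 RGB batch confirms the flattened size of 16*5*5 = 400 and the 10-way output:

# Sketch: verify the LeNet shape walkthrough with a dummy input.
net = LeNet()
x = torch.randn(1, 3, 32, 32)   # (batch, channels, H, W)
print(net(x).shape)             # torch.Size([1, 10])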
2. AlexNet
class AlexNet(nn.Module):
def __init__(self):
super(AlexNet,self).__init__()
self.feature_block=nn.Sequential(
nn.Conv2d(3,64,kernel_size=11,stride=4,padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Conv2d(64,192,kernel_size=5,padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Conv2d(192,384,kernel_size=3,padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(384,256,kernel_size=3,padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256,256,kernel_size=3,padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3,stride=2)
)
self.avgpool=nn.AdaptiveAvgPool2d((6,6))
self.class_block=nn.Sequential(
nn.Dropout(),
nn.Linear(256*6*6,4096),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(4096,4096),
nn.ReLU(inplace=True),
nn.Linear(4096,10),
)
def forward(self,x):
x=self.feature_block(x)
x=self.avgpool(x)
x=x.view(x.size(0),256*6*6)
x=self.class_block(x)
return x
Notes:
AlexNet structure
227*227*3 → conv, k=11, s=4, p=2 → 56*56*64 → ReLU → max pool, k=3, s=2 → 27*27*64
→ conv, k=5, p=2 → 27*27*192 → ReLU → max pool, k=3, s=2 → 13*13*192
→ conv, k=3, p=1 → 13*13*384 → ReLU
→ conv, k=3, p=1 → 13*13*256 → ReLU
→ conv, k=3, p=1 → 13*13*256 → ReLU → max pool, k=3, s=2 → 6*6*256
→ adaptive avg pool to 6*6*256
→ flatten, 6*6*256
→ dropout, FC → (6*6*256,4096) → ReLU
→ dropout, FC → (4096,4096) → ReLU
→ FC → (4096,10)
→ softmax
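All the spatial sizes above follow the standard formula out = floor((in + 2p - k)/s) + 1. A small helper of my own (conv_out is a hypothetical name, not from the original notes) reproduces the walkthrough:

# Sketch: reproduce the AlexNet spatial sizes with
# out = floor((in + 2*p - k) / s) + 1.
def conv_out(size, k, s=1, p=0):
    return (size + 2 * p - k) // s + 1

size = conv_out(227, k=11, s=4, p=2)                 # 56 (first conv)
size = conv_out(size, k=3, s=2)                      # 27 (max pool)
size = conv_out(conv_out(size, k=5, p=2), k=3, s=2)  # 13 (conv k=5 p=2, then pool)
print(size)  # 13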
3. VGGNet11
class VGGBlock(nn.Module):
def __init__(self,in_channels,out_channels,batch_norm):
super(VGGBlock,self).__init__()
stack=[]
stack.append(nn.Conv2d(in_channels,out_channels,kernel_size=3,padding=1))
if batch_norm:
stack.append(nn.BatchNorm2d(out_channels))
stack.append(nn.ReLU(inplace=True))
self.model_block=nn.Sequential(*stack)
def forward(self,x):
return self.model_block(x)
class VGGNet11(nn.Module):
def __init__(self,block,pool,batch_norm):
super(VGGNet11,self).__init__()
self.feature_block=nn.Sequential(
block(3,64,batch_norm),
pool(kernel_size=2,stride=2),
block(64,128,batch_norm),
pool(kernel_size=2,stride=2),
block(128,256,batch_norm),
block(256,256,batch_norm),
pool(kernel_size=2,stride=2),
block(256,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
block(512,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
)
self.classifier=nn.Linear(512,10)
def forward(self,x):
x=self.feature_block(x)
x=x.view(x.shape[0],-1)
x=self.classifier(x)
return x
Notes:
VGGNet11 structure
VGGBlock: H and W sizes unchanged
if batch_norm=True:
→ conv, k=3, p=1 → BN → ReLU
else (batch_norm=False):
→ conv, k=3, p=1 → ReLU
32*32*3 →
VGGBlock(3,64,batch_norm=True) → 32*32*64 → pool, k=2, s=2 → 16*16*64 →
VGGBlock(64,128,batch_norm=True) → 16*16*128 → pool, k=2, s=2 → 8*8*128 →
VGGBlock(128,256,batch_norm=True) → 8*8*256
→ VGGBlock(256,256,batch_norm=True) → 8*8*256
→ pool, k=2, s=2 → 4*4*256 →
VGGBlock(256,512,batch_norm=True) → 4*4*512
→ VGGBlock(512,512,batch_norm=True) → 4*4*512
→ pool, k=2, s=2 → 2*2*512 →
VGGBlock(512,512,batch_norm=True) → 2*2*512
→ VGGBlock(512,512,batch_norm=True) → 2*2*512
→ pool, k=2, s=2 → 1*1*512 →
flatten, 512 →
FC → (512,10) →
softmax
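Because the constructor takes the block and pooling classes as arguments, instantiation looks like the following sketch (usage assumed from the signatures above):

# Sketch: VGGNet11 is parameterized by the block and pool classes.
net = VGGNet11(VGGBlock, nn.MaxPool2d, batch_norm=True)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])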
4. VGGNet16
class VGGNet16(nn.Module):
def __init__(self,block,pool,batch_norm):
super(VGGNet16,self).__init__()
self.feature_block=nn.Sequential(
block(3,64,batch_norm),
block(64,64,batch_norm),
pool(kernel_size=2,stride=2),
block(64,128,batch_norm),
block(128,128,batch_norm),
pool(kernel_size=2,stride=2),
block(128,256,batch_norm),
block(256,256,batch_norm),
pool(kernel_size=2,stride=2),
block(256,512,batch_norm),
block(512,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
block(512,512,batch_norm),
block(512,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
)
self.classifier=nn.Linear(512,10)
def forward(self,x):
x=self.feature_block(x)
x=x.view(x.shape[0],-1)
x=self.classifier(x)
return x
Notes:
VGGNet16 structure
VGGBlock: H and W sizes unchanged
if batch_norm=True:
→ conv, k=3, p=1 → BN → ReLU
else (batch_norm=False):
→ conv, k=3, p=1 → ReLU
32*32*3 →
VGGBlock(3,64,batch_norm=True) → 32*32*64
→ VGGBlock(64,64,batch_norm=True) → 32*32*64
→ pool, k=2, s=2 → 16*16*64 →
VGGBlock(64,128,batch_norm=True) → 16*16*128
→ VGGBlock(128,128,batch_norm=True) → 16*16*128
→ pool, k=2, s=2 → 8*8*128 →
VGGBlock(128,256,batch_norm=True) → 8*8*256
→ VGGBlock(256,256,batch_norm=True) → 8*8*256
→ VGGBlock(256,256,batch_norm=True) → 8*8*256
→ pool, k=2, s=2 → 4*4*256 →
VGGBlock(256,512,batch_norm=True) → 4*4*512
→ VGGBlock(512,512,batch_norm=True) → 4*4*512
→ VGGBlock(512,512,batch_norm=True) → 4*4*512
→ pool, k=2, s=2 → 2*2*512 →
VGGBlock(512,512,batch_norm=True) → 2*2*512
→ VGGBlock(512,512,batch_norm=True) → 2*2*512
→ VGGBlock(512,512,batch_norm=True) → 2*2*512
→ pool, k=2, s=2 → 1*1*512 →
flatten, 512 →
FC → (512,10) →
softmax
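A quick way to compare the two VGG variants defined above (my own sketch; the counts depend on batch_norm):

# Sketch: compare parameter counts of VGGNet11 and VGGNet16.
vgg11 = VGGNet11(VGGBlock, nn.MaxPool2d, batch_norm=True)
vgg16 = VGGNet16(VGGBlock, nn.MaxPool2d, batch_norm=True)
print(sum(p.numel() for p in vgg11.parameters()))
print(sum(p.numel() for p in vgg16.parameters()))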
5. GoogLeNet
class Inception(nn.Module):
def __init__(self,in_planes,n1x1,n3x3red,n3x3,n5x5red,n5x5,pool_planes):
super(Inception,self).__init__()
self.b1=nn.Sequential(
nn.Conv2d(in_planes,n1x1,kernel_size=1),
nn.BatchNorm2d(n1x1),
nn.ReLU(True),
)
self.b2=nn.Sequential(
nn.Conv2d(in_planes,n3x3red,kernel_size=1),
nn.BatchNorm2d(n3x3red),
nn.ReLU(True),
nn.Conv2d(n3x3red,n3x3,kernel_size=3,padding=1),
nn.BatchNorm2d(n3x3),
nn.ReLU(True),
)
self.b3=nn.Sequential(
nn.Conv2d(in_planes,n5x5red,kernel_size=1),
nn.BatchNorm2d(n5x5red),
nn.ReLU(True),
nn.Conv2d(n5x5red,n5x5,kernel_size=5,padding=2),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
)
self.b4=nn.Sequential(
nn.MaxPool2d(3,stride=1,padding=1),
nn.Conv2d(in_planes,pool_planes,kernel_size=1),
nn.BatchNorm2d(pool_planes),
nn.ReLU(True),
)
def forward(self,x):
x1=self.b1(x)
x2=self.b2(x)
x3=self.b3(x)
x4=self.b4(x)
return torch.cat([x1,x2,x3,x4],1)
class GoogLeNet(nn.Module):
def __init__(self):
super(GoogLeNet,self).__init__()
self.feature_block=nn.Sequential(
nn.Conv2d(3,192,kernel_size=3,padding=1),
nn.BatchNorm2d(192),
nn.ReLU(True),
)
self.a3=Inception(192,64,96,128,16,32,32)
self.b3=Inception(256, 128, 128, 192, 32, 96, 64)
self.maxpool=nn.MaxPool2d(3,stride=2,padding=1)
self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
self.avgpool=nn.AvgPool2d(8,stride=1)
self.linear=nn.Linear(1024,10)
def forward(self,x):
out=self.feature_block(x)
out=self.a3(out)
out=self.b3(out)
out=self.maxpool(out)
out=self.a4(out)
out=self.b4(out)
out=self.c4(out)
out=self.d4(out)
out=self.e4(out)
out = self.maxpool(out)
out = self.a5(out)
out = self.b5(out)
out = self.avgpool(out)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
Notes:
GoogLeNet structure
Inception module:
parameters: in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes
→ b1: conv, k=1 → (,,n1x1) → BN → ReLU
→ b2: conv, k=1 → (,,n3x3red) → BN → ReLU → conv, k=3, p=1 → (,,n3x3) → BN → ReLU
→ b3: conv, k=1 → (,,n5x5red) → BN → ReLU → conv, k=5, p=2 → (,,n5x5) → BN → ReLU
→ b4: pool, k=3, s=1, p=1 → (,,in_planes) → conv, k=1 → (,,pool_planes) → BN → ReLU
→ cat(b1,b2,b3,b4) → (,,n1x1+n3x3+n5x5+pool_planes)
Note: H and W are unchanged; the output channel count is n1x1+n3x3+n5x5+pool_planes
32*32*3 →
conv, k=3, p=1 → 32*32*192 → BN → ReLU →
a3: Inception(192, 64, 96,128, 16,32, 32) → 32*32*(64+128+32+32=256) →
b3: Inception(256, 128, 128,192, 32,96, 64) → 32*32*(128+192+96+64=480) →
max pool, k=3, s=2, p=1 → 16*16*480 →
a4: Inception(480, 192, 96,208, 16,48, 64) → 16*16*(192+208+48+64=512) →
b4: Inception(512, 160, 112,224, 24,64, 64) → 16*16*(160+224+64+64=512) →
c4: Inception(512, 128, 128,256, 24,64, 64) → 16*16*(128+256+64+64=512) →
d4: Inception(512, 112, 144,288, 32,64, 64) → 16*16*(112+288+64+64=528) →
e4: Inception(528, 256, 160,320, 32,128, 128) → 16*16*(256+320+128+128=832) →
max pool, k=3, s=2, p=1 → 8*8*832 →
a5: Inception(832, 256, 160,320, 32,128, 128) → 8*8*(256+320+128+128=832) →
b5: Inception(832, 384, 192,384, 48,128, 128) → 8*8*(384+384+128+128=1024) →
avg pool, k=8, s=1 → 1*1*1024 →
flatten → 1024
→ FC → (1024,10)
→ softmax
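A short check of my own that one Inception block preserves H and W and only widens the channel dimension to n1x1 + n3x3 + n5x5 + pool_planes:

# Sketch: a single Inception block on a 32*32 feature map.
blk = Inception(192, 64, 96, 128, 16, 32, 32)
x = torch.randn(1, 192, 32, 32)
print(blk(x).shape)   # torch.Size([1, 256, 32, 32])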
6. ResNet18
class ResNetBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super(ResNetBlock,self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.downsample = nn.Sequential()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.downsample(x)
out = F.relu(out)
return out
class ResNetLayer(nn.Module):
def __init__(self, block, n_blocks, in_channels, out_channels, stride):
super(ResNetLayer,self).__init__()
        layers = []  # a local list; assigning to self.modules would shadow nn.Module.modules()
        layers.append(block(in_channels,out_channels,stride))
        for _ in range(n_blocks-1):
            layers.append(block(out_channels,out_channels,1))
        self.blocks = nn.Sequential(*layers)
def forward(self,x):
return self.blocks(x)
class ResNet18(nn.Module):
def __init__(self,layer,block):
super(ResNet18,self).__init__()
n_blocks = [2,2,2,2]
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.rb1 = layer(block, n_blocks[0], 64, 64, 1)
self.rb2 = layer(block, n_blocks[1], 64, 128, 2)
self.rb3 = layer(block, n_blocks[2], 128, 256, 2)
self.rb4 = layer(block, n_blocks[3], 256, 512, 2)
self.fc = nn.Linear(512,10)
def forward(self,x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.rb1(out)
out = self.rb2(out)
out = self.rb3(out)
out = self.rb4(out)
out = F.avg_pool2d(out,4)
out = out.view(out.shape[0],-1)
out = self.fc(out)
return out
Notes:
ResNet18 structure
ResNetBlock module:
parameters: in_channels, out_channels, stride
if stride=1:
conv, k=3, s=1, p=1, bias=False → (*,*,out_channels) → BN → ReLU
conv, k=3, s=1, p=1, bias=False → (*,*,out_channels) → BN → out
if in_channels != out_channels:
downsample: conv, k=1, s=stride=1, bias=False → (*,*,in_channels) → (*,*,out_channels) → BN →
else if stride != 1:
conv, k=3, s=stride, p=1, bias=False → (1/2*,1/2*,out_channels) → BN → ReLU
conv, k=3, s=1, p=1, bias=False → (1/2*,1/2*,out_channels) → BN → out
downsample: conv, k=1, s=stride, bias=False → (*,*,in_channels) → (1/2*,1/2*,out_channels) → BN →
add(out,downsample) → ReLU
ResNetLayer:
parameters: block, n_blocks, in_channels, out_channels, stride
block is the ResNetBlock above (written as ResNetBlock below); n_blocks is an integer
→
ResNetBlock(in_channels, out_channels, stride) →
for _ in range(n_blocks-1):
→ ResNetBlock(out_channels, out_channels, 1)
ResNet18:
parameters: layer, block
layer is the ResNetLayer above, block is the ResNetBlock above
n_blocks=[2,2,2,2]
conv, k=3, s=1, p=1, bias=False → 32*32*64 → BN → ReLU →
ResNetLayer(ResNetBlock, n_blocks[0]=2, 64, 64, 1):
(in_channels=out_channels and stride=1, so there is no downsample branch)
ResNetBlock(64, 64, 1):
conv → 32*32*64 → BN → ReLU →
conv → 32*32*64 → BN → out → ReLU →
ResNetBlock(64, 64, 1):
conv → 32*32*64 → BN → ReLU →
conv → 32*32*64 → BN → out → ReLU →
ResNetLayer(ResNetBlock, n_blocks[1]=2, 64, 128, 2):
ResNetBlock(64, 128, 2):
conv → 16*16*128 → BN → ReLU →
conv → 16*16*128 → BN → out
downsample: 32*32*64 → 16*16*128 →
add(out,downsample) → ReLU →
ResNetBlock(128, 128, 1):
conv → 16*16*128 → BN → ReLU →
conv → 16*16*128 → BN → out → ReLU →
ResNetLayer(ResNetBlock, n_blocks[2]=2, 128, 256, 2):
ResNetBlock(128, 256, 2):
conv → 8*8*256 → BN → ReLU →
conv → 8*8*256 → BN → out
downsample: 16*16*128 → 8*8*256 →
add(out,downsample) → ReLU →
ResNetBlock(256, 256, 1):
conv → 8*8*256 → BN → ReLU →
conv → 8*8*256 → BN → out → ReLU →
ResNetLayer(ResNetBlock, n_blocks[3]=2, 256, 512, 2):
ResNetBlock(256, 512, 2):
conv → 4*4*512 → BN → ReLU →
conv → 4*4*512 → BN → out
downsample: 8*8*256 → 4*4*512 →
add(out,downsample) → ReLU →
ResNetBlock(512, 512, 1):
conv → 4*4*512 → BN → ReLU →
conv → 4*4*512 → BN → out → ReLU →
avg pool, k=4 → 1*1*512 →
flatten → 512 →
FC → (512,10) →
softmax
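As with VGG, the constructor is parameterized by the layer and block classes; a usage sketch of my own:

# Sketch: instantiate ResNet18 with the classes defined above.
net = ResNet18(ResNetLayer, ResNetBlock)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])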
7. DenseNet
import math
class Bottleneck(nn.Module):
def __init__(self,in_planes,growth_rate):
super(Bottleneck,self).__init__()
self.bn1=nn.BatchNorm2d(in_planes)
self.conv1=nn.Conv2d(in_planes,4*growth_rate,kernel_size=1,bias=False)
self.bn2=nn.BatchNorm2d(4*growth_rate)
self.conv2=nn.Conv2d(4*growth_rate,growth_rate,kernel_size=3,padding=1,bias=False)
def forward(self,x):
out=self.conv1(F.relu(self.bn1(x)))
out=self.conv2(F.relu(self.bn2(out)))
out=torch.cat([out,x],1)
return out
class Transition(nn.Module):
def __init__(self,in_planes,out_planes):
super(Transition,self).__init__()
self.bn=nn.BatchNorm2d(in_planes)
self.conv=nn.Conv2d(in_planes,out_planes,kernel_size=1,bias=False)
def forward(self,x):
out=self.conv(F.relu(self.bn(x)))
out=F.avg_pool2d(out,2)
return out
class DenseNet(nn.Module):
def __init__(self,block,nblocks,growth_rate=12,reduction=0.5,num_classes=10):
super(DenseNet,self).__init__()
self.growth_rate=growth_rate
num_planes=2*growth_rate
self.conv1=nn.Conv2d(3,num_planes,kernel_size=3,padding=1,bias=False)
self.dense1=self._make_dense_layers(block,num_planes,nblocks[0])
num_planes+=nblocks[0]*growth_rate
out_planes=int(math.floor(num_planes*reduction))
self.trans1=Transition(num_planes,out_planes)
num_planes=out_planes
self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
num_planes += nblocks[1]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans2 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
num_planes += nblocks[2]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans3 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
num_planes += nblocks[3]*growth_rate
self.bn=nn.BatchNorm2d(num_planes)
self.linear=nn.Linear(num_planes,num_classes)
def _make_dense_layers(self,block,in_planes,nblock):
layers=[]
for i in range(nblock):
layers.append(block(in_planes,self.growth_rate))
in_planes+=self.growth_rate
return nn.Sequential(*layers)
def forward(self,x):
out=self.conv1(x)
out=self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.trans3(self.dense3(out))
out = self.dense4(out)
out=F.avg_pool2d(F.relu(self.bn(out)),4)
out=out.view(out.size(0),-1)
out=self.linear(out)
return out
def DenseNet121():
return DenseNet(Bottleneck,[6,12,24,16],growth_rate=32)
Notes:
DenseNet structure
Bottleneck module:
parameters: in_planes, growth_rate
x →
BN → ReLU → conv, k=1, bias=False → (,,4*growth_rate) →
BN → ReLU → conv, k=3, p=1, bias=False → (,,growth_rate) → out
cat(out,x) →
Note: H and W are unchanged; the conv branch outputs growth_rate channels, so after the cat the block adds growth_rate channels to its input
Transition module:
parameters: in_planes, out_planes
BN → ReLU → conv, k=1, bias=False → (,,out_planes) →
avg pool, k=2 → downsampling, H and W halved →
DenseNet121 structure:
parameters: block, nblocks, growth_rate=12, reduction=0.5, num_classes=10
block is the Bottleneck above; reduction is the compression ratio
growth_rate = growth_rate, num_planes = 2*growth_rate
_make_dense_layers function:
parameters: block, in_planes, nblock
block is the Bottleneck above; nblock is an integer giving how many Bottleneck layers the dense block contains
for i in range(nblock):
Bottleneck(in_planes, growth_rate):
x →
BN → ReLU → conv, k=1, bias=False → (,,4*growth_rate) →
BN → ReLU → conv, k=3, p=1, bias=False → (,,growth_rate) → out
cat(out,x) →
in_planes += growth_rate
nblocks = [6,12,24,16]
growth_rate=32, num_planes=2*growth_rate=64
conv, k=3, p=1, bias=False → 32*32*(2*growth_rate=64)
dense1:
_make_dense_layers(Bottleneck, in_planes=num_planes=64, nblocks[0]=6):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 32*32*(4*32) → BN → ReLU → conv → 32*32*32 → out
cat(out,x) → 32*32*96 →
in_planes = in_planes+growth_rate = 96
i=1, Bottleneck → conv → conv → 32*32*(64+2*32) →
......
i=5, Bottleneck → conv → conv → 32*32*(64+6*32), i.e. 32*32*256 →
num_planes += nblocks[0]*growth_rate → 64+6*32=256
out_planes = int(num_planes*reduction) = 256*0.5 = 128
trans1:
Transition(num_planes,out_planes):
conv → avg pool → 16*16*128
num_planes = out_planes = 128
dense2:
_make_dense_layers(Bottleneck, in_planes=num_planes=128, nblocks[1]=12):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 16*16*(4*32) → BN → ReLU → conv → 16*16*32 → out
cat(out,x) → 16*16*160 →
in_planes = in_planes+growth_rate = 128+32 = 160
i=1, Bottleneck → conv → conv → 16*16*(128+2*32) →
......
i=11, Bottleneck → conv → conv → 16*16*(128+12*32), i.e. 16*16*512 →
num_planes += nblocks[1]*growth_rate → 128+12*32=512
out_planes = int(num_planes*reduction) = 512*0.5 = 256
trans2:
Transition(num_planes,out_planes):
conv → avg pool → 8*8*256
num_planes = out_planes = 256
dense3:
_make_dense_layers(Bottleneck, in_planes=num_planes=256, nblocks[2]=24):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 8*8*(4*32) → BN → ReLU → conv → 8*8*32 → out
cat(out,x) → 8*8*288 →
in_planes = in_planes+growth_rate = 256+32 = 288
i=1, Bottleneck → conv → conv → 8*8*(256+2*32) →
......
i=23, Bottleneck → conv → conv → 8*8*(256+24*32), i.e. 8*8*1024 →
num_planes += nblocks[2]*growth_rate → 256+24*32=1024
out_planes = int(num_planes*reduction) = 1024*0.5 = 512
trans3:
Transition(num_planes,out_planes):
conv → avg pool → 4*4*512
num_planes = out_planes = 512
dense4:
_make_dense_layers(Bottleneck, in_planes=num_planes=512, nblocks[3]=16):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 4*4*(4*32) → BN → ReLU → conv → 4*4*32 → out
cat(out,x) → 4*4*544 →
in_planes = in_planes+growth_rate = 512+32 = 544
i=1, Bottleneck → conv → conv → 4*4*(512+2*32) →
......
i=15, Bottleneck → conv → conv → 4*4*(512+16*32), i.e. 4*4*1024 →
num_planes += nblocks[3]*growth_rate → 512+16*32=1024
BN → ReLU → avg pool, k=4 → 1*1*1024 →
flatten → 1024 →
FC → (1024,10) →
softmax
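The channel bookkeeping above can be reproduced with a few lines of arithmetic (my own sketch, mirroring the DenseNet121 constructor):

# Sketch: track num_planes through the four dense blocks of DenseNet121.
growth_rate, reduction = 32, 0.5
num_planes = 2 * growth_rate                 # 64 after the stem conv
for n in [6, 12, 24, 16][:3]:                # dense1..dense3 each end in a Transition
    num_planes += n * growth_rate
    num_planes = int(num_planes * reduction)
num_planes += 16 * growth_rate               # dense4 has no Transition
print(num_planes)                            # 1024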
8. ResNeXt
class Block(nn.Module):
expansion = 2
def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
super(Block, self).__init__()
group_width = cardinality * bottleneck_width
self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(group_width)
self.conv2 = nn.Conv2d(group_width, group_width,
kernel_size=3, stride=stride,
padding=1, groups=cardinality,
bias=False)
self.bn2 = nn.BatchNorm2d(group_width)
self.conv3 = nn.Conv2d(group_width, self.expansion*group_width,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*group_width)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*group_width:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*group_width,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*group_width)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNeXt(nn.Module):
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
super(ResNeXt, self).__init__()
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(num_blocks[0], 1)
self.layer2 = self._make_layer(num_blocks[1], 2)
self.layer3 = self._make_layer(num_blocks[2], 2)
self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
def _make_layer(self, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
self.bottleneck_width *= 2
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNeXt29_2x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
Notes:
ResNeXt structure
Block class:
expansion factor expansion = 2
parameters: in_planes, cardinality=32, bottleneck_width=4, stride=1
group_width = cardinality * bottleneck_width = 32*4 = 128
→ conv, k=1, bias=False → (*,*,128) → BN → ReLU →
→ conv, k=3, s=stride, p=1, groups=cardinality, bias=False →
if stride=1, H and W unchanged
else (stride=2), downsampled
→ total channel count unchanged, 128
groups is the grouping parameter: each group convolves group_width/cardinality = 128/32 = 4 channels
→ BN → ReLU →
conv, k=1, bias=False → (**,**,expansion*group_width=256) → BN → out →
if stride != 1 or in_planes != expansion*group_width=256:
shortcut: conv, k=1, s=stride, bias=False →
if stride=1, H and W unchanged
else (stride=2), downsampled
→ channel count (expansion*group_width=256), (**,**,256)
→ BN
add(out,shortcut) → (**,**,256) → ReLU
ResNeXt29_2x64d structure:
parameters: num_blocks, cardinality, bottleneck_width, num_classes=10
num_blocks is e.g. [3,3,3]; cardinality is the number of groups; bottleneck_width is the channels per group
num_classes is the number of classes
cardinality = 2, bottleneck_width = 64
in_planes = 64
num_blocks = [3,3,3]
_make_layer function:
parameters: num_blocks, stride
here num_blocks is an integer
strides = [stride] + [1]*(num_blocks-1)
i.e. stride=1: [1]+[1,1] → [1,1,1]; stride=2: [2]+[1,1] → [2,1,1]
for stride in strides:
Block(in_planes, cardinality, bottleneck_width, stride)
in_planes = Block.expansion * cardinality * bottleneck_width = 256
bottleneck_width *= 2 → 128
conv, k=1, bias=False → 32*32*64 → BN → ReLU →
layer1:
_make_layer(num_blocks[0], 1):
strides=[1,1,1]
Block(in_planes=64, cardinality=2, bottleneck_width=64, stride=1):
group_width = cardinality * bottleneck_width = 128
conv → 32*32*128 → BN → ReLU →
grouped conv → 32*32*128 → BN → ReLU →
conv, expansion*group_width=256 → 32*32*256 → BN → out →
shortcut (in_planes != expansion*group_width=256):
conv → 32*32*256 → BN →
add(out,shortcut) → 32*32*256 → ReLU →
in_planes = expansion * cardinality * bottleneck_width = 256
Block(in_planes=256, cardinality=2, bottleneck_width=64, stride=1):
group_width = cardinality * bottleneck_width = 128
conv → 32*32*128 → BN → ReLU →
grouped conv → 32*32*128 → BN → ReLU →
conv, expansion*group_width=256 → 32*32*256 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 256
Block(in_planes=256, cardinality=2, bottleneck_width=64, stride=1):
group_width = cardinality * bottleneck_width = 128
conv → 32*32*128 → BN → ReLU →
grouped conv → 32*32*128 → BN → ReLU →
conv, expansion*group_width=256 → 32*32*256 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 256
bottleneck_width *= 2 → bottleneck_width = 128
layer2:
_make_layer(num_blocks[1], 2):
strides=[2,1,1]
Block(in_planes=256, cardinality=2, bottleneck_width=128, stride=2):
group_width = cardinality * bottleneck_width = 256
conv → 32*32*256 → BN → ReLU →
grouped conv → 16*16*256 → BN → ReLU →
conv, expansion*group_width=512 → 16*16*512 → BN → out →
shortcut (stride != 1):
conv → 16*16*512 → BN →
add(out,shortcut) → 16*16*512 → ReLU →
in_planes = expansion * cardinality * bottleneck_width = 512
Block(in_planes=512, cardinality=2, bottleneck_width=128, stride=1):
group_width = cardinality * bottleneck_width = 256
conv → 16*16*256 → BN → ReLU →
grouped conv → 16*16*256 → BN → ReLU →
conv, expansion*group_width=512 → 16*16*512 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 512
Block(in_planes=512, cardinality=2, bottleneck_width=128, stride=1):
group_width = cardinality * bottleneck_width = 256
conv → 16*16*256 → BN → ReLU →
grouped conv → 16*16*256 → BN → ReLU →
conv, expansion*group_width=512 → 16*16*512 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 512
bottleneck_width *= 2 → bottleneck_width = 256
layer3:
_make_layer(num_blocks[2], 2):
strides=[2,1,1]
Block(in_planes=512, cardinality=2, bottleneck_width=256, stride=2):
group_width = cardinality * bottleneck_width = 512
conv → 16*16*512 → BN → ReLU →
grouped conv → 8*8*512 → BN → ReLU →
conv, expansion*group_width=1024 → 8*8*1024 → BN → out →
shortcut (stride != 1):
conv → 8*8*1024 → BN →
add(out,shortcut) → 8*8*1024 → ReLU →
in_planes = expansion * cardinality * bottleneck_width = 1024
Block(in_planes=1024, cardinality=2, bottleneck_width=256, stride=1):
group_width = cardinality * bottleneck_width = 512
conv → 8*8*512 → BN → ReLU →
grouped conv → 8*8*512 → BN → ReLU →
conv, expansion*group_width=1024 → 8*8*1024 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 1024
Block(in_planes=1024, cardinality=2, bottleneck_width=256, stride=1):
group_width = cardinality * bottleneck_width = 512
conv → 8*8*512 → BN → ReLU →
grouped conv → 8*8*512 → BN → ReLU →
conv, expansion*group_width=1024 → 8*8*1024 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 1024
bottleneck_width *= 2 → bottleneck_width = 512
avg pool, k=8 → 1*1*1024 →
flatten, 1024 →
FC → (1024,10) →
softmax
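A quick shape check of my own for the ResNeXt29_2x64d defined above:

# Sketch: end-to-end shape check on a 32*32 input.
net = ResNeXt29_2x64d()
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])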
9. SqueezeNet
from torch import nn
import torch.nn.init as init
class Fire(nn.Module):
def __init__(self,inplanes,s1,e1,e3):
super(Fire,self).__init__()
self.inplanes=inplanes
self.squeeze=nn.Conv2d(inplanes,s1,kernel_size=1)
self.squeeze_activation=nn.ReLU(inplace=True)
self.expand1x1=nn.Conv2d(s1,e1,kernel_size=1)
self.expand1x1_activation=nn.ReLU(inplace=True)
        self.expand3x3=nn.Conv2d(s1,e3,kernel_size=3,padding=1)
self.expand3x3_activation=nn.ReLU(inplace=True)
def forward(self,x):
x=self.squeeze_activation(self.squeeze(x))
return torch.cat([
self.expand1x1_activation(self.expand1x1(x)),
self.expand3x3_activation(self.expand3x3(x))
],1)
class SqueezeNet(nn.Module):
def __init__(self, version=1.0, num_classes=10):
super(SqueezeNet, self).__init__()
if version not in [1.0, 1.1]:
raise ValueError("Unsupported SqueezeNet version {version}:"
"1.0 or 1.1 expected".format(version=version))
self.num_classes = num_classes
if version == 1.0:
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=7, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(96, 16, 64, 64),
Fire(128, 16, 64, 64),
Fire(128, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 32, 128, 128),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(512, 64, 256, 256),
)
else:
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(64, 16, 64, 64),
Fire(128, 16, 64, 64),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(128, 32, 128, 128),
Fire(256, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
Fire(512, 64, 256, 256),
)
final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
final_conv,
)
for m in self.modules():
if isinstance(m, nn.Conv2d):
if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
if m.bias is not None:
m.bias.data.zero_()
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x.view(x.size(0), self.num_classes)
Notes:
SqueezeNet structure:
Fire class:
parameters: inplanes, s1, e1, e3
conv, k=1 → (*,*,s1) → ReLU → squeeze
conv, k=1, (*,*,s1) → (*,*,e1) → ReLU → expand1x1
conv, k=3, p=1, (*,*,s1) → (*,*,e3) → ReLU → expand3x3
cat(expand1x1, expand3x3)
SqueezeNet structure:
parameters: version=1.0, num_classes=10
if version not in [1.0, 1.1]: raise ValueError (1.0 or 1.1 expected)
if version == 1.0:
features:
conv, k=7, s=2 → 13*13*96 → ReLU → max pool, k=3, s=2, ceil_mode=True → 6*6*96
Fire(96, 16, 64, 64) → conv → conv+conv → 6*6*(64+64=128)
Fire(128, 16, 64, 64) → conv → conv+conv → 6*6*(64+64=128)
Fire(128, 32, 128, 128) → conv → conv+conv → 6*6*(128+128=256)
→ max pool, k=3, s=2, ceil_mode=True → 3*3*256
Fire(256, 32, 128, 128) → conv → conv+conv → 3*3*(128+128=256)
Fire(256, 48, 192, 192) → conv → conv+conv → 3*3*(192+192=384)
Fire(384, 48, 192, 192) → conv → conv+conv → 3*3*(192+192=384)
Fire(384, 64, 256, 256) → conv → conv+conv → 3*3*(256+256=512)
→ max pool, k=3, s=2, ceil_mode=True → 1*1*512
Fire(512, 64, 256, 256) → conv → conv+conv → 1*1*(256+256=512)
else (version == 1.1):
features:
conv, k=3, s=2 → 15*15*64 → ReLU → max pool, k=3, s=2, ceil_mode=True → 7*7*64
Fire(64, 16, 64, 64) → conv → conv+conv → 7*7*(64+64=128)
Fire(128, 16, 64, 64) → conv → conv+conv → 7*7*(64+64=128)
→ max pool, k=3, s=2, ceil_mode=True → 3*3*128
Fire(128, 32, 128, 128) → conv → conv+conv → 3*3*(128+128=256)
Fire(256, 32, 128, 128) → conv → conv+conv → 3*3*(128+128=256)
→ max pool, k=3, s=2, ceil_mode=True → 1*1*256
Fire(256, 48, 192, 192) → conv → conv+conv → 1*1*(192+192=384)
Fire(384, 48, 192, 192) → conv → conv+conv → 1*1*(192+192=384)
Fire(384, 64, 256, 256) → conv → conv+conv → 1*1*(256+256=512)
Fire(512, 64, 256, 256) → conv → conv+conv → 1*1*(256+256=512)
→ Dropout(p=0.5) →
final_conv: conv, k=1 → 1*1*10 →
weight initialization:
for m in self.modules():
if isinstance(m, nn.Conv2d): # is this a conv layer?
if m is final_conv: # the final conv layer uses normal initialization
init.normal_(m.weight, mean=0.0, std=0.01)
else: # all other conv layers use Kaiming uniform initialization
init.kaiming_uniform_(m.weight)
if m.bias is not None:
m.bias.data.zero_()
flatten → (batch, num_classes)
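A sketch of my own checking both versions end to end on a 32*32 input:

# Sketch: both SqueezeNet versions end in a (batch, 10) tensor.
for v in (1.0, 1.1):
    net = SqueezeNet(version=v)
    x = torch.randn(2, 3, 32, 32)
    print(v, net(x).shape)   # torch.Size([2, 10]) for both versions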
10. MobileNet
class Block(nn.Module):
"DWConv+PointWiseConv"
def __init__(self,in_planes,out_planes,stride):
super(Block,self).__init__()
self.conv1=nn.Conv2d(in_planes,in_planes,kernel_size=3,stride=stride,padding=1,groups=in_planes,bias=False)
self.bn1=nn.BatchNorm2d(in_planes)
self.conv2=nn.Conv2d(in_planes,out_planes,kernel_size=1,stride=1,padding=0,bias=False)
self.bn2=nn.BatchNorm2d(out_planes)
def forward(self,x):
out=F.relu(self.bn1(self.conv1(x)))
out=F.relu(self.bn2(self.conv2(out)))
return out
class MobileNet(nn.Module):
cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
def __init__(self, num_classes=10):
super(MobileNet, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_planes=32)
self.linear = nn.Linear(1024, num_classes)
def _make_layers(self, in_planes):
layers = []
for x in self.cfg:
out_planes = x if isinstance(x, int) else x[0]
stride = 1 if isinstance(x, int) else x[1]
layers.append(Block(in_planes, out_planes, stride))
in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.avg_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
Notes:
MobileNet structure:
Block class:
parameters: in_planes, out_planes, stride
conv, k=3, s=stride, p=1, groups=in_planes, bias=False (depthwise)
if stride=1, H and W unchanged → (*,*,in_planes)
else (stride=2) → (*/2,*/2,in_planes)
→ (**,**,in_planes) → BN → ReLU →
conv, k=1, s=1, p=0, bias=False (pointwise) → (**,**,out_planes) → BN → ReLU →
MobileNet structure:
constant cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
parameter num_classes=10
_make_layers function:
parameter in_planes=32
for x in cfg:
out_planes = x if isinstance(x, int) else x[0]
stride = 1 if isinstance(x, int) else x[1]
e.g. in_planes=32, out_planes=64, stride=1
Block(in_planes, out_planes, stride)
in_planes = out_planes
conv, k=3, s=1, p=1, bias=False → 32*32*32 → BN → ReLU →
_make_layers(in_planes=32):
in_planes=32, out_planes=cfg[0]=64, stride=1
Block(32, 64, 1) → depthwise conv → 32*32*32 → conv → 32*32*64 → in_planes=64
in_planes=64, out_planes=cfg[1][0]=128, stride=cfg[1][1]=2
Block(64, 128, 2) → depthwise conv → 16*16*64 → conv → 16*16*128 → in_planes=128
in_planes=128, out_planes=cfg[2]=128, stride=1
Block(128, 128, 1) → depthwise conv → 16*16*128 → conv → 16*16*128 → in_planes=128
in_planes=128, out_planes=cfg[3][0]=256, stride=cfg[3][1]=2
Block(128, 256, 2) → depthwise conv → 8*8*128 → conv → 8*8*256 → in_planes=256
in_planes=256, out_planes=cfg[4]=256, stride=1
Block(256, 256, 1) → depthwise conv → 8*8*256 → conv → 8*8*256 → in_planes=256
in_planes=256, out_planes=cfg[5][0]=512, stride=cfg[5][1]=2
Block(256, 512, 2) → depthwise conv → 4*4*256 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[6]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[7]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[8]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[9]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[10]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[11][0]=1024, stride=cfg[11][1]=2
Block(512, 1024, 2) → depthwise conv → 2*2*512 → conv → 2*2*1024 → in_planes=1024
in_planes=1024, out_planes=cfg[12]=1024, stride=1
Block(1024, 1024, 1) → depthwise conv → 2*2*1024 → conv → 2*2*1024 → in_planes=1024
avg pool, k=2 → 1*1*1024 →
flatten → 1024 →
FC → (1024,10) →
softmax
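The point of the depthwise + pointwise split is the parameter saving; a comparison of my own against a plain 3*3 convolution with the same channel counts:

# Sketch: one depthwise-separable Block vs. a standard 3x3 conv.
blk = Block(in_planes=128, out_planes=256, stride=1)
std = nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False)
print(sum(p.numel() for p in blk.parameters()))  # depthwise + pointwise (+ BN)
print(std.weight.numel())                        # 128*256*3*3 = 294912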
11. ShuffleNet
class ShuffleBlock(nn.Module):
def __init__(self,groups):
super(ShuffleBlock,self).__init__()
self.groups=groups
def forward(self,x):
'''
        [N,C,H,W] -> split into g groups -> [N,g,C/g,H,W] -> transpose groups and channels -> flatten back to channel-shuffled [N,C,H,W]
'''
N,C,H,W=x.size()
g=self.groups
        return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)
class Bottleneck(nn.Module):
def __init__(self, in_planes, out_planes, stride, groups):
super(Bottleneck, self).__init__()
self.stride = stride
        mid_planes = out_planes//4
g = 1 if in_planes==24 else groups
self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
self.bn1 = nn.BatchNorm2d(mid_planes)
self.shuffle1 = ShuffleBlock(groups=g)
self.conv2 = nn.Conv2d(mid_planes, mid_planes,
kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
self.bn2 = nn.BatchNorm2d(mid_planes)
self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes)
self.shortcut = nn.Sequential()
if stride == 2:
self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
res = self.shortcut(x)
out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
return out
class ShuffleNet(nn.Module):
def __init__(self, cfg):
super(ShuffleNet, self).__init__()
out_planes = cfg['out_planes']
num_blocks = cfg['num_blocks']
groups = cfg['groups']
self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_planes = 24
self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
self.linear = nn.Linear(out_planes[2], 10)
def _make_layer(self, out_planes, num_blocks, groups):
layers = []
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = self.in_planes if i == 0 else 0
layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
self.in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
Notes:
ShuffleNet structure:
ShuffleBlock class (see the sketch below):
parameter: groups
# channel shuffle: a transpose-and-regroup operation
# [N,C,H,W] -> split into g groups -> [N,g,C/g,H,W] -> transpose groups and channels -> flatten back to [N,C,H,W]
N,C,H,W = x.size()
# permute swaps dims 1 and 2
x.view(N,g,C//g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)
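A tiny numeric demo of my own makes the shuffle concrete: with C=4 channels and g=2 groups, the channel order [0,1,2,3] becomes [0,2,1,3]:

# Sketch: channel shuffle on a 1x4x1x1 tensor.
x = torch.arange(4).float().view(1, 4, 1, 1)
print(ShuffleBlock(groups=2)(x).view(-1))   # tensor([0., 2., 1., 3.])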
Bottleneck class:
parameters: in_planes, out_planes, stride, groups
bottleneck compression: mid_planes = out_planes//4
g = 1 if in_planes==24 else groups
conv, k=1, groups=g, bias=False → (*,*,mid_planes) → BN → ReLU → ShuffleBlock(groups=g)
conv, k=3, s=stride, p=1, groups=mid_planes, bias=False
if stride=1 → (*,*,mid_planes)
else (stride=2) → (*/2,*/2,mid_planes)
→ (**,**,mid_planes)
→ BN → ReLU →
conv, k=1, groups=groups → (**,**,out_planes) → BN → out
shortcut:
if stride=2: avg pool, k=3, s=2, p=1 → (*/2,*/2,in_planes)
if stride=2: cat(out,shortcut) → (*/2,*/2,out_planes+in_planes) → ReLU
else (stride=1): add(out,shortcut) → (*,*,out_planes) → ReLU
ShuffleNet structure:
cfg = {
'out_planes': [200,400,800],
'num_blocks': [4,8,4],
'groups': 2
}
out_planes = cfg['out_planes']
num_blocks = cfg['num_blocks']
groups = cfg['groups']
_make_layer function:
parameters: out_planes, num_blocks, groups
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = in_planes if i == 0 else 0
Bottleneck(in_planes, out_planes-cat_planes, stride=stride, groups=groups)
in_planes = out_planes
conv, k=1, bias=False → 32*32*24 → BN → ReLU →
in_planes = 24
layer1:
_make_layer(out_planes[0], num_blocks[0], groups)
out_planes=200, num_blocks=4, groups=2
i=0, stride=2, cat_planes=in_planes=24:
Bottleneck(in_planes=24, out_planes=200-24=176, stride=2, groups=2)
mid_planes = out_planes//4 = 44
grouped conv g=1 → 32*32*44 → BN → ReLU → channel shuffle →
grouped conv g=44 → 16*16*44 → BN → ReLU → grouped conv g=2 → 16*16*176 → BN → out
shortcut: avg pool → 16*16*24 →
cat(out,shortcut) → 16*16*(176+24=200) → in_planes=200
i=1, stride=1, cat_planes=0:
Bottleneck(in_planes=200, out_planes=200-0=200, stride=1, groups=2)
mid_planes = out_planes//4 = 50
grouped conv g=2 → 16*16*50 → BN → ReLU → channel shuffle →
grouped conv g=50 → 16*16*50 → BN → ReLU → grouped conv g=2 → 16*16*200 → BN → out → ReLU
→ in_planes=200
i=2, stride=1, cat_planes=0:
Bottleneck(in_planes=200, out_planes=200-0=200, stride=1, groups=2)
so i=2 and i=3 are the same as i=1
in_planes = 200
layer2:
_make_layer(out_planes[1], num_blocks[1], groups)
out_planes=400, num_blocks=8, groups=2
i=0, stride=2, cat_planes=in_planes=200:
Bottleneck(in_planes=200, out_planes=400-200=200, stride=2, groups=2)
mid_planes = out_planes//4 = 50
grouped conv g=2 → 16*16*50 → BN → ReLU → channel shuffle →
grouped conv g=50 → 8*8*50 → BN → ReLU → grouped conv g=2 → 8*8*200 → BN → out
shortcut: avg pool → 8*8*200 →
cat(out,shortcut) → 8*8*(200+200=400) → in_planes=400
i=1, stride=1, cat_planes=0:
Bottleneck(in_planes=400, out_planes=400-0=400, stride=1, groups=2)
mid_planes = out_planes//4 = 100
grouped conv g=2 → 8*8*100 → BN → ReLU → channel shuffle →
grouped conv g=100 → 8*8*100 → BN → ReLU → grouped conv g=2 → 8*8*400 → BN → out → ReLU
→ in_planes=400
i=2, stride=1, cat_planes=0:
Bottleneck(in_planes=400, out_planes=400-0=400, stride=1, groups=2)
so i=2 through i=7 are the same as i=1
in_planes = 400
layer3:
_make_layer(out_planes[2], num_blocks[2], groups)
out_planes=800, num_blocks=4, groups=2
i=0, stride=2, cat_planes=in_planes=400:
Bottleneck(in_planes=400, out_planes=800-400=400, stride=2, groups=2)
mid_planes = out_planes//4 = 100
grouped conv g=2 → 8*8*100 → BN → ReLU → channel shuffle →
grouped conv g=100 → 4*4*100 → BN → ReLU → grouped conv g=2 → 4*4*400 → BN → out
shortcut: avg pool → 4*4*400 →
cat(out,shortcut) → 4*4*(400+400=800) → in_planes=800
i=1, stride=1, cat_planes=0:
Bottleneck(in_planes=800, out_planes=800-0=800, stride=1, groups=2)
mid_planes = out_planes//4 = 200
grouped conv g=2 → 4*4*200 → BN → ReLU → channel shuffle →
grouped conv g=200 → 4*4*200 → BN → ReLU → grouped conv g=2 → 4*4*800 → BN → out → ReLU
→ in_planes=800
i=2, stride=1, cat_planes=0:
Bottleneck(in_planes=800, out_planes=800-0=800, stride=1, groups=2)
so i=2 and i=3 are the same as i=1
avg pool, k=4 → 1*1*800 →
flatten → 800 → FC → (800,10) → softmax
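A usage sketch of my own with the cfg from the notes above:

# Sketch: instantiate ShuffleNet with the cfg dict and check the output shape.
cfg = {'out_planes': [200, 400, 800], 'num_blocks': [4, 8, 4], 'groups': 2}
net = ShuffleNet(cfg)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])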
12. SEResNet18
class SELayer(nn.Module):
def __init__(self,channel,reduction=16):
super(SELayer,self).__init__()
self.avg_pool=nn.AdaptiveAvgPool2d(1)
self.fc=nn.Sequential(
nn.Linear(channel,channel//reduction,bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel//reduction,channel,bias=False),
nn.Sigmoid()
)
def forward(self,x):
b,c,_,_=x.size()
y=self.avg_pool(x).view(b,c)
y=self.fc(y).view(b,c,1,1)
y=y.expand_as(x)
return x*y
class SEResNetBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride,reduction=16):
super(SEResNetBlock,self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.se=SELayer(out_channels,reduction)
self.downsample = nn.Sequential()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out = self.se(out)
out += self.downsample(x)
out = F.relu(out)
return out
class SEResNetLayer(nn.Module):
def __init__(self,block,n_blocks,in_channels,out_channels,stride):
super(SEResNetLayer,self).__init__()
        layers=[]  # a local list; assigning to self.modules would shadow nn.Module.modules()
        layers.append(block(in_channels,out_channels,stride))
        for _ in range(n_blocks-1):
            layers.append(block(out_channels,out_channels,1))
        self.blocks=nn.Sequential(*layers)
def forward(self,x):
return self.blocks(x)
class SEResNet18(nn.Module):
def __init__(self,layer,block):
super(SEResNet18,self).__init__()
n_blocks=[2,2,2,2]
self.conv1=nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1,bias=False)
self.bn1=nn.BatchNorm2d(64)
self.rb1=layer(block,n_blocks[0],64,64,1)
self.rb2=layer(block,n_blocks[1],64,128,2)
self.rb3=layer(block,n_blocks[2],128,256,2)
self.rb4=layer(block,n_blocks[3],256,512,2)
self.fc=nn.Linear(512,10)
def forward(self,x):
out=F.relu(self.bn1(self.conv1(x)))
out=self.rb1(out)
out=self.rb2(out)
out=self.rb3(out)
out=self.rb4(out)
out=F.avg_pool2d(out,4)
out=out.view(out.shape[0],-1)
out=self.fc(out)
return out
Notes:
SEResNet18 structure:
SELayer class:
parameters: channel, reduction=16 (the paper compresses by a factor of 16)
b,c,_,_ = x.size() # b is the batch size, c the channel count; H and W are not needed here
AdaptiveAvgPool2d(1) picks the kernel size automatically so the output is always 1*1 → .view(b,c) # b*c*1*1 -> b*c
FC, bias=False → (channel, channel//reduction) → ReLU →
FC, bias=False → (channel//reduction, channel) → sigmoid →
.view(b,c,1,1) # b*c -> b*c*1*1
→ .expand_as(x) # b*c*1*1 -> b*c*h*w # the per-channel weights
→ y
→ x*y
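A small demo of my own: SELayer computes one weight in (0,1) per channel and rescales the input channel-wise without changing its shape:

# Sketch: SE recalibration keeps the input shape.
se = SELayer(channel=64, reduction=16)
x = torch.randn(2, 64, 8, 8)
print(se(x).shape)   # torch.Size([2, 64, 8, 8])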
SEResNetBlock module:
parameters: in_channels, out_channels, stride, reduction=16
conv, k=3, s=stride, p=1, bias=False:
if stride=1 → (*,*,out_channels)
else (stride=2) → (*/2,*/2,out_channels)
→ (**,**,out_channels)
→ BN → ReLU
conv, k=3, s=1, p=1, bias=False → (**,**,out_channels) → BN → out
se: SELayer(out_channels,reduction) → channel-wise reweighting via the FC layers → se(out) → out →
if stride != 1 or in_channels != out_channels:
downsample: conv, k=1, s=stride, bias=False:
if stride=2 → (*/2,*/2,out_channels)
→ BN
add(out,downsample) → ReLU →
SEResNetLayer class:
parameters: block, n_blocks, in_channels, out_channels, stride
block is the SEResNetBlock above; n_blocks is an integer
SEResNetBlock(in_channels, out_channels, stride)
for _ in range(n_blocks-1):
SEResNetBlock(out_channels, out_channels, 1)
SEResNet18 structure:
parameters: layer, block; layer is the SEResNetLayer above, block the SEResNetBlock above
n_blocks=[2,2,2,2]
conv, k=3, s=1, p=1, bias=False → 32*32*64 → BN → ReLU →
rb1:
SEResNetLayer(SEResNetBlock, n_blocks[0]=2, 64, 64, 1):
in_channels=64, out_channels=64, stride=1
SEResNetBlock(in_channels, out_channels, stride):
conv → 32*32*64 → BN → ReLU → conv → 32*32*64 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
→ ReLU
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 32*32*64 → BN → out → SE reweighting → out → ReLU
rb2:
SEResNetLayer(SEResNetBlock, n_blocks[1]=2, 64, 128, 2):
in_channels=64, out_channels=128, stride=2
SEResNetBlock(in_channels, out_channels, stride):
conv → 16*16*128 → BN → ReLU → conv → 16*16*128 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
downsample: conv → 16*16*128 → BN →
add(out,downsample) → ReLU →
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 16*16*128 → BN → out → SE reweighting → out → ReLU
rb3:
SEResNetLayer(SEResNetBlock, n_blocks[2]=2, 128, 256, 2):
in_channels=128, out_channels=256, stride=2
SEResNetBlock(in_channels, out_channels, stride):
conv → 8*8*256 → BN → ReLU → conv → 8*8*256 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
downsample: conv → 8*8*256 → BN →
add(out,downsample) → ReLU →
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 8*8*256 → BN → out → SE reweighting → out → ReLU
rb4:
SEResNetLayer(SEResNetBlock, n_blocks[3]=2, 256, 512, 2):
in_channels=256, out_channels=512, stride=2
SEResNetBlock(in_channels, out_channels, stride):
conv → 4*4*512 → BN → ReLU → conv → 4*4*512 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
downsample: conv → 4*4*512 → BN →
add(out,downsample) → ReLU →
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 4*4*512 → BN → out → SE reweighting → out → ReLU
avg pool, k=4 → 1*1*512 →
flatten → 512 → FC → (512,10) → softmax
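As with ResNet18, the constructor takes the layer and block classes; a usage sketch of my own:

# Sketch: instantiate SEResNet18 with the classes defined above.
net = SEResNet18(SEResNetLayer, SEResNetBlock)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])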