1. LeNet
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
def __init__(self):
super(LeNet,self).__init__()
self.conv1=nn.Conv2d(3,6,5)
self.conv2=nn.Conv2d(6,16,5)
self.fc1=nn.Linear(16*5*5,120)
self.fc2=nn.Linear(120,80)
self.fc3=nn.Linear(80,10)
def forward(self,x):
x=F.max_pool2d(F.relu(self.conv1(x)),2)
x=F.max_pool2d(F.relu(self.conv2(x)),2)
x=x.view(x.shape[0],-1)
x=F.relu(self.fc1(x))
x=F.relu(self.fc2(x))
x=self.fc3(x)
return x
Notes:
LeNet structure
32*32*3 → conv, 5*5 → 28*28*6 → max pool → 14*14*6
→ conv, 5*5 → 10*10*16 → max pool → 5*5*16 →
flatten, 16*5*5=400
→ FC → (400,120)
→ FC → (120,80)
→ FC → (80,10)
→ softmax
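As a quick sanity check (a sketch of my own, not part of the original notes), a dummy 32*32 RGB batch confirms the flattened size of 16*5*5 = 400 and the 10-way output:

# Sketch: verify the LeNet shape walkthrough with a dummy input.
net = LeNet()
x = torch.randn(1, 3, 32, 32)   # (batch, channels, H, W)
print(net(x).shape)             # torch.Size([1, 10])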
2. AlexNet
class AlexNet(nn.Module):
def __init__(self):
super(AlexNet,self).__init__()
self.feature_block=nn.Sequential(
nn.Conv2d(3,64,kernel_size=11,stride=4,padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Conv2d(64,192,kernel_size=5,padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3,stride=2),
nn.Conv2d(192,384,kernel_size=3,padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(384,256,kernel_size=3,padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256,256,kernel_size=3,padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3,stride=2)
)
self.avgpool=nn.AdaptiveAvgPool2d((6,6))
self.class_block=nn.Sequential(
nn.Dropout(),
nn.Linear(256*6*6,4096),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(4096,4096),
nn.ReLU(inplace=True),
nn.Linear(4096,10),
)
def forward(self,x):
x=self.feature_block(x)
x=self.avgpool(x)
x=x.view(x.size(0),256*6*6)
x=self.class_block(x)
return x
Notes:
AlexNet structure
227*227*3 → conv, k=11, s=4, p=2 → 56*56*64 → ReLU → max pool, k=3, s=2 → 27*27*64
→ conv, k=5, p=2 → 27*27*192 → ReLU → max pool, k=3, s=2 → 13*13*192
→ conv, k=3, p=1 → 13*13*384 → ReLU
→ conv, k=3, p=1 → 13*13*256 → ReLU
→ conv, k=3, p=1 → 13*13*256 → ReLU → max pool, k=3, s=2 → 6*6*256
→ adaptive avg pool to 6*6*256
→ flatten, 6*6*256
→ dropout, FC → (6*6*256,4096) → ReLU
→ dropout, FC → (4096,4096) → ReLU
→ FC → (4096,10)
→ softmax
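All the spatial sizes above follow the standard formula out = floor((in + 2p - k)/s) + 1. A small helper of my own (conv_out is a hypothetical name, not from the original notes) reproduces the walkthrough:

# Sketch: reproduce the AlexNet spatial sizes with
# out = floor((in + 2*p - k) / s) + 1.
def conv_out(size, k, s=1, p=0):
    return (size + 2 * p - k) // s + 1

size = conv_out(227, k=11, s=4, p=2)                 # 56 (first conv)
size = conv_out(size, k=3, s=2)                      # 27 (max pool)
size = conv_out(conv_out(size, k=5, p=2), k=3, s=2)  # 13 (conv k=5 p=2, then pool)
print(size)  # 13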
3. VGGNet11
class VGGBlock(nn.Module):
def __init__(self,in_channels,out_channels,batch_norm):
super(VGGBlock,self).__init__()
stack=[]
stack.append(nn.Conv2d(in_channels,out_channels,kernel_size=3,padding=1))
if batch_norm:
stack.append(nn.BatchNorm2d(out_channels))
stack.append(nn.ReLU(inplace=True))
self.model_block=nn.Sequential(*stack)
def forward(self,x):
return self.model_block(x)
class VGGNet11(nn.Module):
def __init__(self,block,pool,batch_norm):
super(VGGNet11,self).__init__()
self.feature_block=nn.Sequential(
block(3,64,batch_norm),
pool(kernel_size=2,stride=2),
block(64,128,batch_norm),
pool(kernel_size=2,stride=2),
block(128,256,batch_norm),
block(256,256,batch_norm),
pool(kernel_size=2,stride=2),
block(256,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
block(512,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
)
self.classifier=nn.Linear(512,10)
def forward(self,x):
x=self.feature_block(x)
x=x.view(x.shape[0],-1)
x=self.classifier(x)
return x
Notes:
VGGNet11 structure
VGGBlock: H and W sizes unchanged
if batch_norm=True:
→ conv, k=3, p=1 → BN → ReLU
else (batch_norm=False):
→ conv, k=3, p=1 → ReLU
32*32*3 →
VGGBlock(3,64,batch_norm=True) → 32*32*64 → pool, k=2, s=2 → 16*16*64 →
VGGBlock(64,128,batch_norm=True) → 16*16*128 → pool, k=2, s=2 → 8*8*128 →
VGGBlock(128,256,batch_norm=True) → 8*8*256
→ VGGBlock(256,256,batch_norm=True) → 8*8*256
→ pool, k=2, s=2 → 4*4*256 →
VGGBlock(256,512,batch_norm=True) → 4*4*512
→ VGGBlock(512,512,batch_norm=True) → 4*4*512
→ pool, k=2, s=2 → 2*2*512 →
VGGBlock(512,512,batch_norm=True) → 2*2*512
→ VGGBlock(512,512,batch_norm=True) → 2*2*512
→ pool, k=2, s=2 → 1*1*512 →
flatten, 512 →
FC → (512,10) →
softmax
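Because the constructor takes the block and pooling classes as arguments, instantiation looks like the following sketch (usage assumed from the signatures above):

# Sketch: VGGNet11 is parameterized by the block and pool classes.
net = VGGNet11(VGGBlock, nn.MaxPool2d, batch_norm=True)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])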
4. VGGNet16
class VGGNet16(nn.Module):
def __init__(self,block,pool,batch_norm):
super(VGGNet16,self).__init__()
self.feature_block=nn.Sequential(
block(3,64,batch_norm),
block(64,64,batch_norm),
pool(kernel_size=2,stride=2),
block(64,128,batch_norm),
block(128,128,batch_norm),
pool(kernel_size=2,stride=2),
block(128,256,batch_norm),
block(256,256,batch_norm),
pool(kernel_size=2,stride=2),
block(256,512,batch_norm),
block(512,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
block(512,512,batch_norm),
block(512,512,batch_norm),
block(512,512,batch_norm),
pool(kernel_size=2,stride=2),
)
self.classifier=nn.Linear(512,10)
def forward(self,x):
x=self.feature_block(x)
x=x.view(x.shape[0],-1)
x=self.classifier(x)
return x
Notes:
VGGNet16 structure
VGGBlock: H and W sizes unchanged
if batch_norm=True:
→ conv, k=3, p=1 → BN → ReLU
else (batch_norm=False):
→ conv, k=3, p=1 → ReLU
32*32*3 →
VGGBlock(3,64,batch_norm=True) → 32*32*64
→ VGGBlock(64,64,batch_norm=True) → 32*32*64
→ pool, k=2, s=2 → 16*16*64 →
VGGBlock(64,128,batch_norm=True) → 16*16*128
→ VGGBlock(128,128,batch_norm=True) → 16*16*128
→ pool, k=2, s=2 → 8*8*128 →
VGGBlock(128,256,batch_norm=True) → 8*8*256
→ VGGBlock(256,256,batch_norm=True) → 8*8*256
→ VGGBlock(256,256,batch_norm=True) → 8*8*256
→ pool, k=2, s=2 → 4*4*256 →
VGGBlock(256,512,batch_norm=True) → 4*4*512
→ VGGBlock(512,512,batch_norm=True) → 4*4*512
→ VGGBlock(512,512,batch_norm=True) → 4*4*512
→ pool, k=2, s=2 → 2*2*512 →
VGGBlock(512,512,batch_norm=True) → 2*2*512
→ VGGBlock(512,512,batch_norm=True) → 2*2*512
→ VGGBlock(512,512,batch_norm=True) → 2*2*512
→ pool, k=2, s=2 → 1*1*512 →
flatten, 512 →
FC → (512,10) →
softmax
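A quick way to compare the two VGG variants defined above (my own sketch; the counts depend on batch_norm):

# Sketch: compare parameter counts of VGGNet11 and VGGNet16.
vgg11 = VGGNet11(VGGBlock, nn.MaxPool2d, batch_norm=True)
vgg16 = VGGNet16(VGGBlock, nn.MaxPool2d, batch_norm=True)
print(sum(p.numel() for p in vgg11.parameters()))
print(sum(p.numel() for p in vgg16.parameters()))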
5. GoogLeNet
class Inception(nn.Module):
def __init__(self,in_planes,n1x1,n3x3red,n3x3,n5x5red,n5x5,pool_planes):
super(Inception,self).__init__()
self.b1=nn.Sequential(
nn.Conv2d(in_planes,n1x1,kernel_size=1),
nn.BatchNorm2d(n1x1),
nn.ReLU(True),
)
self.b2=nn.Sequential(
nn.Conv2d(in_planes,n3x3red,kernel_size=1),
nn.BatchNorm2d(n3x3red),
nn.ReLU(True),
nn.Conv2d(n3x3red,n3x3,kernel_size=3,padding=1),
nn.BatchNorm2d(n3x3),
nn.ReLU(True),
)
self.b3=nn.Sequential(
nn.Conv2d(in_planes,n5x5red,kernel_size=1),
nn.BatchNorm2d(n5x5red),
nn.ReLU(True),
nn.Conv2d(n5x5red,n5x5,kernel_size=5,padding=2),
nn.BatchNorm2d(n5x5),
nn.ReLU(True),
)
self.b4=nn.Sequential(
nn.MaxPool2d(3,stride=1,padding=1),
nn.Conv2d(in_planes,pool_planes,kernel_size=1),
nn.BatchNorm2d(pool_planes),
nn.ReLU(True),
)
def forward(self,x):
x1=self.b1(x)
x2=self.b2(x)
x3=self.b3(x)
x4=self.b4(x)
return torch.cat([x1,x2,x3,x4],1)
class GoogLeNet(nn.Module):
def __init__(self):
super(GoogLeNet,self).__init__()
self.feature_block=nn.Sequential(
nn.Conv2d(3,192,kernel_size=3,padding=1),
nn.BatchNorm2d(192),
nn.ReLU(True),
)
self.a3=Inception(192,64,96,128,16,32,32)
self.b3=Inception(256, 128, 128, 192, 32, 96, 64)
self.maxpool=nn.MaxPool2d(3,stride=2,padding=1)
self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
self.avgpool=nn.AvgPool2d(8,stride=1)
self.linear=nn.Linear(1024,10)
def forward(self,x):
out=self.feature_block(x)
out=self.a3(out)
out=self.b3(out)
out=self.maxpool(out)
out=self.a4(out)
out=self.b4(out)
out=self.c4(out)
out=self.d4(out)
out=self.e4(out)
out = self.maxpool(out)
out = self.a5(out)
out = self.b5(out)
out = self.avgpool(out)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
Notes:
GoogLeNet structure
Inception module:
parameters: in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes
→ b1: conv, k=1 → (,,n1x1) → BN → ReLU
→ b2: conv, k=1 → (,,n3x3red) → BN → ReLU → conv, k=3, p=1 → (,,n3x3) → BN → ReLU
→ b3: conv, k=1 → (,,n5x5red) → BN → ReLU → conv, k=5, p=2 → (,,n5x5) → BN → ReLU
→ b4: pool, k=3, s=1, p=1 → (,,in_planes) → conv, k=1 → (,,pool_planes) → BN → ReLU
→ cat(b1,b2,b3,b4) → (,,n1x1+n3x3+n5x5+pool_planes)
Note: H and W are unchanged; the output channel count is n1x1+n3x3+n5x5+pool_planes
32*32*3 →
conv, k=3, p=1 → 32*32*192 → BN → ReLU →
a3: Inception(192, 64, 96,128, 16,32, 32) → 32*32*(64+128+32+32=256) →
b3: Inception(256, 128, 128,192, 32,96, 64) → 32*32*(128+192+96+64=480) →
max pool, k=3, s=2, p=1 → 16*16*480 →
a4: Inception(480, 192, 96,208, 16,48, 64) → 16*16*(192+208+48+64=512) →
b4: Inception(512, 160, 112,224, 24,64, 64) → 16*16*(160+224+64+64=512) →
c4: Inception(512, 128, 128,256, 24,64, 64) → 16*16*(128+256+64+64=512) →
d4: Inception(512, 112, 144,288, 32,64, 64) → 16*16*(112+288+64+64=528) →
e4: Inception(528, 256, 160,320, 32,128, 128) → 16*16*(256+320+128+128=832) →
max pool, k=3, s=2, p=1 → 8*8*832 →
a5: Inception(832, 256, 160,320, 32,128, 128) → 8*8*(256+320+128+128=832) →
b5: Inception(832, 384, 192,384, 48,128, 128) → 8*8*(384+384+128+128=1024) →
avg pool, k=8, s=1 → 1*1*1024 →
flatten → 1024
→ FC → (1024,10)
→ softmax
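A short check of my own that one Inception block preserves H and W and only widens the channel dimension to n1x1 + n3x3 + n5x5 + pool_planes:

# Sketch: a single Inception block on a 32*32 feature map.
blk = Inception(192, 64, 96, 128, 16, 32, 32)
x = torch.randn(1, 192, 32, 32)
print(blk(x).shape)   # torch.Size([1, 256, 32, 32])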
6. ResNet18
class ResNetBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super(ResNetBlock,self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.downsample = nn.Sequential()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.downsample(x)
out = F.relu(out)
return out
class ResNetLayer(nn.Module):
def __init__(self, block, n_blocks, in_channels, out_channels, stride):
super(ResNetLayer,self).__init__()
        layers = []  # a local list; assigning to self.modules would shadow nn.Module.modules()
        layers.append(block(in_channels,out_channels,stride))
        for _ in range(n_blocks-1):
            layers.append(block(out_channels,out_channels,1))
        self.blocks = nn.Sequential(*layers)
def forward(self,x):
return self.blocks(x)
class ResNet18(nn.Module):
def __init__(self,layer,block):
super(ResNet18,self).__init__()
n_blocks = [2,2,2,2]
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.rb1 = layer(block, n_blocks[0], 64, 64, 1)
self.rb2 = layer(block, n_blocks[1], 64, 128, 2)
self.rb3 = layer(block, n_blocks[2], 128, 256, 2)
self.rb4 = layer(block, n_blocks[3], 256, 512, 2)
self.fc = nn.Linear(512,10)
def forward(self,x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.rb1(out)
out = self.rb2(out)
out = self.rb3(out)
out = self.rb4(out)
out = F.avg_pool2d(out,4)
out = out.view(out.shape[0],-1)
out = self.fc(out)
return out
Notes:
ResNet18 structure
ResNetBlock module:
parameters: in_channels, out_channels, stride
if stride=1:
conv, k=3, s=1, p=1, bias=False → (*,*,out_channels) → BN → ReLU
conv, k=3, s=1, p=1, bias=False → (*,*,out_channels) → BN → out
if in_channels != out_channels:
downsample: conv, k=1, s=stride=1, bias=False → (*,*,in_channels) → (*,*,out_channels) → BN →
else if stride != 1:
conv, k=3, s=stride, p=1, bias=False → (1/2*,1/2*,out_channels) → BN → ReLU
conv, k=3, s=1, p=1, bias=False → (1/2*,1/2*,out_channels) → BN → out
downsample: conv, k=1, s=stride, bias=False → (*,*,in_channels) → (1/2*,1/2*,out_channels) → BN →
add(out,downsample) → ReLU
ResNetLayer:
parameters: block, n_blocks, in_channels, out_channels, stride
block is the ResNetBlock above (written as ResNetBlock below); n_blocks is an integer
→
ResNetBlock(in_channels, out_channels, stride) →
for _ in range(n_blocks-1):
→ ResNetBlock(out_channels, out_channels, 1)
ResNet18:
parameters: layer, block
layer is the ResNetLayer above, block is the ResNetBlock above
n_blocks=[2,2,2,2]
conv, k=3, s=1, p=1, bias=False → 32*32*64 → BN → ReLU →
ResNetLayer(ResNetBlock, n_blocks[0]=2, 64, 64, 1):
(in_channels=out_channels and stride=1, so there is no downsample branch)
ResNetBlock(64, 64, 1):
conv → 32*32*64 → BN → ReLU →
conv → 32*32*64 → BN → out → ReLU →
ResNetBlock(64, 64, 1):
conv → 32*32*64 → BN → ReLU →
conv → 32*32*64 → BN → out → ReLU →
ResNetLayer(ResNetBlock, n_blocks[1]=2, 64, 128, 2):
ResNetBlock(64, 128, 2):
conv → 16*16*128 → BN → ReLU →
conv → 16*16*128 → BN → out
downsample: 32*32*64 → 16*16*128 →
add(out,downsample) → ReLU →
ResNetBlock(128, 128, 1):
conv → 16*16*128 → BN → ReLU →
conv → 16*16*128 → BN → out → ReLU →
ResNetLayer(ResNetBlock, n_blocks[2]=2, 128, 256, 2):
ResNetBlock(128, 256, 2):
conv → 8*8*256 → BN → ReLU →
conv → 8*8*256 → BN → out
downsample: 16*16*128 → 8*8*256 →
add(out,downsample) → ReLU →
ResNetBlock(256, 256, 1):
conv → 8*8*256 → BN → ReLU →
conv → 8*8*256 → BN → out → ReLU →
ResNetLayer(ResNetBlock, n_blocks[3]=2, 256, 512, 2):
ResNetBlock(256, 512, 2):
conv → 4*4*512 → BN → ReLU →
conv → 4*4*512 → BN → out
downsample: 8*8*256 → 4*4*512 →
add(out,downsample) → ReLU →
ResNetBlock(512, 512, 1):
conv → 4*4*512 → BN → ReLU →
conv → 4*4*512 → BN → out → ReLU →
avg pool, k=4 → 1*1*512 →
flatten → 512 →
FC → (512,10) →
softmax
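As with VGG, the constructor is parameterized by the layer and block classes; a usage sketch of my own:

# Sketch: instantiate ResNet18 with the classes defined above.
net = ResNet18(ResNetLayer, ResNetBlock)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])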
7. DenseNet
import math
class Bottleneck(nn.Module):
def __init__(self,in_planes,growth_rate):
super(Bottleneck,self).__init__()
self.bn1=nn.BatchNorm2d(in_planes)
self.conv1=nn.Conv2d(in_planes,4*growth_rate,kernel_size=1,bias=False)
self.bn2=nn.BatchNorm2d(4*growth_rate)
self.conv2=nn.Conv2d(4*growth_rate,growth_rate,kernel_size=3,padding=1,bias=False)
def forward(self,x):
out=self.conv1(F.relu(self.bn1(x)))
out=self.conv2(F.relu(self.bn2(out)))
out=torch.cat([out,x],1)
return out
class Transition(nn.Module):
def __init__(self,in_planes,out_planes):
super(Transition,self).__init__()
self.bn=nn.BatchNorm2d(in_planes)
self.conv=nn.Conv2d(in_planes,out_planes,kernel_size=1,bias=False)
def forward(self,x):
out=self.conv(F.relu(self.bn(x)))
out=F.avg_pool2d(out,2)
return out
class DenseNet(nn.Module):
def __init__(self,block,nblocks,growth_rate=12,reduction=0.5,num_classes=10):
super(DenseNet,self).__init__()
self.growth_rate=growth_rate
num_planes=2*growth_rate
self.conv1=nn.Conv2d(3,num_planes,kernel_size=3,padding=1,bias=False)
self.dense1=self._make_dense_layers(block,num_planes,nblocks[0])
num_planes+=nblocks[0]*growth_rate
out_planes=int(math.floor(num_planes*reduction))
self.trans1=Transition(num_planes,out_planes)
num_planes=out_planes
self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
num_planes += nblocks[1]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans2 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
num_planes += nblocks[2]*growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans3 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
num_planes += nblocks[3]*growth_rate
self.bn=nn.BatchNorm2d(num_planes)
self.linear=nn.Linear(num_planes,num_classes)
def _make_dense_layers(self,block,in_planes,nblock):
layers=[]
for i in range(nblock):
layers.append(block(in_planes,self.growth_rate))
in_planes+=self.growth_rate
return nn.Sequential(*layers)
def forward(self,x):
out=self.conv1(x)
out=self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.trans3(self.dense3(out))
out = self.dense4(out)
out=F.avg_pool2d(F.relu(self.bn(out)),4)
out=out.view(out.size(0),-1)
out=self.linear(out)
return out
def DenseNet121():
return DenseNet(Bottleneck,[6,12,24,16],growth_rate=32)
Notes:
DenseNet structure
Bottleneck module:
parameters: in_planes, growth_rate
x →
BN → ReLU → conv, k=1, bias=False → (,,4*growth_rate) →
BN → ReLU → conv, k=3, p=1, bias=False → (,,growth_rate) → out
cat(out,x) →
Note: H and W are unchanged; the conv branch outputs growth_rate channels, so after the cat the block adds growth_rate channels to its input
Transition module:
parameters: in_planes, out_planes
BN → ReLU → conv, k=1, bias=False → (,,out_planes) →
avg pool, k=2 → downsampling, H and W halved →
DenseNet121 structure:
parameters: block, nblocks, growth_rate=12, reduction=0.5, num_classes=10
block is the Bottleneck above; reduction is the compression ratio
growth_rate = growth_rate, num_planes = 2*growth_rate
_make_dense_layers function:
parameters: block, in_planes, nblock
block is the Bottleneck above; nblock is an integer giving how many Bottleneck layers the dense block contains
for i in range(nblock):
Bottleneck(in_planes, growth_rate):
x →
BN → ReLU → conv, k=1, bias=False → (,,4*growth_rate) →
BN → ReLU → conv, k=3, p=1, bias=False → (,,growth_rate) → out
cat(out,x) →
in_planes += growth_rate
nblocks = [6,12,24,16]
growth_rate=32, num_planes=2*growth_rate=64
conv, k=3, p=1, bias=False → 32*32*(2*growth_rate=64)
dense1:
_make_dense_layers(Bottleneck, in_planes=num_planes=64, nblocks[0]=6):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 32*32*(4*32) → BN → ReLU → conv → 32*32*32 → out
cat(out,x) → 32*32*96 →
in_planes = in_planes+growth_rate = 96
i=1, Bottleneck → conv → conv → 32*32*(64+2*32) →
......
i=5, Bottleneck → conv → conv → 32*32*(64+6*32), i.e. 32*32*256 →
num_planes += nblocks[0]*growth_rate → 64+6*32=256
out_planes = int(num_planes*reduction) = 256*0.5 = 128
trans1:
Transition(num_planes,out_planes):
conv → avg pool → 16*16*128
num_planes = out_planes = 128
dense2:
_make_dense_layers(Bottleneck, in_planes=num_planes=128, nblocks[1]=12):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 16*16*(4*32) → BN → ReLU → conv → 16*16*32 → out
cat(out,x) → 16*16*160 →
in_planes = in_planes+growth_rate = 128+32 = 160
i=1, Bottleneck → conv → conv → 16*16*(128+2*32) →
......
i=11, Bottleneck → conv → conv → 16*16*(128+12*32), i.e. 16*16*512 →
num_planes += nblocks[1]*growth_rate → 128+12*32=512
out_planes = int(num_planes*reduction) = 512*0.5 = 256
trans2:
Transition(num_planes,out_planes):
conv → avg pool → 8*8*256
num_planes = out_planes = 256
dense3:
_make_dense_layers(Bottleneck, in_planes=num_planes=256, nblocks[2]=24):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 8*8*(4*32) → BN → ReLU → conv → 8*8*32 → out
cat(out,x) → 8*8*288 →
in_planes = in_planes+growth_rate = 256+32 = 288
i=1, Bottleneck → conv → conv → 8*8*(256+2*32) →
......
i=23, Bottleneck → conv → conv → 8*8*(256+24*32), i.e. 8*8*1024 →
num_planes += nblocks[2]*growth_rate → 256+24*32=1024
out_planes = int(num_planes*reduction) = 1024*0.5 = 512
trans3:
Transition(num_planes,out_planes):
conv → avg pool → 4*4*512
num_planes = out_planes = 512
dense4:
_make_dense_layers(Bottleneck, in_planes=num_planes=512, nblocks[3]=16):
Bottleneck(in_planes, growth_rate):
x → BN → ReLU → conv → 4*4*(4*32) → BN → ReLU → conv → 4*4*32 → out
cat(out,x) → 4*4*544 →
in_planes = in_planes+growth_rate = 512+32 = 544
i=1, Bottleneck → conv → conv → 4*4*(512+2*32) →
......
i=15, Bottleneck → conv → conv → 4*4*(512+16*32), i.e. 4*4*1024 →
num_planes += nblocks[3]*growth_rate → 512+16*32=1024
BN → ReLU → avg pool, k=4 → 1*1*1024 →
flatten → 1024 →
FC → (1024,10) →
softmax
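The channel bookkeeping above can be reproduced with a few lines of arithmetic (my own sketch, mirroring the DenseNet121 constructor):

# Sketch: track num_planes through the four dense blocks of DenseNet121.
growth_rate, reduction = 32, 0.5
num_planes = 2 * growth_rate                 # 64 after the stem conv
for n in [6, 12, 24, 16][:3]:                # dense1..dense3 each end in a Transition
    num_planes += n * growth_rate
    num_planes = int(num_planes * reduction)
num_planes += 16 * growth_rate               # dense4 has no Transition
print(num_planes)                            # 1024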
8. ResNeXt
class Block(nn.Module):
expansion = 2
def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
super(Block, self).__init__()
group_width = cardinality * bottleneck_width
self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(group_width)
self.conv2 = nn.Conv2d(group_width, group_width,
kernel_size=3, stride=stride,
padding=1, groups=cardinality,
bias=False)
self.bn2 = nn.BatchNorm2d(group_width)
self.conv3 = nn.Conv2d(group_width, self.expansion*group_width,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.expansion*group_width)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion*group_width:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion*group_width,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion*group_width)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNeXt(nn.Module):
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
super(ResNeXt, self).__init__()
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.in_planes = 64
self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(num_blocks[0], 1)
self.layer2 = self._make_layer(num_blocks[1], 2)
self.layer3 = self._make_layer(num_blocks[2], 2)
self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
def _make_layer(self, num_blocks, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
self.bottleneck_width *= 2
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNeXt29_2x64d():
return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
Notes:
ResNeXt structure
Block class:
expansion factor expansion = 2
parameters: in_planes, cardinality=32, bottleneck_width=4, stride=1
group_width = cardinality * bottleneck_width = 32*4 = 128
→ conv, k=1, bias=False → (*,*,128) → BN → ReLU →
→ conv, k=3, s=stride, p=1, groups=cardinality, bias=False →
if stride=1, H and W unchanged
else (stride=2), downsampled
→ total channel count unchanged, 128
groups is the grouping parameter: each group convolves group_width/cardinality = 128/32 = 4 channels
→ BN → ReLU →
conv, k=1, bias=False → (**,**,expansion*group_width=256) → BN → out →
if stride != 1 or in_planes != expansion*group_width=256:
shortcut: conv, k=1, s=stride, bias=False →
if stride=1, H and W unchanged
else (stride=2), downsampled
→ channel count (expansion*group_width=256), (**,**,256)
→ BN
add(out,shortcut) → (**,**,256) → ReLU
ResNeXt29_2x64d structure:
parameters: num_blocks, cardinality, bottleneck_width, num_classes=10
num_blocks is e.g. [3,3,3]; cardinality is the number of groups; bottleneck_width is the channels per group
num_classes is the number of classes
cardinality = 2, bottleneck_width = 64
in_planes = 64
num_blocks = [3,3,3]
_make_layer function:
parameters: num_blocks, stride
here num_blocks is an integer
strides = [stride] + [1]*(num_blocks-1)
i.e. stride=1: [1]+[1,1] → [1,1,1]; stride=2: [2]+[1,1] → [2,1,1]
for stride in strides:
Block(in_planes, cardinality, bottleneck_width, stride)
in_planes = Block.expansion * cardinality * bottleneck_width = 256
bottleneck_width *= 2 → 128
conv, k=1, bias=False → 32*32*64 → BN → ReLU →
layer1:
_make_layer(num_blocks[0], 1):
strides=[1,1,1]
Block(in_planes=64, cardinality=2, bottleneck_width=64, stride=1):
group_width = cardinality * bottleneck_width = 128
conv → 32*32*128 → BN → ReLU →
grouped conv → 32*32*128 → BN → ReLU →
conv, expansion*group_width=256 → 32*32*256 → BN → out →
shortcut (in_planes != expansion*group_width=256):
conv → 32*32*256 → BN →
add(out,shortcut) → 32*32*256 → ReLU →
in_planes = expansion * cardinality * bottleneck_width = 256
Block(in_planes=256, cardinality=2, bottleneck_width=64, stride=1):
group_width = cardinality * bottleneck_width = 128
conv → 32*32*128 → BN → ReLU →
grouped conv → 32*32*128 → BN → ReLU →
conv, expansion*group_width=256 → 32*32*256 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 256
Block(in_planes=256, cardinality=2, bottleneck_width=64, stride=1):
group_width = cardinality * bottleneck_width = 128
conv → 32*32*128 → BN → ReLU →
grouped conv → 32*32*128 → BN → ReLU →
conv, expansion*group_width=256 → 32*32*256 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 256
bottleneck_width *= 2 → bottleneck_width = 128
layer2:
_make_layer(num_blocks[1], 2):
strides=[2,1,1]
Block(in_planes=256, cardinality=2, bottleneck_width=128, stride=2):
group_width = cardinality * bottleneck_width = 256
conv → 32*32*256 → BN → ReLU →
grouped conv → 16*16*256 → BN → ReLU →
conv, expansion*group_width=512 → 16*16*512 → BN → out →
shortcut (stride != 1):
conv → 16*16*512 → BN →
add(out,shortcut) → 16*16*512 → ReLU →
in_planes = expansion * cardinality * bottleneck_width = 512
Block(in_planes=512, cardinality=2, bottleneck_width=128, stride=1):
group_width = cardinality * bottleneck_width = 256
conv → 16*16*256 → BN → ReLU →
grouped conv → 16*16*256 → BN → ReLU →
conv, expansion*group_width=512 → 16*16*512 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 512
Block(in_planes=512, cardinality=2, bottleneck_width=128, stride=1):
group_width = cardinality * bottleneck_width = 256
conv → 16*16*256 → BN → ReLU →
grouped conv → 16*16*256 → BN → ReLU →
conv, expansion*group_width=512 → 16*16*512 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 512
bottleneck_width *= 2 → bottleneck_width = 256
layer3:
_make_layer(num_blocks[2], 2):
strides=[2,1,1]
Block(in_planes=512, cardinality=2, bottleneck_width=256, stride=2):
group_width = cardinality * bottleneck_width = 512
conv → 16*16*512 → BN → ReLU →
grouped conv → 8*8*512 → BN → ReLU →
conv, expansion*group_width=1024 → 8*8*1024 → BN → out →
shortcut (stride != 1):
conv → 8*8*1024 → BN →
add(out,shortcut) → 8*8*1024 → ReLU →
in_planes = expansion * cardinality * bottleneck_width = 1024
Block(in_planes=1024, cardinality=2, bottleneck_width=256, stride=1):
group_width = cardinality * bottleneck_width = 512
conv → 8*8*512 → BN → ReLU →
grouped conv → 8*8*512 → BN → ReLU →
conv, expansion*group_width=1024 → 8*8*1024 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 1024
Block(in_planes=1024, cardinality=2, bottleneck_width=256, stride=1):
group_width = cardinality * bottleneck_width = 512
conv → 8*8*512 → BN → ReLU →
grouped conv → 8*8*512 → BN → ReLU →
conv, expansion*group_width=1024 → 8*8*1024 → BN → out →
ReLU →
in_planes = expansion * cardinality * bottleneck_width = 1024
bottleneck_width *= 2 → bottleneck_width = 512
avg pool, k=8 → 1*1*1024 →
flatten, 1024 →
FC → (1024,10) →
softmax
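A quick shape check of my own for the ResNeXt29_2x64d defined above:

# Sketch: end-to-end shape check on a 32*32 input.
net = ResNeXt29_2x64d()
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])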
9. SqueezeNet
from torch import nn
import torch.nn.init as init
class Fire(nn.Module):
def __init__(self,inplanes,s1,e1,e3):
super(Fire,self).__init__()
self.inplanes=inplanes
self.squeeze=nn.Conv2d(inplanes,s1,kernel_size=1)
self.squeeze_activation=nn.ReLU(inplace=True)
self.expand1x1=nn.Conv2d(s1,e1,kernel_size=1)
self.expand1x1_activation=nn.ReLU(inplace=True)
        self.expand3x3=nn.Conv2d(s1,e3,kernel_size=3,padding=1)
self.expand3x3_activation=nn.ReLU(inplace=True)
def forward(self,x):
x=self.squeeze_activation(self.squeeze(x))
return torch.cat([
self.expand1x1_activation(self.expand1x1(x)),
self.expand3x3_activation(self.expand3x3(x))
],1)
class SqueezeNet(nn.Module):
def __init__(self, version=1.0, num_classes=10):
super(SqueezeNet, self).__init__()
if version not in [1.0, 1.1]:
raise ValueError("Unsupported SqueezeNet version {version}:"
"1.0 or 1.1 expected".format(version=version))
self.num_classes = num_classes
if version == 1.0:
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=7, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(96, 16, 64, 64),
Fire(128, 16, 64, 64),
Fire(128, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 32, 128, 128),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(512, 64, 256, 256),
)
else:
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(64, 16, 64, 64),
Fire(128, 16, 64, 64),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(128, 32, 128, 128),
Fire(256, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
Fire(512, 64, 256, 256),
)
final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
final_conv,
)
for m in self.modules():
if isinstance(m, nn.Conv2d):
if m is final_conv:
                    init.normal_(m.weight, mean=0.0, std=0.01)
                else:
                    init.kaiming_uniform_(m.weight)
if m.bias is not None:
m.bias.data.zero_()
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x.view(x.size(0), self.num_classes)
Notes:
SqueezeNet structure:
Fire class:
parameters: inplanes, s1, e1, e3
conv, k=1 → (*,*,s1) → ReLU → squeeze
conv, k=1, (*,*,s1) → (*,*,e1) → ReLU → expand1x1
conv, k=3, p=1, (*,*,s1) → (*,*,e3) → ReLU → expand3x3
cat(expand1x1, expand3x3)
SqueezeNet structure:
parameters: version=1.0, num_classes=10
if version not in [1.0, 1.1]: raise ValueError (1.0 or 1.1 expected)
if version == 1.0:
features:
conv, k=7, s=2 → 13*13*96 → ReLU → max pool, k=3, s=2, ceil_mode=True → 6*6*96
Fire(96, 16, 64, 64) → conv → conv+conv → 6*6*(64+64=128)
Fire(128, 16, 64, 64) → conv → conv+conv → 6*6*(64+64=128)
Fire(128, 32, 128, 128) → conv → conv+conv → 6*6*(128+128=256)
→ max pool, k=3, s=2, ceil_mode=True → 3*3*256
Fire(256, 32, 128, 128) → conv → conv+conv → 3*3*(128+128=256)
Fire(256, 48, 192, 192) → conv → conv+conv → 3*3*(192+192=384)
Fire(384, 48, 192, 192) → conv → conv+conv → 3*3*(192+192=384)
Fire(384, 64, 256, 256) → conv → conv+conv → 3*3*(256+256=512)
→ max pool, k=3, s=2, ceil_mode=True → 1*1*512
Fire(512, 64, 256, 256) → conv → conv+conv → 1*1*(256+256=512)
else (version == 1.1):
features:
conv, k=3, s=2 → 15*15*64 → ReLU → max pool, k=3, s=2, ceil_mode=True → 7*7*64
Fire(64, 16, 64, 64) → conv → conv+conv → 7*7*(64+64=128)
Fire(128, 16, 64, 64) → conv → conv+conv → 7*7*(64+64=128)
→ max pool, k=3, s=2, ceil_mode=True → 3*3*128
Fire(128, 32, 128, 128) → conv → conv+conv → 3*3*(128+128=256)
Fire(256, 32, 128, 128) → conv → conv+conv → 3*3*(128+128=256)
→ max pool, k=3, s=2, ceil_mode=True → 1*1*256
Fire(256, 48, 192, 192) → conv → conv+conv → 1*1*(192+192=384)
Fire(384, 48, 192, 192) → conv → conv+conv → 1*1*(192+192=384)
Fire(384, 64, 256, 256) → conv → conv+conv → 1*1*(256+256=512)
Fire(512, 64, 256, 256) → conv → conv+conv → 1*1*(256+256=512)
→ Dropout(p=0.5) →
final_conv: conv, k=1 → 1*1*10 →
weight initialization:
for m in self.modules():
if isinstance(m, nn.Conv2d): # is this a conv layer?
if m is final_conv: # the final conv layer uses normal initialization
init.normal_(m.weight, mean=0.0, std=0.01)
else: # all other conv layers use Kaiming uniform initialization
init.kaiming_uniform_(m.weight)
if m.bias is not None:
m.bias.data.zero_()
flatten → (batch, num_classes)
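A sketch of my own checking both versions end to end on a 32*32 input:

# Sketch: both SqueezeNet versions end in a (batch, 10) tensor.
for v in (1.0, 1.1):
    net = SqueezeNet(version=v)
    x = torch.randn(2, 3, 32, 32)
    print(v, net(x).shape)   # torch.Size([2, 10]) for both versions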
10. MobileNet
class Block(nn.Module):
"DWConv+PointWiseConv"
def __init__(self,in_planes,out_planes,stride):
super(Block,self).__init__()
self.conv1=nn.Conv2d(in_planes,in_planes,kernel_size=3,stride=stride,padding=1,groups=in_planes,bias=False)
self.bn1=nn.BatchNorm2d(in_planes)
self.conv2=nn.Conv2d(in_planes,out_planes,kernel_size=1,stride=1,padding=0,bias=False)
self.bn2=nn.BatchNorm2d(out_planes)
def forward(self,x):
out=F.relu(self.bn1(self.conv1(x)))
out=F.relu(self.bn2(self.conv2(out)))
return out
class MobileNet(nn.Module):
cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
def __init__(self, num_classes=10):
super(MobileNet, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.layers = self._make_layers(in_planes=32)
self.linear = nn.Linear(1024, num_classes)
def _make_layers(self, in_planes):
layers = []
for x in self.cfg:
out_planes = x if isinstance(x, int) else x[0]
stride = 1 if isinstance(x, int) else x[1]
layers.append(Block(in_planes, out_planes, stride))
in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layers(out)
out = F.avg_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
Notes:
MobileNet structure:
Block class:
parameters: in_planes, out_planes, stride
conv, k=3, s=stride, p=1, groups=in_planes, bias=False (depthwise)
if stride=1, H and W unchanged → (*,*,in_planes)
else (stride=2) → (*/2,*/2,in_planes)
→ (**,**,in_planes) → BN → ReLU →
conv, k=1, s=1, p=0, bias=False (pointwise) → (**,**,out_planes) → BN → ReLU →
MobileNet structure:
constant cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
parameter num_classes=10
_make_layers function:
parameter in_planes=32
for x in cfg:
out_planes = x if isinstance(x, int) else x[0]
stride = 1 if isinstance(x, int) else x[1]
e.g. in_planes=32, out_planes=64, stride=1
Block(in_planes, out_planes, stride)
in_planes = out_planes
conv, k=3, s=1, p=1, bias=False → 32*32*32 → BN → ReLU →
_make_layers(in_planes=32):
in_planes=32, out_planes=cfg[0]=64, stride=1
Block(32, 64, 1) → depthwise conv → 32*32*32 → conv → 32*32*64 → in_planes=64
in_planes=64, out_planes=cfg[1][0]=128, stride=cfg[1][1]=2
Block(64, 128, 2) → depthwise conv → 16*16*64 → conv → 16*16*128 → in_planes=128
in_planes=128, out_planes=cfg[2]=128, stride=1
Block(128, 128, 1) → depthwise conv → 16*16*128 → conv → 16*16*128 → in_planes=128
in_planes=128, out_planes=cfg[3][0]=256, stride=cfg[3][1]=2
Block(128, 256, 2) → depthwise conv → 8*8*128 → conv → 8*8*256 → in_planes=256
in_planes=256, out_planes=cfg[4]=256, stride=1
Block(256, 256, 1) → depthwise conv → 8*8*256 → conv → 8*8*256 → in_planes=256
in_planes=256, out_planes=cfg[5][0]=512, stride=cfg[5][1]=2
Block(256, 512, 2) → depthwise conv → 4*4*256 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[6]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[7]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[8]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[9]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[10]=512, stride=1
Block(512, 512, 1) → depthwise conv → 4*4*512 → conv → 4*4*512 → in_planes=512
in_planes=512, out_planes=cfg[11][0]=1024, stride=cfg[11][1]=2
Block(512, 1024, 2) → depthwise conv → 2*2*512 → conv → 2*2*1024 → in_planes=1024
in_planes=1024, out_planes=cfg[12]=1024, stride=1
Block(1024, 1024, 1) → depthwise conv → 2*2*1024 → conv → 2*2*1024 → in_planes=1024
avg pool, k=2 → 1*1*1024 →
flatten → 1024 →
FC → (1024,10) →
softmax
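The point of the depthwise + pointwise split is the parameter saving; a comparison of my own against a plain 3*3 convolution with the same channel counts:

# Sketch: one depthwise-separable Block vs. a standard 3x3 conv.
blk = Block(in_planes=128, out_planes=256, stride=1)
std = nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False)
print(sum(p.numel() for p in blk.parameters()))  # depthwise + pointwise (+ BN)
print(std.weight.numel())                        # 128*256*3*3 = 294912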
11. ShuffleNet
class ShuffleBlock(nn.Module):
def __init__(self,groups):
super(ShuffleBlock,self).__init__()
self.groups=groups
def forward(self,x):
'''
        [N,C,H,W] -> split into g groups -> [N,g,C/g,H,W] -> transpose groups and channels -> flatten back to channel-shuffled [N,C,H,W]
'''
N,C,H,W=x.size()
g=self.groups
        return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)
class Bottleneck(nn.Module):
def __init__(self, in_planes, out_planes, stride, groups):
super(Bottleneck, self).__init__()
self.stride = stride
        mid_planes = out_planes//4
g = 1 if in_planes==24 else groups
self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
self.bn1 = nn.BatchNorm2d(mid_planes)
self.shuffle1 = ShuffleBlock(groups=g)
self.conv2 = nn.Conv2d(mid_planes, mid_planes,
kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
self.bn2 = nn.BatchNorm2d(mid_planes)
self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
self.bn3 = nn.BatchNorm2d(out_planes)
self.shortcut = nn.Sequential()
if stride == 2:
self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = F.relu(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
res = self.shortcut(x)
out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
return out
class ShuffleNet(nn.Module):
def __init__(self, cfg):
super(ShuffleNet, self).__init__()
out_planes = cfg['out_planes']
num_blocks = cfg['num_blocks']
groups = cfg['groups']
self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(24)
self.in_planes = 24
self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
self.linear = nn.Linear(out_planes[2], 10)
def _make_layer(self, out_planes, num_blocks, groups):
layers = []
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = self.in_planes if i == 0 else 0
layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
self.in_planes = out_planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
Notes:
ShuffleNet structure:
ShuffleBlock class (see the sketch below):
parameter: groups
# channel shuffle: a transpose-and-regroup operation
# [N,C,H,W] -> split into g groups -> [N,g,C/g,H,W] -> transpose groups and channels -> flatten back to [N,C,H,W]
N,C,H,W = x.size()
# permute swaps dims 1 and 2
x.view(N,g,C//g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)
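A tiny numeric demo of my own makes the shuffle concrete: with C=4 channels and g=2 groups, the channel order [0,1,2,3] becomes [0,2,1,3]:

# Sketch: channel shuffle on a 1x4x1x1 tensor.
x = torch.arange(4).float().view(1, 4, 1, 1)
print(ShuffleBlock(groups=2)(x).view(-1))   # tensor([0., 2., 1., 3.])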
Bottleneck class:
parameters: in_planes, out_planes, stride, groups
bottleneck compression: mid_planes = out_planes//4
g = 1 if in_planes==24 else groups
conv, k=1, groups=g, bias=False → (*,*,mid_planes) → BN → ReLU → ShuffleBlock(groups=g)
conv, k=3, s=stride, p=1, groups=mid_planes, bias=False
if stride=1 → (*,*,mid_planes)
else (stride=2) → (*/2,*/2,mid_planes)
→ (**,**,mid_planes)
→ BN → ReLU →
conv, k=1, groups=groups → (**,**,out_planes) → BN → out
shortcut:
if stride=2: avg pool, k=3, s=2, p=1 → (*/2,*/2,in_planes)
if stride=2: cat(out,shortcut) → (*/2,*/2,out_planes+in_planes) → ReLU
else (stride=1): add(out,shortcut) → (*,*,out_planes) → ReLU
ShuffleNet structure:
cfg = {
'out_planes': [200,400,800],
'num_blocks': [4,8,4],
'groups': 2
}
out_planes = cfg['out_planes']
num_blocks = cfg['num_blocks']
groups = cfg['groups']
_make_layer function:
parameters: out_planes, num_blocks, groups
for i in range(num_blocks):
stride = 2 if i == 0 else 1
cat_planes = in_planes if i == 0 else 0
Bottleneck(in_planes, out_planes-cat_planes, stride=stride, groups=groups)
in_planes = out_planes
conv, k=1, bias=False → 32*32*24 → BN → ReLU →
in_planes = 24
layer1:
_make_layer(out_planes[0], num_blocks[0], groups)
out_planes=200, num_blocks=4, groups=2
i=0, stride=2, cat_planes=in_planes=24:
Bottleneck(in_planes=24, out_planes=200-24=176, stride=2, groups=2)
mid_planes = out_planes//4 = 44
grouped conv g=1 → 32*32*44 → BN → ReLU → channel shuffle →
grouped conv g=44 → 16*16*44 → BN → ReLU → grouped conv g=2 → 16*16*176 → BN → out
shortcut: avg pool → 16*16*24 →
cat(out,shortcut) → 16*16*(176+24=200) → in_planes=200
i=1, stride=1, cat_planes=0:
Bottleneck(in_planes=200, out_planes=200-0=200, stride=1, groups=2)
mid_planes = out_planes//4 = 50
grouped conv g=2 → 16*16*50 → BN → ReLU → channel shuffle →
grouped conv g=50 → 16*16*50 → BN → ReLU → grouped conv g=2 → 16*16*200 → BN → out → ReLU
→ in_planes=200
i=2, stride=1, cat_planes=0:
Bottleneck(in_planes=200, out_planes=200-0=200, stride=1, groups=2)
so i=2 and i=3 are the same as i=1
in_planes = 200
layer2:
_make_layer(out_planes[1], num_blocks[1], groups)
out_planes=400, num_blocks=8, groups=2
i=0, stride=2, cat_planes=in_planes=200:
Bottleneck(in_planes=200, out_planes=400-200=200, stride=2, groups=2)
mid_planes = out_planes//4 = 50
grouped conv g=2 → 16*16*50 → BN → ReLU → channel shuffle →
grouped conv g=50 → 8*8*50 → BN → ReLU → grouped conv g=2 → 8*8*200 → BN → out
shortcut: avg pool → 8*8*200 →
cat(out,shortcut) → 8*8*(200+200=400) → in_planes=400
i=1, stride=1, cat_planes=0:
Bottleneck(in_planes=400, out_planes=400-0=400, stride=1, groups=2)
mid_planes = out_planes//4 = 100
grouped conv g=2 → 8*8*100 → BN → ReLU → channel shuffle →
grouped conv g=100 → 8*8*100 → BN → ReLU → grouped conv g=2 → 8*8*400 → BN → out → ReLU
→ in_planes=400
i=2, stride=1, cat_planes=0:
Bottleneck(in_planes=400, out_planes=400-0=400, stride=1, groups=2)
so i=2 through i=7 are the same as i=1
in_planes = 400
layer3:
_make_layer(out_planes[2], num_blocks[2], groups)
out_planes=800, num_blocks=4, groups=2
i=0, stride=2, cat_planes=in_planes=400:
Bottleneck(in_planes=400, out_planes=800-400=400, stride=2, groups=2)
mid_planes = out_planes//4 = 100
grouped conv g=2 → 8*8*100 → BN → ReLU → channel shuffle →
grouped conv g=100 → 4*4*100 → BN → ReLU → grouped conv g=2 → 4*4*400 → BN → out
shortcut: avg pool → 4*4*400 →
cat(out,shortcut) → 4*4*(400+400=800) → in_planes=800
i=1, stride=1, cat_planes=0:
Bottleneck(in_planes=800, out_planes=800-0=800, stride=1, groups=2)
mid_planes = out_planes//4 = 200
grouped conv g=2 → 4*4*200 → BN → ReLU → channel shuffle →
grouped conv g=200 → 4*4*200 → BN → ReLU → grouped conv g=2 → 4*4*800 → BN → out → ReLU
→ in_planes=800
i=2, stride=1, cat_planes=0:
Bottleneck(in_planes=800, out_planes=800-0=800, stride=1, groups=2)
so i=2 and i=3 are the same as i=1
avg pool, k=4 → 1*1*800 →
flatten → 800 → FC → (800,10) → softmax
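A usage sketch of my own with the cfg from the notes above:

# Sketch: instantiate ShuffleNet with the cfg dict and check the output shape.
cfg = {'out_planes': [200, 400, 800], 'num_blocks': [4, 8, 4], 'groups': 2}
net = ShuffleNet(cfg)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])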
12. SEResNet18
class SELayer(nn.Module):
def __init__(self,channel,reduction=16):
super(SELayer,self).__init__()
self.avg_pool=nn.AdaptiveAvgPool2d(1)
self.fc=nn.Sequential(
nn.Linear(channel,channel//reduction,bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel//reduction,channel,bias=False),
nn.Sigmoid()
)
def forward(self,x):
b,c,_,_=x.size()
y=self.avg_pool(x).view(b,c)
y=self.fc(y).view(b,c,1,1)
y=y.expand_as(x)
return x*y
class SEResNetBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride,reduction=16):
super(SEResNetBlock,self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.se=SELayer(out_channels,reduction)
self.downsample = nn.Sequential()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out = self.se(out)
out += self.downsample(x)
out = F.relu(out)
return out
class SEResNetLayer(nn.Module):
def __init__(self,block,n_blocks,in_channels,out_channels,stride):
super(SEResNetLayer,self).__init__()
        layers=[]  # a local list; assigning to self.modules would shadow nn.Module.modules()
        layers.append(block(in_channels,out_channels,stride))
        for _ in range(n_blocks-1):
            layers.append(block(out_channels,out_channels,1))
        self.blocks=nn.Sequential(*layers)
def forward(self,x):
return self.blocks(x)
class SEResNet18(nn.Module):
def __init__(self,layer,block):
super(SEResNet18,self).__init__()
n_blocks=[2,2,2,2]
self.conv1=nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1,bias=False)
self.bn1=nn.BatchNorm2d(64)
self.rb1=layer(block,n_blocks[0],64,64,1)
self.rb2=layer(block,n_blocks[1],64,128,2)
self.rb3=layer(block,n_blocks[2],128,256,2)
self.rb4=layer(block,n_blocks[3],256,512,2)
self.fc=nn.Linear(512,10)
def forward(self,x):
out=F.relu(self.bn1(self.conv1(x)))
out=self.rb1(out)
out=self.rb2(out)
out=self.rb3(out)
out=self.rb4(out)
out=F.avg_pool2d(out,4)
out=out.view(out.shape[0],-1)
out=self.fc(out)
return out
Notes:
SEResNet18 structure:
SELayer class:
parameters: channel, reduction=16 (the paper compresses by a factor of 16)
b,c,_,_ = x.size() # b is the batch size, c the channel count; H and W are not needed here
AdaptiveAvgPool2d(1) picks the kernel size automatically so the output is always 1*1 → .view(b,c) # b*c*1*1 -> b*c
FC, bias=False → (channel, channel//reduction) → ReLU →
FC, bias=False → (channel//reduction, channel) → sigmoid →
.view(b,c,1,1) # b*c -> b*c*1*1
→ .expand_as(x) # b*c*1*1 -> b*c*h*w # the per-channel weights
→ y
→ x*y
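A small demo of my own: SELayer computes one weight in (0,1) per channel and rescales the input channel-wise without changing its shape:

# Sketch: SE recalibration keeps the input shape.
se = SELayer(channel=64, reduction=16)
x = torch.randn(2, 64, 8, 8)
print(se(x).shape)   # torch.Size([2, 64, 8, 8])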
SEResNetBlock module:
parameters: in_channels, out_channels, stride, reduction=16
conv, k=3, s=stride, p=1, bias=False:
if stride=1 → (*,*,out_channels)
else (stride=2) → (*/2,*/2,out_channels)
→ (**,**,out_channels)
→ BN → ReLU
conv, k=3, s=1, p=1, bias=False → (**,**,out_channels) → BN → out
se: SELayer(out_channels,reduction) → channel-wise reweighting via the FC layers → se(out) → out →
if stride != 1 or in_channels != out_channels:
downsample: conv, k=1, s=stride, bias=False:
if stride=2 → (*/2,*/2,out_channels)
→ BN
add(out,downsample) → ReLU →
SEResNetLayer class:
parameters: block, n_blocks, in_channels, out_channels, stride
block is the SEResNetBlock above; n_blocks is an integer
SEResNetBlock(in_channels, out_channels, stride)
for _ in range(n_blocks-1):
SEResNetBlock(out_channels, out_channels, 1)
SEResNet18 structure:
parameters: layer, block; layer is the SEResNetLayer above, block the SEResNetBlock above
n_blocks=[2,2,2,2]
conv, k=3, s=1, p=1, bias=False → 32*32*64 → BN → ReLU →
rb1:
SEResNetLayer(SEResNetBlock, n_blocks[0]=2, 64, 64, 1):
in_channels=64, out_channels=64, stride=1
SEResNetBlock(in_channels, out_channels, stride):
conv → 32*32*64 → BN → ReLU → conv → 32*32*64 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
→ ReLU
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 32*32*64 → BN → out → SE reweighting → out → ReLU
rb2:
SEResNetLayer(SEResNetBlock, n_blocks[1]=2, 64, 128, 2):
in_channels=64, out_channels=128, stride=2
SEResNetBlock(in_channels, out_channels, stride):
conv → 16*16*128 → BN → ReLU → conv → 16*16*128 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
downsample: conv → 16*16*128 → BN →
add(out,downsample) → ReLU →
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 16*16*128 → BN → out → SE reweighting → out → ReLU
rb3:
SEResNetLayer(SEResNetBlock, n_blocks[2]=2, 128, 256, 2):
in_channels=128, out_channels=256, stride=2
SEResNetBlock(in_channels, out_channels, stride):
conv → 8*8*256 → BN → ReLU → conv → 8*8*256 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
downsample: conv → 8*8*256 → BN →
add(out,downsample) → ReLU →
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 8*8*256 → BN → out → SE reweighting → out → ReLU
rb4:
SEResNetLayer(SEResNetBlock, n_blocks[3]=2, 256, 512, 2):
in_channels=256, out_channels=512, stride=2
SEResNetBlock(in_channels, out_channels, stride):
conv → 4*4*512 → BN → ReLU → conv → 4*4*512 → BN → out →
se: SELayer(out_channels,reduction) → SE reweighting → se(out) → out →
downsample: conv → 4*4*512 → BN →
add(out,downsample) → ReLU →
for _ in range(n_blocks-1=1):
SEResNetBlock(out_channels, out_channels, 1):
conv → BN → ReLU → conv → 4*4*512 → BN → out → SE reweighting → out → ReLU
avg pool, k=4 → 1*1*512 →
flatten → 512 → FC → (512,10) → softmax
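As with ResNet18, the constructor takes the layer and block classes; a usage sketch of my own:

# Sketch: instantiate SEResNet18 with the classes defined above.
net = SEResNet18(SEResNetLayer, SEResNetBlock)
x = torch.randn(1, 3, 32, 32)
print(net(x).shape)   # torch.Size([1, 10])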