PyTorch搭建AlexNet
AlexNet是2012年ImageNet竞赛的冠军模型,top-1准确率达到57.1%,top-5准确率达到80.2%。
AlexNet网络结构图
结构详细解读
AlexNet网络由8层构成,包括5层卷积层和3层全连接层。输入为3x224x224。
各卷积层的参数及输入、输出特征图大小的计算如下表所示。注意:表中第2、4层卷积的卷积核个数分别为256和384,而下文代码中对应层的out_channels分别为192和256,两者并不完全一致。
层数 | input size | kernel size | filters | stride | padding | output size | max pooling(kernel = 3, stride = 2) |
---|---|---|---|---|---|---|---|
1 | 3x224x224 | 11x11 | 96 | 4 | 2 | 96x55x55 | Yes |
2 | 96x27x27 | 5x5 | 256 | 1 | 2 | 256x27x27 | Yes |
3 | 256x13x13 | 3x3 | 384 | 1 | 1 | 384x13x13 | No |
4 | 384x13x13 | 3x3 | 384 | 1 | 1 | 384x13x13 | No |
5 | 384x13x13 | 3x3 | 256 | 1 | 1 | 256x13x13 | Yes |
PyTorch搭建
import torch
import torch.nn as nn
import torchvision
class AlexNet(nn.Module):
    """AlexNet-style image classifier: five conv layers plus three linear layers.

    The convolutional stack maps a ``(N, 3, 224, 224)`` batch to a
    ``(N, 256, 6, 6)`` feature map, which is flattened and passed through
    the fully connected classifier to produce ``(N, num_classes)`` logits.

    An adaptive average pool sits between the two stages so that inputs
    whose spatial size is not 224x224 still reach the classifier with the
    expected 256*6*6 features. For 224x224 inputs the feature map is
    already 6x6, so that pool is an exact identity. It has no parameters,
    so the set of learnable tensors is unchanged.

    Args:
        num_classes: Number of output logits (size of the last linear layer).
    """

    def __init__(self, num_classes: int = 1000) -> None:
        super().__init__()

        # Convolutional feature extractor. The conv layers are created
        # without a bias term. Spatial sizes below assume a 224x224 input.
        self.feature_extraction = nn.Sequential(
            # conv1: 3x224x224 -> 96x55x55, pooled to 96x27x27
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11,
                      stride=4, padding=2, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            # conv2: 96x27x27 -> 192x27x27, pooled to 192x13x13
            nn.Conv2d(in_channels=96, out_channels=192, kernel_size=5,
                      stride=1, padding=2, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            # conv3: 192x13x13 -> 384x13x13
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            # conv4: 384x13x13 -> 256x13x13
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            # conv5: 256x13x13 -> 256x13x13, pooled to 256x6x6
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        )

        # Forces a 6x6 feature map regardless of input resolution, so the
        # first linear layer always receives 256*6*6 features.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected classifier head with dropout before the first
        # two linear layers.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(N, num_classes)`` for a batch ``x``.

        Args:
            x: Image batch of shape ``(N, 3, H, W)``. ``H`` and ``W`` must
                be large enough for the conv/pool stack to yield a
                non-empty feature map; 224x224 is the nominal size.
        """
        x = self.feature_extraction(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
if __name__ == '__main__':
    # Smoke test: build the network, print its layer layout, and push a
    # random batch through it to check the output shape.
    # NOTE: torchvision.models.AlexNet() can be swapped in here to compare
    # against the torchvision implementation.
    model = AlexNet()
    print(model)

    # Batch of 8 random RGB images at the nominal 224x224 resolution.
    # Named `images` rather than `input` to avoid shadowing the builtin.
    images = torch.randn(8, 3, 224, 224)
    out = model(images)
    print(out.shape)