Convolutional Neural Network Parameter Count and GPU Memory Usage

Computing the parameter count

The parameters of a convolutional neural network live in its convolutional layers and fully connected layers. A single fully connected layer typically holds far more parameters than a convolutional layer, because every input unit is connected to every output unit.
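To make the counting rule concrete before the VGG16 walkthrough, here is a minimal sketch of the two formulas used throughout this post; the helper names conv_params and fc_params are made up for illustration:

def conv_params(k, c_in, c_out, bias=True):
    # k x k convolution: k*k*c_in*c_out weights, plus c_out bias terms
    return k * k * c_in * c_out + (c_out if bias else 0)

def fc_params(n_in, n_out, bias=True):
    # fully connected layer: n_in*n_out weights, plus n_out bias terms
    return n_in * n_out + (n_out if bias else 0)

print(conv_params(3, 3, 64))           # 1792: first conv of VGG16 (3x3x3x64 + 64)
print(fc_params(7 * 7 * 512, 4096))    # 102764544: first FC layer of VGG16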

GPU memory usage

GPU memory is consumed mainly by two things: the model itself (the parameters of the convolutional and fully connected layers) and the outputs (feature maps) of each layer.

The following uses VGG16 as a worked example.
The details of each layer are as follows (values in parentheses include the bias terms):

| Layer (operation) | Input | Filter | Stride | Padding | Output | Parameter formula | Parameters | Memory (output size) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Input | 224x224x3 | - | - | - | 224x224x3 | - | 0 | 224x224x3 |
| Conv3-64 | 224x224x3 | 3x3x64 | 1 | 1 | 224x224x64 | 3x3x3x64 (+ 64) | 1728 (1792) | 224x224x64 |
| Conv3-64 | 224x224x64 | 3x3x64 | 1 | 1 | 224x224x64 | 3x3x64x64 (+ 64) | 36864 (36928) | 224x224x64 |
| MaxPool2 | 224x224x64 | 2x2 | 2 | 0 | 112x112x64 | - | 0 | 112x112x64 |
| Conv3-128 | 112x112x64 | 3x3x128 | 1 | 1 | 112x112x128 | 3x3x64x128 (+ 128) | 73728 (73856) | 112x112x128 |
| Conv3-128 | 112x112x128 | 3x3x128 | 1 | 1 | 112x112x128 | 3x3x128x128 (+ 128) | 147456 (147584) | 112x112x128 |
| MaxPool2 | 112x112x128 | 2x2 | 2 | 0 | 56x56x128 | - | 0 | 56x56x128 |
| Conv3-256 | 56x56x128 | 3x3x256 | 1 | 1 | 56x56x256 | 3x3x128x256 (+ 256) | 294912 (295168) | 56x56x256 |
| Conv3-256 | 56x56x256 | 3x3x256 | 1 | 1 | 56x56x256 | 3x3x256x256 (+ 256) | 589824 (590080) | 56x56x256 |
| Conv3-256 | 56x56x256 | 3x3x256 | 1 | 1 | 56x56x256 | 3x3x256x256 (+ 256) | 589824 (590080) | 56x56x256 |
| MaxPool2 | 56x56x256 | 2x2 | 2 | 0 | 28x28x256 | - | 0 | 28x28x256 |
| Conv3-512 | 28x28x256 | 3x3x512 | 1 | 1 | 28x28x512 | 3x3x256x512 (+ 512) | 1179648 (1180160) | 28x28x512 |
| Conv3-512 | 28x28x512 | 3x3x512 | 1 | 1 | 28x28x512 | 3x3x512x512 (+ 512) | 2359296 (2359808) | 28x28x512 |
| Conv3-512 | 28x28x512 | 3x3x512 | 1 | 1 | 28x28x512 | 3x3x512x512 (+ 512) | 2359296 (2359808) | 28x28x512 |
| MaxPool2 | 28x28x512 | 2x2 | 2 | 0 | 14x14x512 | - | 0 | 14x14x512 |
| Conv3-512 | 14x14x512 | 3x3x512 | 1 | 1 | 14x14x512 | 3x3x512x512 (+ 512) | 2359296 (2359808) | 14x14x512 |
| Conv3-512 | 14x14x512 | 3x3x512 | 1 | 1 | 14x14x512 | 3x3x512x512 (+ 512) | 2359296 (2359808) | 14x14x512 |
| Conv3-512 | 14x14x512 | 3x3x512 | 1 | 1 | 14x14x512 | 3x3x512x512 (+ 512) | 2359296 (2359808) | 14x14x512 |
| MaxPool2 | 14x14x512 | 2x2 | 2 | 0 | 7x7x512 | - | 0 | 7x7x512 |
| FC1 | 7x7x512 | - | - | - | 4096 | 7x7x512x4096 (+ 4096) | 102760448 (102764544) | 4096 |
| FC2 | 4096 | - | - | - | 4096 | 4096x4096 (+ 4096) | 16777216 (16781312) | 4096 |
| FC3 | 4096 | - | - | - | 1000 | 4096x1000 (+ 1000) | 4096000 (4097000) | 1000 |
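The output sizes in the table follow the usual convolution/pooling size formula: for input side H, kernel k, stride s, and padding p, the output side is floor((H + 2p - k) / s) + 1, so a 3x3 convolution with stride 1 and padding 1 preserves the spatial size while a 2x2 max-pool with stride 2 halves it. A quick check:

def out_size(h, k, s, p):
    # floor((H + 2p - k) / s) + 1
    return (h + 2 * p - k) // s + 1

print(out_size(224, k=3, s=1, p=1))  # 224: 3x3 conv, stride 1, padding 1 keeps the size
print(out_size(224, k=2, s=2, p=0))  # 112: 2x2 max-pool, stride 2 halves it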

Total parameter count: 138,344,128 (138,357,544 including biases)
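Summing the per-layer counts from the table reproduces both totals; a quick arithmetic check, nothing more:

conv_weights = [1728, 36864, 73728, 147456, 294912, 589824, 589824,
                1179648, 2359296, 2359296, 2359296, 2359296, 2359296]
fc_weights   = [102760448, 16777216, 4096000]
biases       = [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512,
                4096, 4096, 1000]

print(sum(conv_weights) + sum(fc_weights))                # 138344128 (weights only)
print(sum(conv_weights) + sum(fc_weights) + sum(biases))  # 138357544 (with biases)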

GPU memory consumed by VGG16

Memory used by the parameters
138344128 * 4 / 1024 / 1024 = 527.74 MB ≈ 528 MB (there are 138,344,128 parameters, each stored as float32, i.e. 32 bits = 4 bytes)

Memory used by the layer outputs (feature maps)
15,237,608 * 4 / 1024 / 1024 = 58.12 MB per image (the total number of output elements across all layers for a single image, again stored as float32)

For a batch of 128 images: 128 * 58.12 MB * 2 ≈ 14.5 GB (the factor of 2 is a rough rule of thumb for keeping activations around for both the forward and backward passes).
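The per-image activation figure can be reproduced by summing the output sizes from the table; this is a rough sketch that only counts the stored layer outputs and ignores any framework or workspace overhead:

# Output elements per layer, copied from the memory column of the table above
feature_maps = [
    224*224*3,                         # input image
    224*224*64, 224*224*64,            # conv block 1
    112*112*64,                        # pool 1
    112*112*128, 112*112*128,          # conv block 2
    56*56*128,                         # pool 2
    56*56*256, 56*56*256, 56*56*256,   # conv block 3
    28*28*256,                         # pool 3
    28*28*512, 28*28*512, 28*28*512,   # conv block 4
    14*14*512,                         # pool 4
    14*14*512, 14*14*512, 14*14*512,   # conv block 5
    7*7*512,                           # pool 5
    4096, 4096, 1000,                  # FC outputs
]
elements = sum(feature_maps)
print(elements)                                      # 15237608 elements per image
print(elements * 4 / 1024 / 1024)                    # ~58.12 MB per image (float32)
print(128 * elements * 4 * 2 / 1024 / 1024 / 1024)   # ~14.5 GB for batch 128, forward + backward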

Verification code

# Verification 1: bias=False everywhere; the parameter total should be 138,344,128
import torch
import torch.nn as nn

class VGG(nn.Module):
    def __init__(self, vgg_name, cfg, num_classes=10, bn=False):
        super(VGG, self).__init__()
        self.vgg_base = self.make_layer(cfg, bn)

        if vgg_name == 'vgg16_C':
            # this implementation applies padding=1 even to the 1x1 convs, so vgg16_C ends with 8x8 feature maps instead of 7x7
            self.fc1 = nn.Sequential(nn.Linear(512 * 8 * 8, 4096, bias=False),
                                     nn.ReLU(inplace=True),
                                     nn.Dropout())
        else:
            self.fc1 = nn.Sequential(nn.Linear(512 * 7 * 7, 4096, bias=False),  # 4096 is just an empirical choice; it can be changed, but should not be smaller than the number of classes to predict
                                     nn.ReLU(inplace=True),
                                     nn.Dropout())
        self.fc2 = nn.Sequential(nn.Linear(4096, 4096, bias=False),
                                 nn.ReLU(inplace=True),
                                 nn.Dropout())
        self.fc3 = nn.Linear(4096, num_classes, bias=False)

    def make_layer(self, cfg, bn=False):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d((2, 2), stride=2)]
            else:
                out_channels, s = v.strip().split('_')  # e.g. '512_3' -> out_channels 512, kernel size 3
                out_channels, s = int(out_channels), int(s)

                if bn:
                    layers += [nn.Conv2d(in_channels, out_channels, (s, s), padding=1, bias=False),
                               nn.BatchNorm2d(out_channels),
                               nn.ReLU(inplace=True)]  # nn.Conv2d uses bias=True by default, so it is disabled explicitly here
                else:
                    layers += [nn.Conv2d(in_channels, out_channels, (s, s), padding=1, bias=False),
                               nn.ReLU(inplace=True)]
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        batch_size = x.size()[0]
        x = self.vgg_base(x)
        print(x.shape)  # [1, 512, 7, 7]
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


cfg = {
    'vgg11_A': ['64_3', 'M',
                '128_3', 'M',
                '256_3', '256_3', 'M',
                '512_3', '512_3', 'M',
                '512_3', '512_3', 'M'],
    'vgg13_B': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', 'M',
                '512_3', '512_3', 'M',
                '512_3', '512_3', 'M'],
    'vgg16_C': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', '256_1', 'M',
                '512_3', '512_3', '512_1', 'M',
                '512_3', '512_3', '512_1', 'M'],
    'vgg16_D': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', '256_3', 'M',
                '512_3', '512_3', '512_3', 'M',
                '512_3', '512_3', '512_3', 'M'],
    'vgg19_E': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', '256_3', '256_3', 'M',
                '512_3', '512_3', '512_3', '512_3', 'M',
                '512_3', '512_3', '512_3', '512_3', 'M'],
}

if __name__ == '__main__':
    input_tensor = torch.randn((1, 3, 224, 224))

    vgg_name = 'vgg16_D'
    model = VGG(vgg_name, cfg[vgg_name], num_classes=1000)
    output = model(input_tensor)
    print(output.shape)
    print(output.dtype)
    print('total_parameter_num:', sum(param.numel() for param in model.parameters()))

# Verification 2: keep the PyTorch defaults (bias=True); the parameter total should be 138,357,544
import torch
import torch.nn as nn

class VGG(nn.Module):
    def __init__(self, vgg_name, cfg, num_classes=10, bn=False):
        super(VGG, self).__init__()
        self.vgg_base = self.make_layer(cfg, bn)

        if vgg_name == 'vgg16_C':
            # this implementation applies padding=1 even to the 1x1 convs, so vgg16_C ends with 8x8 feature maps instead of 7x7
            self.fc1 = nn.Sequential(nn.Linear(512 * 8 * 8, 4096),
                                     nn.ReLU(inplace=True),
                                     nn.Dropout())
        else:
            self.fc1 = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),  # 4096 is just an empirical choice; it can be changed, but should not be smaller than the number of classes to predict
                                     nn.ReLU(inplace=True),
                                     nn.Dropout())
        self.fc2 = nn.Sequential(nn.Linear(4096, 4096),
                                 nn.ReLU(inplace=True),
                                 nn.Dropout())
        self.fc3 = nn.Linear(4096, num_classes)

    def make_layer(self, cfg, bn=False):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d((2, 2), stride=2)]
            else:
                out_channels, s = v.strip().split('_')  # e.g. '512_3' -> out_channels 512, kernel size 3
                out_channels, s = int(out_channels), int(s)

                if bn:
                    layers += [nn.Conv2d(in_channels, out_channels, (s, s), padding=1),
                               nn.BatchNorm2d(out_channels),
                               nn.ReLU(inplace=True)]  # nn.Conv2d uses bias=True by default
                else:
                    layers += [nn.Conv2d(in_channels, out_channels, (s, s), padding=1),
                               nn.ReLU(inplace=True)]
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        batch_size = x.size()[0]
        x = self.vgg_base(x)
        print(x.shape)  # [1, 512, 7, 7]
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


cfg = {
    'vgg11_A': ['64_3', 'M',
                '128_3', 'M',
                '256_3', '256_3', 'M',
                '512_3', '512_3', 'M',
                '512_3', '512_3', 'M'],
    'vgg13_B': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', 'M',
                '512_3', '512_3', 'M',
                '512_3', '512_3', 'M'],
    'vgg16_C': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', '256_1', 'M',
                '512_3', '512_3', '512_1', 'M',
                '512_3', '512_3', '512_1', 'M'],
    'vgg16_D': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', '256_3', 'M',
                '512_3', '512_3', '512_3', 'M',
                '512_3', '512_3', '512_3', 'M'],
    'vgg19_E': ['64_3', '64_3', 'M',
                '128_3', '128_3', 'M',
                '256_3', '256_3', '256_3', '256_3', 'M',
                '512_3', '512_3', '512_3', '512_3', 'M',
                '512_3', '512_3', '512_3', '512_3', 'M'],
}

if __name__ == '__main__':
    input_tensor = torch.randn((1, 3, 224, 224))

    vgg_name = 'vgg16_D'
    model = VGG(vgg_name, cfg[vgg_name], num_classes=1000)
    output = model(input_tensor)
    print(output.shape)
    print(output.dtype)
    print('total_parameter_num:', sum(param.numel() for param in model.parameters()))
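
As an independent cross-check (assuming torchvision is installed), the reference VGG16 that ships with torchvision reports the same with-bias total:

import torchvision

# torchvision's VGG16 is configuration D with biases enabled everywhere
model = torchvision.models.vgg16()
print(sum(p.numel() for p in model.parameters()))  # 138357544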

References
https://blog.csdn.net/qq_38262728/article/details/89813503
https://blog.csdn.net/qian99/article/details/79008053
