1. 参考链接
2. resnet网络的model.py内容
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut connection.

    This is the block type that ``resnet34`` hands to ``ResNet``.

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Number of channels produced by both convolutions.
        stride: Stride of the first 3x3 convolution (the second one always
            uses stride 1).
        downsample: Optional module applied to the input to build the
            shortcut branch. The caller is responsible for making its output
            match the main branch, because the two are added element-wise.
        **kwargs: Accepted and ignored. ``ResNet._make_layer`` passes
            ``groups`` and ``width_per_group`` to every block type, and this
            block has no use for them.
    """

    # Output channels of the block = out_channel * expansion.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super(BasicBlock, self).__init__()
        # bias=False: each convolution is followed directly by BatchNorm.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,
                               kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        # Shortcut branch: the raw input, or its projection when one was given.
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # The residual sum happens before the final activation.
        out += identity
        out = self.relu(out)

        return out
class Bottleneck(nn.Module):
    """Three-layer residual block: 1x1 reduce, 3x3 (optionally grouped), 1x1 expand.

    Note: in the original paper, on the main branch of the dashed
    (downsampling) residual structure, the first 1x1 convolution has stride 2
    and the 3x3 convolution has stride 1. The official PyTorch implementation
    instead uses stride 1 for the first 1x1 convolution and stride 2 for the
    3x3 convolution, which improves top-1 accuracy by roughly 0.5%.
    See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Base channel count; the block outputs
            ``out_channel * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the
            shortcut branch. The caller must make its output match the main
            branch, because the two are added element-wise.
        groups: Number of groups of the 3x3 convolution.
        width_per_group: Per-group width; with the defaults (groups=1,
            width_per_group=64) the inner width equals ``out_channel``.
    """

    # Output channels of the block = out_channel * expansion.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super(Bottleneck, self).__init__()

        # Channel count used by the first two convolutions.
        width = int(out_channel * (width_per_group / 64.)) * groups

        # 1x1 convolution that squeezes the channels down to `width`.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3 convolution; this is where the stride and the grouping apply.
        self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, groups=groups,
                               kernel_size=3, stride=stride, bias=False, padding=1)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1 convolution that expands to out_channel * expansion channels.
        self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel*self.expansion,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel*self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut, then apply ReLU."""
        # Shortcut branch: the raw input, or its projection when one was given.
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # The residual sum happens before the final activation.
        out += identity
        out = self.relu(out)

        return out
class ResNet(nn.Module):
    """ResNet / ResNeXt backbone assembled from a configurable residual block.

    Layout: 7x7 stride-2 convolution -> BatchNorm -> ReLU -> 3x3 stride-2
    max-pool -> four stages of residual blocks (base widths 64/128/256/512;
    stages 2-4 start with a stride-2 block) -> optional classification head
    (global average pool + fully connected layer).

    Args:
        block: Block class (or factory) exposing an ``expansion`` attribute
            and accepting ``(in_channel, out_channel, stride=, downsample=,
            groups=, width_per_group=)``.
        blocks_num: Sequence whose first four items give the number of blocks
            in stage 1 to stage 4.
        num_classes: Output size of the final fully connected layer.
        include_top: When False, no ``avgpool``/``fc`` layers are created and
            ``forward`` returns the stage-4 feature map.
        groups: Forwarded to every block.
        width_per_group: Forwarded to every block.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super(ResNet, self).__init__()
        self.include_top = include_top
        # Running channel count of the tensor entering the next block;
        # _make_layer updates it as stages are built.
        self.in_channel = 64

        self.groups = groups
        self.width_per_group = width_per_group

        # Stem: the first convolution takes a 3-channel input.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # The four residual stages must be built in this order because each
        # call to _make_layer reads and updates self.in_channel.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution in the network;
        # other layer types keep their PyTorch defaults.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and return it as ``nn.Sequential``.

        Args:
            block: Block class, see the class docstring.
            channel: Base width of the stage; the stage outputs
                ``channel * block.expansion`` channels.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block; the remaining blocks use the
                block's default stride.
        """
        out_channel = channel * block.expansion

        # The first block needs a projection shortcut (1x1 conv + BN) whenever
        # it changes the resolution or the channel count.
        downsample = None
        if stride != 1 or self.in_channel != out_channel:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel))

        layers = [block(self.in_channel,
                        channel,
                        downsample=downsample,
                        stride=stride,
                        groups=self.groups,
                        width_per_group=self.width_per_group)]
        # Every following block (and the next stage) sees the expanded width.
        self.in_channel = out_channel

        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel,
                                groups=self.groups,
                                width_per_group=self.width_per_group))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and the four stages, then the head if ``include_top``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            # Flatten everything except the batch dimension before the FC layer.
            x = torch.flatten(x, 1)
            x = self.fc(x)

        return x
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet-34: ``BasicBlock`` with stage depths [3, 4, 6, 3].

    Args:
        num_classes: Output size of the classification layer.
        include_top: Forwarded to ``ResNet``; False omits the pooling/FC head.
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet-50: ``Bottleneck`` with stage depths [3, 4, 6, 3].

    Args:
        num_classes: Output size of the classification layer.
        include_top: Forwarded to ``ResNet``; False omits the pooling/FC head.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet-101: ``Bottleneck`` with stage depths [3, 4, 23, 3].

    Args:
        num_classes: Output size of the classification layer.
        include_top: Forwarded to ``ResNet``; False omits the pooling/FC head.
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)
def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNeXt-50 (32x4d): ``Bottleneck`` blocks, depths [3, 4, 6, 3].

    Uses 32 groups with a per-group width of 4 in the 3x3 convolutions.

    Args:
        num_classes: Output size of the classification layer.
        include_top: Forwarded to ``ResNet``; False omits the pooling/FC head.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=4)
def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a ResNeXt-101 (32x8d): ``Bottleneck`` blocks, depths [3, 4, 23, 3].

    Uses 32 groups with a per-group width of 8 in the 3x3 convolutions.

    Args:
        num_classes: Output size of the classification layer.
        include_top: Forwarded to ``ResNet``; False omits the pooling/FC head.
    """
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=32,
                  width_per_group=8)
if __name__ == '__main__':
    # Demo: inspect a ResNet-34 with five different tools. The third-party
    # imports stay next to their section so that a missing optional package
    # only breaks the section that needs it.
    resnet = resnet34()

    print('==================1. 通过print打印网络结构=====================')
    print(resnet)

    print('\n================2. torchsummary:只能打印出来每层名称和参数量情况================')
    # Aliased because torchinfo exports a function with the same name below.
    from torchsummary import summary as torchsummary_summary
    # The input size is passed positionally on purpose.
    # NOTE(review): the recorded output of this section has the torch-summary
    # layout, and that package names its second parameter `input_data`; a
    # keyword `input_size=...` therefore appears to be swallowed, so no forward
    # pass runs and only parameter counts are printed. Positional passing works
    # with both torchsummary flavours -- confirm against the installed package.
    torchsummary_summary(resnet, (3, 224, 224))

    print('\n===============3. profile:只能用于计算flops和参数量=====================')
    from thop import profile
    # Named dummy_input so the builtin input() is not shadowed.
    dummy_input = torch.randn(1, 3, 224, 224)
    # NOTE(review): thop's README calls the first return value MACs -- confirm
    # before quoting this number as FLOPs.
    flops, params = profile(resnet, (dummy_input,))
    # flops is divided by 1e9, so its unit is G (the old label said "M").
    print('flops: %.2f G, params: %.2f M' % (flops / 1e9, params / 1e6))

    print('\n==============4. named_parameters:查看网络参数(只会打印出来含有可训练参数的数量)=====================')
    for name, parameters in resnet.named_parameters():
        print(name, ':', parameters.size())

    print('\n===================5. torchinfo:每层名字+输出特征图尺寸+参数量=================')
    from torchinfo import summary as torchinfo_summary
    # torchinfo takes the full input shape, batch dimension included.
    torchinfo_summary(resnet, (1, 3, 224, 224))
3. 网络结构打印+展示(resnet34)
3.1 直接使用 print 打印网络结构
# Build the ResNet-34 from model.py and print the module tree.
resnet = resnet34()
print('==================1. 通过print打印网络结构=====================')
print(resnet)
==================1. 通过print打印网络结构=====================
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer3): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(4): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(5): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=512, out_features=1000, bias=True)
)
3.2 from torchsummary import summary 打印网络信息
- 没有安装 torchsummary 的,直接在终端键入 `pip install torchsummary` 安装即可
- 只能打印出来每层名称和参数量情况
print('\n================2. torchsummary:只能打印出来每层名称和参数量情况================')
from torchsummary import summary
# The input size is passed positionally on purpose.
# NOTE(review): with the torch-summary package the second parameter is named
# `input_data`; a keyword `input_size=...` appears to be swallowed, so no
# forward pass runs and only parameter counts are printed. Positional passing
# works with both torchsummary flavours -- confirm against the installed package.
summary(resnet, (3, 224, 224))
================2. torchsummary:只能打印出来每层名称和参数量情况================
=================================================================
Layer (type:depth-idx) Param #
=================================================================
├─Conv2d: 1-1 9,408
├─BatchNorm2d: 1-2 128
├─ReLU: 1-3 --
├─MaxPool2d: 1-4 --
├─Sequential: 1-5 --
| └─BasicBlock: 2-1 --
| | └─Conv2d: 3-1 36,864
| | └─BatchNorm2d: 3-2 128
| | └─ReLU: 3-3 --
| | └─Conv2d: 3-4 36,864
| | └─BatchNorm2d: 3-5 128
| └─BasicBlock: 2-2 --
| | └─Conv2d: 3-6 36,864
| | └─BatchNorm2d: 3-7 128
| | └─ReLU: 3-8 --
| | └─Conv2d: 3-9 36,864
| | └─BatchNorm2d: 3-10 128
| └─BasicBlock: 2-3 --
| | └─Conv2d: 3-11 36,864
| | └─BatchNorm2d: 3-12 128
| | └─ReLU: 3-13 --
| | └─Conv2d: 3-14 36,864
| | └─BatchNorm2d: 3-15 128
├─Sequential: 1-6 --
| └─BasicBlock: 2-4 --
| | └─Conv2d: 3-16 73,728
| | └─BatchNorm2d: 3-17 256
| | └─ReLU: 3-18 --
| | └─Conv2d: 3-19 147,456
| | └─BatchNorm2d: 3-20 256
| | └─Sequential: 3-21 8,448
| └─BasicBlock: 2-5 --
| | └─Conv2d: 3-22 147,456
| | └─BatchNorm2d: 3-23 256
| | └─ReLU: 3-24 --
| | └─Conv2d: 3-25 147,456
| | └─BatchNorm2d: 3-26 256
| └─BasicBlock: 2-6 --
| | └─Conv2d: 3-27 147,456
| | └─BatchNorm2d: 3-28 256
| | └─ReLU: 3-29 --
| | └─Conv2d: 3-30 147,456
| | └─BatchNorm2d: 3-31 256
| └─BasicBlock: 2-7 --
| | └─Conv2d: 3-32 147,456
| | └─BatchNorm2d: 3-33 256
| | └─ReLU: 3-34 --
| | └─Conv2d: 3-35 147,456
| | └─BatchNorm2d: 3-36 256
├─Sequential: 1-7 --
| └─BasicBlock: 2-8 --
| | └─Conv2d: 3-37 294,912
| | └─BatchNorm2d: 3-38 512
| | └─ReLU: 3-39 --
| | └─Conv2d: 3-40 589,824
| | └─BatchNorm2d: 3-41 512
| | └─Sequential: 3-42 33,280
| └─BasicBlock: 2-9 --
| | └─Conv2d: 3-43 589,824
| | └─BatchNorm2d: 3-44 512
| | └─ReLU: 3-45 --
| | └─Conv2d: 3-46 589,824
| | └─BatchNorm2d: 3-47 512
| └─BasicBlock: 2-10 --
| | └─Conv2d: 3-48 589,824
| | └─BatchNorm2d: 3-49 512
| | └─ReLU: 3-50 --
| | └─Conv2d: 3-51 589,824
| | └─BatchNorm2d: 3-52 512
| └─BasicBlock: 2-11 --
| | └─Conv2d: 3-53 589,824
| | └─BatchNorm2d: 3-54 512
| | └─ReLU: 3-55 --
| | └─Conv2d: 3-56 589,824
| | └─BatchNorm2d: 3-57 512
| └─BasicBlock: 2-12 --
| | └─Conv2d: 3-58 589,824
| | └─BatchNorm2d: 3-59 512
| | └─ReLU: 3-60 --
| | └─Conv2d: 3-61 589,824
| | └─BatchNorm2d: 3-62 512
| └─BasicBlock: 2-13 --
| | └─Conv2d: 3-63 589,824
| | └─BatchNorm2d: 3-64 512
| | └─ReLU: 3-65 --
| | └─Conv2d: 3-66 589,824
| | └─BatchNorm2d: 3-67 512
├─Sequential: 1-8 --
| └─BasicBlock: 2-14 --
| | └─Conv2d: 3-68 1,179,648
| | └─BatchNorm2d: 3-69 1,024
| | └─ReLU: 3-70 --
| | └─Conv2d: 3-71 2,359,296
| | └─BatchNorm2d: 3-72 1,024
| | └─Sequential: 3-73 132,096
| └─BasicBlock: 2-15 --
| | └─Conv2d: 3-74 2,359,296
| | └─BatchNorm2d: 3-75 1,024
| | └─ReLU: 3-76 --
| | └─Conv2d: 3-77 2,359,296
| | └─BatchNorm2d: 3-78 1,024
| └─BasicBlock: 2-16 --
| | └─Conv2d: 3-79 2,359,296
| | └─BatchNorm2d: 3-80 1,024
| | └─ReLU: 3-81 --
| | └─Conv2d: 3-82 2,359,296
| | └─BatchNorm2d: 3-83 1,024
├─AdaptiveAvgPool2d: 1-9 --
├─Linear: 1-10 513,000
=================================================================
Total params: 21,797,672
Trainable params: 21,797,672
Non-trainable params: 0
=================================================================
3.3 from thop import profile 计算出GFLOPs和参数量
print('\n===============3. profile:只能用于计算flops和参数量=====================')
from thop import profile
# Named dummy_input so the builtin input() is not shadowed.
dummy_input = torch.randn(1, 3, 224, 224)
# NOTE(review): thop's README calls the first return value MACs -- confirm
# before quoting this number as FLOPs.
flops, params = profile(resnet, (dummy_input,))
# flops / 1e9 is already in G, so the label no longer appends a stray "M".
print('GFLOPs: %.2f, params: %.2f M' % (flops / 1e9, params / 1e6))
===============3. profile:只能用于计算flops和参数量=====================
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
GFLOPs: 3.68 M, params: 21.80 M
3.4 named_parameters 查看网络参数(只会打印出可训练参数的名称和尺寸)
print('\n==============4. named_parameters:查看网络参数(只会打印出来含有可训练参数的数量)=====================')
# Print each entry yielded by named_parameters() with its tensor size.
for name, parameters in resnet.named_parameters():
    print(name, ':', parameters.size())
==============4. named_parameters:查看网络参数(只会打印出来含有可训练参数的数量)=====================
conv1.weight : torch.Size([64, 3, 7, 7])
bn1.weight : torch.Size([64])
bn1.bias : torch.Size([64])
layer1.0.conv1.weight : torch.Size([64, 64, 3, 3])
layer1.0.bn1.weight : torch.Size([64])
layer1.0.bn1.bias : torch.Size([64])
layer1.0.conv2.weight : torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight : torch.Size([64])
layer1.0.bn2.bias : torch.Size([64])
layer1.1.conv1.weight : torch.Size([64, 64, 3, 3])
layer1.1.bn1.weight : torch.Size([64])
layer1.1.bn1.bias : torch.Size([64])
layer1.1.conv2.weight : torch.Size([64, 64, 3, 3])
layer1.1.bn2.weight : torch.Size([64])
layer1.1.bn2.bias : torch.Size([64])
layer1.2.conv1.weight : torch.Size([64, 64, 3, 3])
layer1.2.bn1.weight : torch.Size([64])
layer1.2.bn1.bias : torch.Size([64])
layer1.2.conv2.weight : torch.Size([64, 64, 3, 3])
layer1.2.bn2.weight : torch.Size([64])
layer1.2.bn2.bias : torch.Size([64])
layer2.0.conv1.weight : torch.Size([128, 64, 3, 3])
layer2.0.bn1.weight : torch.Size([128])
layer2.0.bn1.bias : torch.Size([128])
layer2.0.conv2.weight : torch.Size([128, 128, 3, 3])
layer2.0.bn2.weight : torch.Size([128])
layer2.0.bn2.bias : torch.Size([128])
layer2.0.downsample.0.weight : torch.Size([128, 64, 1, 1])
layer2.0.downsample.1.weight : torch.Size([128])
layer2.0.downsample.1.bias : torch.Size([128])
layer2.1.conv1.weight : torch.Size([128, 128, 3, 3])
layer2.1.bn1.weight : torch.Size([128])
layer2.1.bn1.bias : torch.Size([128])
layer2.1.conv2.weight : torch.Size([128, 128, 3, 3])
layer2.1.bn2.weight : torch.Size([128])
layer2.1.bn2.bias : torch.Size([128])
layer2.2.conv1.weight : torch.Size([128, 128, 3, 3])
layer2.2.bn1.weight : torch.Size([128])
layer2.2.bn1.bias : torch.Size([128])
layer2.2.conv2.weight : torch.Size([128, 128, 3, 3])
layer2.2.bn2.weight : torch.Size([128])
layer2.2.bn2.bias : torch.Size([128])
layer2.3.conv1.weight : torch.Size([128, 128, 3, 3])
layer2.3.bn1.weight : torch.Size([128])
layer2.3.bn1.bias : torch.Size([128])
layer2.3.conv2.weight : torch.Size([128, 128, 3, 3])
layer2.3.bn2.weight : torch.Size([128])
layer2.3.bn2.bias : torch.Size([128])
layer3.0.conv1.weight : torch.Size([256, 128, 3, 3])
layer3.0.bn1.weight : torch.Size([256])
layer3.0.bn1.bias : torch.Size([256])
layer3.0.conv2.weight : torch.Size([256, 256, 3, 3])
layer3.0.bn2.weight : torch.Size([256])
layer3.0.bn2.bias : torch.Size([256])
layer3.0.downsample.0.weight : torch.Size([256, 128, 1, 1])
layer3.0.downsample.1.weight : torch.Size([256])
layer3.0.downsample.1.bias : torch.Size([256])
layer3.1.conv1.weight : torch.Size([256, 256, 3, 3])
layer3.1.bn1.weight : torch.Size([256])
layer3.1.bn1.bias : torch.Size([256])
layer3.1.conv2.weight : torch.Size([256, 256, 3, 3])
layer3.1.bn2.weight : torch.Size([256])
layer3.1.bn2.bias : torch.Size([256])
layer3.2.conv1.weight : torch.Size([256, 256, 3, 3])
layer3.2.bn1.weight : torch.Size([256])
layer3.2.bn1.bias : torch.Size([256])
layer3.2.conv2.weight : torch.Size([256, 256, 3, 3])
layer3.2.bn2.weight : torch.Size([256])
layer3.2.bn2.bias : torch.Size([256])
layer3.3.conv1.weight : torch.Size([256, 256, 3, 3])
layer3.3.bn1.weight : torch.Size([256])
layer3.3.bn1.bias : torch.Size([256])
layer3.3.conv2.weight : torch.Size([256, 256, 3, 3])
layer3.3.bn2.weight : torch.Size([256])
layer3.3.bn2.bias : torch.Size([256])
layer3.4.conv1.weight : torch.Size([256, 256, 3, 3])
layer3.4.bn1.weight : torch.Size([256])
layer3.4.bn1.bias : torch.Size([256])
layer3.4.conv2.weight : torch.Size([256, 256, 3, 3])
layer3.4.bn2.weight : torch.Size([256])
layer3.4.bn2.bias : torch.Size([256])
layer3.5.conv1.weight : torch.Size([256, 256, 3, 3])
layer3.5.bn1.weight : torch.Size([256])
layer3.5.bn1.bias : torch.Size([256])
layer3.5.conv2.weight : torch.Size([256, 256, 3, 3])
layer3.5.bn2.weight : torch.Size([256])
layer3.5.bn2.bias : torch.Size([256])
layer4.0.conv1.weight : torch.Size([512, 256, 3, 3])
layer4.0.bn1.weight : torch.Size([512])
layer4.0.bn1.bias : torch.Size([512])
layer4.0.conv2.weight : torch.Size([512, 512, 3, 3])
layer4.0.bn2.weight : torch.Size([512])
layer4.0.bn2.bias : torch.Size([512])
layer4.0.downsample.0.weight : torch.Size([512, 256, 1, 1])
layer4.0.downsample.1.weight : torch.Size([512])
layer4.0.downsample.1.bias : torch.Size([512])
layer4.1.conv1.weight : torch.Size([512, 512, 3, 3])
layer4.1.bn1.weight : torch.Size([512])
layer4.1.bn1.bias : torch.Size([512])
layer4.1.conv2.weight : torch.Size([512, 512, 3, 3])
layer4.1.bn2.weight : torch.Size([512])
layer4.1.bn2.bias : torch.Size([512])
layer4.2.conv1.weight : torch.Size([512, 512, 3, 3])
layer4.2.bn1.weight : torch.Size([512])
layer4.2.bn1.bias : torch.Size([512])
layer4.2.conv2.weight : torch.Size([512, 512, 3, 3])
layer4.2.bn2.weight : torch.Size([512])
layer4.2.bn2.bias : torch.Size([512])
fc.weight : torch.Size([1000, 512])
fc.bias : torch.Size([1000])
3.5 from torchinfo import summary 打印网络信息
print('\n===================5. torchinfo:每层名字+输出特征图尺寸+参数量=================')
from torchinfo import summary
# The shape passed to torchinfo includes the batch dimension: (1, 3, 224, 224).
summary(resnet, (1, 3, 224, 224))
===================5. torchinfo:每层名字+输出特征图尺寸+参数量=================
==========================================================================================
Layer (type:depth-idx) Output Shape Param
==========================================================================================
ResNet [1, 1000] --
├─Conv2d: 1-1 [1, 64, 112, 112] 9,408
├─BatchNorm2d: 1-2 [1, 64, 112, 112] 128
├─ReLU: 1-3 [1, 64, 112, 112] --
├─MaxPool2d: 1-4 [1, 64, 56, 56] --
├─Sequential: 1-5 [1, 64, 56, 56] --
│ └─BasicBlock: 2-1 [1, 64, 56, 56] --
│ │ └─Conv2d: 3-1 [1, 64, 56, 56] 36,864
│ │ └─BatchNorm2d: 3-2 [1, 64, 56, 56] 128
│ │ └─ReLU: 3-3 [1, 64, 56, 56] --
│ │ └─Conv2d: 3-4 [1, 64, 56, 56] 36,864
│ │ └─BatchNorm2d: 3-5 [1, 64, 56, 56] 128
│ │ └─ReLU: 3-6 [1, 64, 56, 56] --
│ └─BasicBlock: 2-2 [1, 64, 56, 56] --
│ │ └─Conv2d: 3-7 [1, 64, 56, 56] 36,864
│ │ └─BatchNorm2d: 3-8 [1, 64, 56, 56] 128
│ │ └─ReLU: 3-9 [1, 64, 56, 56] --
│ │ └─Conv2d: 3-10 [1, 64, 56, 56] 36,864
│ │ └─BatchNorm2d: 3-11 [1, 64, 56, 56] 128
│ │ └─ReLU: 3-12 [1, 64, 56, 56] --
│ └─BasicBlock: 2-3 [1, 64, 56, 56] --
│ │ └─Conv2d: 3-13 [1, 64, 56, 56] 36,864
│ │ └─BatchNorm2d: 3-14 [1, 64, 56, 56] 128
│ │ └─ReLU: 3-15 [1, 64, 56, 56] --
│ │ └─Conv2d: 3-16 [1, 64, 56, 56] 36,864
│ │ └─BatchNorm2d: 3-17 [1, 64, 56, 56] 128
│ │ └─ReLU: 3-18 [1, 64, 56, 56] --
├─Sequential: 1-6 [1, 128, 28, 28] --
│ └─BasicBlock: 2-4 [1, 128, 28, 28] --
│ │ └─Sequential: 3-19 [1, 128, 28, 28] 8,448
│ │ └─Conv2d: 3-20 [1, 128, 28, 28] 73,728
│ │ └─BatchNorm2d: 3-21 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-22 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-23 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-24 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-25 [1, 128, 28, 28] --
│ └─BasicBlock: 2-5 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-26 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-27 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-28 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-29 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-30 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-31 [1, 128, 28, 28] --
│ └─BasicBlock: 2-6 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-32 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-33 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-34 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-35 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-36 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-37 [1, 128, 28, 28] --
│ └─BasicBlock: 2-7 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-38 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-39 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-40 [1, 128, 28, 28] --
│ │ └─Conv2d: 3-41 [1, 128, 28, 28] 147,456
│ │ └─BatchNorm2d: 3-42 [1, 128, 28, 28] 256
│ │ └─ReLU: 3-43 [1, 128, 28, 28] --
├─Sequential: 1-7 [1, 256, 14, 14] --
│ └─BasicBlock: 2-8 [1, 256, 14, 14] --
│ │ └─Sequential: 3-44 [1, 256, 14, 14] 33,280
│ │ └─Conv2d: 3-45 [1, 256, 14, 14] 294,912
│ │ └─BatchNorm2d: 3-46 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-47 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-48 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-49 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-50 [1, 256, 14, 14] --
│ └─BasicBlock: 2-9 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-51 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-52 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-53 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-54 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-55 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-56 [1, 256, 14, 14] --
│ └─BasicBlock: 2-10 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-57 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-58 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-59 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-60 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-61 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-62 [1, 256, 14, 14] --
│ └─BasicBlock: 2-11 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-63 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-64 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-65 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-66 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-67 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-68 [1, 256, 14, 14] --
│ └─BasicBlock: 2-12 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-69 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-70 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-71 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-72 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-73 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-74 [1, 256, 14, 14] --
│ └─BasicBlock: 2-13 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-75 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-76 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-77 [1, 256, 14, 14] --
│ │ └─Conv2d: 3-78 [1, 256, 14, 14] 589,824
│ │ └─BatchNorm2d: 3-79 [1, 256, 14, 14] 512
│ │ └─ReLU: 3-80 [1, 256, 14, 14] --
├─Sequential: 1-8 [1, 512, 7, 7] --
│ └─BasicBlock: 2-14 [1, 512, 7, 7] --
│ │ └─Sequential: 3-81 [1, 512, 7, 7] 132,096
│ │ └─Conv2d: 3-82 [1, 512, 7, 7] 1,179,648
│ │ └─BatchNorm2d: 3-83 [1, 512, 7, 7] 1,024
│ │ └─ReLU: 3-84 [1, 512, 7, 7] --
│ │ └─Conv2d: 3-85 [1, 512, 7, 7] 2,359,296
│ │ └─BatchNorm2d: 3-86 [1, 512, 7, 7] 1,024
│ │ └─ReLU: 3-87 [1, 512, 7, 7] --
│ └─BasicBlock: 2-15 [1, 512, 7, 7] --
│ │ └─Conv2d: 3-88 [1, 512, 7, 7] 2,359,296
│ │ └─BatchNorm2d: 3-89 [1, 512, 7, 7] 1,024
│ │ └─ReLU: 3-90 [1, 512, 7, 7] --
│ │ └─Conv2d: 3-91 [1, 512, 7, 7] 2,359,296
│ │ └─BatchNorm2d: 3-92 [1, 512, 7, 7] 1,024
│ │ └─ReLU: 3-93 [1, 512, 7, 7] --
│ └─BasicBlock: 2-16 [1, 512, 7, 7] --
│ │ └─Conv2d: 3-94 [1, 512, 7, 7] 2,359,296
│ │ └─BatchNorm2d: 3-95 [1, 512, 7, 7] 1,024
│ │ └─ReLU: 3-96 [1, 512, 7, 7] --
│ │ └─Conv2d: 3-97 [1, 512, 7, 7] 2,359,296
│ │ └─BatchNorm2d: 3-98 [1, 512, 7, 7] 1,024
│ │ └─ReLU: 3-99 [1, 512, 7, 7] --
├─AdaptiveAvgPool2d: 1-9 [1, 512, 1, 1] --
├─Linear: 1-10 [1, 1000] 513,000
==========================================================================================
Total params: 21,797,672
Trainable params: 21,797,672
Non-trainable params: 0
Total mult-adds (G): 3.66
==========================================================================================
Input size (MB): 0.60
Forward/backward pass size (MB): 59.82
Params size (MB): 87.19
Estimated Total Size (MB): 147.61
==========================================================================================
3.6 暂存:奇怪的现象(调用官方库的resnet34网络,就可以直接通过torchsummary打印出来特征层尺寸大小)
import torchvision.models as models
from torchsummary import summary
# torchvision's reference ResNet-34, used for comparison with the custom model.
resnet34 = models.resnet34()
# NOTE(review): the input size is given positionally here. With the
# torch-summary package that is presumably what triggers the forward pass and
# the "Output Shape" column, whereas a keyword `input_size=...` would not --
# confirm against the installed package.
summary(resnet34, (3, 224, 224))
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
├─Conv2d: 1-1 [-1, 64, 112, 112] 9,408
├─BatchNorm2d: 1-2 [-1, 64, 112, 112] 128
├─ReLU: 1-3 [-1, 64, 112, 112] --
├─MaxPool2d: 1-4 [-1, 64, 56, 56] --
├─Sequential: 1-5 [-1, 64, 56, 56] --
| └─BasicBlock: 2-1 [-1, 64, 56, 56] --
| | └─Conv2d: 3-1 [-1, 64, 56, 56] 36,864
| | └─BatchNorm2d: 3-2 [-1, 64, 56, 56] 128
| | └─ReLU: 3-3 [-1, 64, 56, 56] --
| | └─Conv2d: 3-4 [-1, 64, 56, 56] 36,864
| | └─BatchNorm2d: 3-5 [-1, 64, 56, 56] 128
| | └─ReLU: 3-6 [-1, 64, 56, 56] --
| └─BasicBlock: 2-2 [-1, 64, 56, 56] --
| | └─Conv2d: 3-7 [-1, 64, 56, 56] 36,864
| | └─BatchNorm2d: 3-8 [-1, 64, 56, 56] 128
| | └─ReLU: 3-9 [-1, 64, 56, 56] --
| | └─Conv2d: 3-10 [-1, 64, 56, 56] 36,864
| | └─BatchNorm2d: 3-11 [-1, 64, 56, 56] 128
| | └─ReLU: 3-12 [-1, 64, 56, 56] --
| └─BasicBlock: 2-3 [-1, 64, 56, 56] --
| | └─Conv2d: 3-13 [-1, 64, 56, 56] 36,864
| | └─BatchNorm2d: 3-14 [-1, 64, 56, 56] 128
| | └─ReLU: 3-15 [-1, 64, 56, 56] --
| | └─Conv2d: 3-16 [-1, 64, 56, 56] 36,864
| | └─BatchNorm2d: 3-17 [-1, 64, 56, 56] 128
| | └─ReLU: 3-18 [-1, 64, 56, 56] --
├─Sequential: 1-6 [-1, 128, 28, 28] --
| └─BasicBlock: 2-4 [-1, 128, 28, 28] --
| | └─Conv2d: 3-19 [-1, 128, 28, 28] 73,728
| | └─BatchNorm2d: 3-20 [-1, 128, 28, 28] 256
| | └─ReLU: 3-21 [-1, 128, 28, 28] --
| | └─Conv2d: 3-22 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-23 [-1, 128, 28, 28] 256
| | └─Sequential: 3-24 [-1, 128, 28, 28] 8,448
| | └─ReLU: 3-25 [-1, 128, 28, 28] --
| └─BasicBlock: 2-5 [-1, 128, 28, 28] --
| | └─Conv2d: 3-26 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-27 [-1, 128, 28, 28] 256
| | └─ReLU: 3-28 [-1, 128, 28, 28] --
| | └─Conv2d: 3-29 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-30 [-1, 128, 28, 28] 256
| | └─ReLU: 3-31 [-1, 128, 28, 28] --
| └─BasicBlock: 2-6 [-1, 128, 28, 28] --
| | └─Conv2d: 3-32 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-33 [-1, 128, 28, 28] 256
| | └─ReLU: 3-34 [-1, 128, 28, 28] --
| | └─Conv2d: 3-35 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-36 [-1, 128, 28, 28] 256
| | └─ReLU: 3-37 [-1, 128, 28, 28] --
| └─BasicBlock: 2-7 [-1, 128, 28, 28] --
| | └─Conv2d: 3-38 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-39 [-1, 128, 28, 28] 256
| | └─ReLU: 3-40 [-1, 128, 28, 28] --
| | └─Conv2d: 3-41 [-1, 128, 28, 28] 147,456
| | └─BatchNorm2d: 3-42 [-1, 128, 28, 28] 256
| | └─ReLU: 3-43 [-1, 128, 28, 28] --
├─Sequential: 1-7 [-1, 256, 14, 14] --
| └─BasicBlock: 2-8 [-1, 256, 14, 14] --
| | └─Conv2d: 3-44 [-1, 256, 14, 14] 294,912
| | └─BatchNorm2d: 3-45 [-1, 256, 14, 14] 512
| | └─ReLU: 3-46 [-1, 256, 14, 14] --
| | └─Conv2d: 3-47 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-48 [-1, 256, 14, 14] 512
| | └─Sequential: 3-49 [-1, 256, 14, 14] 33,280
| | └─ReLU: 3-50 [-1, 256, 14, 14] --
| └─BasicBlock: 2-9 [-1, 256, 14, 14] --
| | └─Conv2d: 3-51 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-52 [-1, 256, 14, 14] 512
| | └─ReLU: 3-53 [-1, 256, 14, 14] --
| | └─Conv2d: 3-54 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-55 [-1, 256, 14, 14] 512
| | └─ReLU: 3-56 [-1, 256, 14, 14] --
| └─BasicBlock: 2-10 [-1, 256, 14, 14] --
| | └─Conv2d: 3-57 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-58 [-1, 256, 14, 14] 512
| | └─ReLU: 3-59 [-1, 256, 14, 14] --
| | └─Conv2d: 3-60 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-61 [-1, 256, 14, 14] 512
| | └─ReLU: 3-62 [-1, 256, 14, 14] --
| └─BasicBlock: 2-11 [-1, 256, 14, 14] --
| | └─Conv2d: 3-63 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-64 [-1, 256, 14, 14] 512
| | └─ReLU: 3-65 [-1, 256, 14, 14] --
| | └─Conv2d: 3-66 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-67 [-1, 256, 14, 14] 512
| | └─ReLU: 3-68 [-1, 256, 14, 14] --
| └─BasicBlock: 2-12 [-1, 256, 14, 14] --
| | └─Conv2d: 3-69 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-70 [-1, 256, 14, 14] 512
| | └─ReLU: 3-71 [-1, 256, 14, 14] --
| | └─Conv2d: 3-72 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-73 [-1, 256, 14, 14] 512
| | └─ReLU: 3-74 [-1, 256, 14, 14] --
| └─BasicBlock: 2-13 [-1, 256, 14, 14] --
| | └─Conv2d: 3-75 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-76 [-1, 256, 14, 14] 512
| | └─ReLU: 3-77 [-1, 256, 14, 14] --
| | └─Conv2d: 3-78 [-1, 256, 14, 14] 589,824
| | └─BatchNorm2d: 3-79 [-1, 256, 14, 14] 512
| | └─ReLU: 3-80 [-1, 256, 14, 14] --
├─Sequential: 1-8 [-1, 512, 7, 7] --
| └─BasicBlock: 2-14 [-1, 512, 7, 7] --
| | └─Conv2d: 3-81 [-1, 512, 7, 7] 1,179,648
| | └─BatchNorm2d: 3-82 [-1, 512, 7, 7] 1,024
| | └─ReLU: 3-83 [-1, 512, 7, 7] --
| | └─Conv2d: 3-84 [-1, 512, 7, 7] 2,359,296
| | └─BatchNorm2d: 3-85 [-1, 512, 7, 7] 1,024
| | └─Sequential: 3-86 [-1, 512, 7, 7] 132,096
| | └─ReLU: 3-87 [-1, 512, 7, 7] --
| └─BasicBlock: 2-15 [-1, 512, 7, 7] --
| | └─Conv2d: 3-88 [-1, 512, 7, 7] 2,359,296
| | └─BatchNorm2d: 3-89 [-1, 512, 7, 7] 1,024
| | └─ReLU: 3-90 [-1, 512, 7, 7] --
| | └─Conv2d: 3-91 [-1, 512, 7, 7] 2,359,296
| | └─BatchNorm2d: 3-92 [-1, 512, 7, 7] 1,024
| | └─ReLU: 3-93 [-1, 512, 7, 7] --
| └─BasicBlock: 2-16 [-1, 512, 7, 7] --
| | └─Conv2d: 3-94 [-1, 512, 7, 7] 2,359,296
| | └─BatchNorm2d: 3-95 [-1, 512, 7, 7] 1,024
| | └─ReLU: 3-96 [-1, 512, 7, 7] --
| | └─Conv2d: 3-97 [-1, 512, 7, 7] 2,359,296
| | └─BatchNorm2d: 3-98 [-1, 512, 7, 7] 1,024
| | └─ReLU: 3-99 [-1, 512, 7, 7] --
├─AdaptiveAvgPool2d: 1-9 [-1, 512, 1, 1] --
├─Linear: 1-10 [-1, 1000] 513,000
==========================================================================================
Total params: 21,797,672
Trainable params: 21,797,672
Non-trainable params: 0
Total mult-adds (G): 3.71
==========================================================================================
Input size (MB): 0.57
Forward/backward pass size (MB): 57.05
Params size (MB): 83.15
Estimated Total Size (MB): 140.77
==========================================================================================