Displaying a Model's Structure and Parameters
Tools for inspecting a model fall into two groups:
- model visualization tools
- info-style analysis, which reports:
  - the type, shape, and parameter count of each layer
  - the total parameter count of the model
  - the model size and the memory needed for one forward/backward (fp/bp) pass, which can be used to estimate the largest feasible batch_size

The main libraries currently used to display model structure and parameters are pytorch_model_summary and torchsummary.
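As a baseline before reaching for either library, the total parameter count and the parameter memory can be read straight off `model.parameters()`. A minimal sketch in plain PyTorch (the `param_stats` helper name is mine, not part of either library):

import torch.nn as nn

def param_stats(model: nn.Module):
    # total and trainable parameter counts
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    # fp32 parameters take 4 bytes each
    size_mb = total * 4 / 1024 ** 2
    return total, trainable, size_mb

# e.g. a 320 -> 50 linear layer: 320*50 weights + 50 biases = 16,050 params
print(param_stats(nn.Linear(320, 50)))  # (16050, 16050, ~0.06 MB)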
Using pytorch-model-summary
pip install pytorch-model-summary
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_model_summary import summary

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# show input shape
print(summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=True))

# show output shape
print(summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=False))

# show output shape and hierarchical view of net
print(summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=False, show_hierarchical=True))
The output of the code above is as follows:
-----------------------------------------------------------------------
Layer (type) Input Shape Param # Tr. Param #
=======================================================================
Conv2d-1 [1, 1, 28, 28] 260 260
Conv2d-2 [1, 10, 12, 12] 5,020 5,020
Dropout2d-3 [1, 20, 8, 8] 0 0
Linear-4 [1, 320] 16,050 16,050
Linear-5 [1, 50] 510 510
=======================================================================
Total params: 21,840
Trainable params: 21,840
Non-trainable params: 0
-----------------------------------------------------------------------
-----------------------------------------------------------------------
Layer (type) Output Shape Param # Tr. Param #
=======================================================================
Conv2d-1 [1, 10, 24, 24] 260 260
Conv2d-2 [1, 20, 8, 8] 5,020 5,020
Dropout2d-3 [1, 20, 8, 8] 0 0
Linear-4 [1, 50] 16,050 16,050
Linear-5 [1, 10] 510 510
=======================================================================
Total params: 21,840
Trainable params: 21,840
Non-trainable params: 0
-----------------------------------------------------------------------
-----------------------------------------------------------------------
Layer (type) Output Shape Param # Tr. Param #
=======================================================================
Conv2d-1 [1, 10, 24, 24] 260 260
Conv2d-2 [1, 20, 8, 8] 5,020 5,020
Dropout2d-3 [1, 20, 8, 8] 0 0
Linear-4 [1, 50] 16,050 16,050
Linear-5 [1, 10] 510 510
=======================================================================
Total params: 21,840
Trainable params: 21,840
Non-trainable params: 0
-----------------------------------------------------------------------
=========================== Hierarchical Summary ===========================
Net(
(conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1)), 260 params
(conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1)), 5,020 params
(conv2_drop): Dropout2d(p=0.5), 0 params
(fc1): Linear(in_features=320, out_features=50, bias=True), 16,050 params
(fc2): Linear(in_features=50, out_features=10, bias=True), 510 params
), 21,840 params
============================================================================
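Because pytorch_model_summary is driven by a real example tensor rather than a shape tuple, the batch dimension you pass should be reflected in the reported shapes. A small sketch under that assumption, reusing the Net defined above:

# assumption: the summary reports the shapes of the tensor actually passed in,
# so a batch of 16 appears as [16, ...] in the Input Shape column
print(summary(Net(), torch.zeros((16, 1, 28, 28)), show_input=True))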
Using torchsummary
pip install torchsummary
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchsummary import summary

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# CNN for MNIST (a custom CNN)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = Net().to(device)
summary(model, (1, 28, 28))

# VGG16
vgg = models.vgg16().to(device)
summary(vgg, (3, 224, 224))

# Multiple Inputs
class SimpleConv(nn.Module):
    def __init__(self):
        super(SimpleConv, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )

    def forward(self, x, y):
        x1 = self.features(x)
        x2 = self.features(y)
        return x1, x2

model = SimpleConv().to(device)
summary(model, [(1, 16, 16), (1, 28, 28)])
The output of the code above is as follows:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 10, 24, 24] 260
Conv2d-2 [-1, 20, 8, 8] 5,020
Dropout2d-3 [-1, 20, 8, 8] 0
Linear-4 [-1, 50] 16,050
Linear-5 [-1, 10] 510
================================================================
Total params: 21,840
Trainable params: 21,840
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.08
Estimated Total Size (MB): 0.15
----------------------------------------------------------------
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
AdaptiveAvgPool2d-32 [-1, 512, 7, 7] 0
Linear-33 [-1, 4096] 102,764,544
ReLU-34 [-1, 4096] 0
Dropout-35 [-1, 4096] 0
Linear-36 [-1, 4096] 16,781,312
ReLU-37 [-1, 4096] 0
Dropout-38 [-1, 4096] 0
Linear-39 [-1, 1000] 4,097,000
================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.78
Params size (MB): 527.79
Estimated Total Size (MB): 747.15
----------------------------------------------------------------
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 1, 16, 16] 10
ReLU-2 [-1, 1, 16, 16] 0
Conv2d-3 [-1, 1, 28, 28] 10
ReLU-4 [-1, 1, 28, 28] 0
================================================================
Total params: 20
Trainable params: 20
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.77
Forward/backward pass size (MB): 0.02
Params size (MB): 0.00
Estimated Total Size (MB): 0.78
----------------------------------------------------------------
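The size lines at the bottom of each table are plain fp32 byte counts, so they can be reproduced by hand. Here is the arithmetic behind the MNIST Net summary above; the factor of 2 in the forward/backward term follows torchsummary's convention of counting each layer output once for activations and once for gradients:

MB = 1024 ** 2  # torchsummary assumes float32, i.e. 4 bytes per element

input_size = 1 * 28 * 28 * 4 / MB                   # ~0.003 -> printed as 0.00
params_size = 21840 * 4 / MB                        # ~0.083 -> printed as 0.08
outputs = 10*24*24 + 20*8*8 + 20*8*8 + 50 + 10      # elements output by each layer
fwd_bwd_size = outputs * 2 * 4 / MB                 # ~0.064 -> printed as 0.06
total = input_size + fwd_bwd_size + params_size     # ~0.15 MB

Scaling the forward/backward term by the batch size gives a rough per-batch activation memory, which is what makes these numbers useful for estimating the largest feasible batch_size.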
Differences between pytorch-model-summary and torchsummary
- The import paths differ (the package names are different):
# import from pytorch_model_summary
from pytorch_model_summary import summary
# import from torchsummary
from torchsummary import summary
- The call conventions differ: pytorch_model_summary takes an example input tensor (batch dimension included) and returns the summary as a string to print, while torchsummary takes the per-sample input shape and prints the table itself:
# calling the pytorch_model_summary version of summary
# the model does not need to be moved to the GPU
# show input shape
print(summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=True))
# show output shape
print(summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=False))
# show output shape and hierarchical view of net
print(summary(Net(), torch.zeros((1, 1, 28, 28)), show_input=False, show_hierarchical=True))
# calling the torchsummary version of summary (the model must be on the target device)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
summary(model, (1, 28, 28))
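Because both packages export a function named summary, using them in the same script requires aliasing the imports. A minimal sketch, assuming the MNIST Net from earlier is in scope and that your installed torchsummary accepts the device keyword:

import torch
from pytorch_model_summary import summary as pms_summary
from torchsummary import summary as ts_summary

dev = "cuda" if torch.cuda.is_available() else "cpu"
model = Net().to(dev)

# pytorch_model_summary: example tensor in, summary string out
print(pms_summary(model, torch.zeros((1, 1, 28, 28)).to(dev)))

# torchsummary: per-sample shape in, table printed directly
ts_summary(model, (1, 28, 28), device=dev)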