- 🍨 本文为🔗365天深度学习训练营中的学习记录博客
- 🍖 原作者:K同学啊|接辅导、项目定制
一、课题背景和开发环境
📌第J4周:ResNet与DenseNet结合探索📌
- 语言:Python3、Pytorch
- 📌本周任务:📌
– 任务类型: 自主探索
– 任务难度: 偏难
– 任务描述:
Ⅰ.请根据J1~J3周的内容自由探索ResNet
与DenseNet
结合的可能性
Ⅱ.是否可以根据两种特性构建一个新的模型框架?
Ⅲ.请用之前的任一图像识别任务验证改进后的模型的效果
🔊注: 打卡内容应该包括创新的思路以及对应模型结构图、代码运行截图
二、网络结构
在网上找到了一个ResNet与DenseNet复合的网络框架DPN,本次课题就以实现DPN(Dual Path Networks)为主。
参考资料:
Higher Order Recurrent Neural Networks
Dual Path Networks
DPN详解(Dual Path Networks)
解读Dual Path Networks(DPN,原创)
Dual Path Networks双分支网络
pytorch实现DPN 最详细的全面讲解
三、使用Pytorch实现
以下为 PyTorch 实现代码:
class Block(nn.Module):
    """One dual-path (DPN) block: a ResNet-style residual branch fused with a
    DenseNet-style concatenation branch.

    The 1x1 -> 3x3 (grouped) -> 1x1 bottleneck emits ``out_channel +
    dense_channel`` feature maps. The first ``out_channel`` channels are summed
    with the shortcut (residual path); the trailing ``dense_channel`` channels
    are concatenated onto the running dense path.
    """

    def __init__(self, in_channel, mid_channel, out_channel, dense_channel,
                 stride, groups, is_shortcut=False):
        # in_channel:    channels entering the block
        # mid_channel:   bottleneck width of the two inner convolutions
        # out_channel:   width of the residual (element-wise sum) path
        # dense_channel: extra channels appended per block on the dense path
        # is_shortcut:   True for the first block of a stage, where the input
        #                must be projected so the residual addition is
        #                shape-compatible
        super(Block, self).__init__()
        self.is_shortcut = is_shortcut
        self.out_channel = out_channel
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channel, mid_channel, kernel_size=1, bias=False),
            nn.BatchNorm2d(mid_channel),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(mid_channel, mid_channel, kernel_size=3, stride=stride,
                      padding=1, groups=groups, bias=False),
            nn.BatchNorm2d(mid_channel),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(mid_channel, out_channel + dense_channel, kernel_size=1,
                      bias=False),
            nn.BatchNorm2d(out_channel + dense_channel),
        )
        if self.is_shortcut:
            # Projection shortcut so both paths line up in channels and stride.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channel, out_channel + dense_channel,
                          kernel_size=3, padding=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel + dense_channel),
            )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        if self.is_shortcut:
            residual = self.shortcut(residual)
        split = self.out_channel
        # Residual path: sum over the first `split` channels.
        # Dense path: concatenate the remaining channels of both tensors.
        out = torch.cat(
            [residual[:, :split, :, :] + out[:, :split, :, :],
             residual[:, split:, :, :],
             out[:, split:, :, :]],
            dim=1,
        )
        return self.relu(out)
class DPN(nn.Module):
    """Dual Path Network assembled from ``Block``s according to a config dict.

    Expected keys in ``cfg``: 'group' (conv2 groups), 'in_channel' (stem
    width), 'mid_channels'/'out_channels'/'dense_channels' (4-tuples, one per
    stage), 'num' (blocks per stage, 4-tuple), 'classes' (output classes).
    """

    def __init__(self, cfg):
        super(DPN, self).__init__()
        self.group = cfg['group']
        self.in_channel = cfg['in_channel']
        mid_channels = cfg['mid_channels']
        out_channels = cfg['out_channels']
        dense_channels = cfg['dense_channels']
        num = cfg['num']
        # Stem: 7x7/2 conv + 3x3/2 max-pool.
        # NOTE(review): MaxPool2d uses padding=0 here (224 -> 55), while the
        # canonical ResNet stem uses padding=1 (-> 56); kept as-is since the
        # adaptive average pool below makes the network size-agnostic anyway.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, self.in_channel, 7, stride=2, padding=3, bias=False,
                      padding_mode='zeros'),
            nn.BatchNorm2d(self.in_channel),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        )
        self.conv2 = self._make_layers(mid_channels[0], out_channels[0], dense_channels[0], num[0], stride=1)
        self.conv3 = self._make_layers(mid_channels[1], out_channels[1], dense_channels[1], num[1], stride=2)
        self.conv4 = self._make_layers(mid_channels[2], out_channels[2], dense_channels[2], num[2], stride=2)
        self.conv5 = self._make_layers(mid_channels[3], out_channels[3], dense_channels[3], num[3], stride=2)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        # Final feature width = residual path + (num+1) accumulated dense
        # slices of the last stage (2 after the first block, +1 per extra).
        self.fc = nn.Linear(
            cfg['out_channels'][3] + (num[3] + 1) * cfg['dense_channels'][3],
            cfg['classes'],
        )

    def _make_layers(self, mid_channel, out_channel, dense_channel, num, stride=2):
        """Build one stage: a projecting first block plus ``num - 1`` plain blocks."""
        layers = [Block(self.in_channel, mid_channel, out_channel, dense_channel,
                        stride=stride, groups=self.group, is_shortcut=True)]
        # After the first block the output carries out_channel residual
        # channels plus two dense slices (one from the shortcut projection,
        # one from the bottleneck).
        self.in_channel = out_channel + dense_channel * 2
        for _ in range(1, num):
            layers.append(Block(self.in_channel, mid_channel, out_channel,
                                dense_channel, stride=1, groups=self.group))
            # Every additional block appends one more dense slice.
            self.in_channel += dense_channel
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.pool(x)
        x = torch.flatten(x, start_dim=1)
        return self.fc(x)
def DPN92(n_class=10):
    """Build a DPN-92 model with ``n_class`` output classes."""
    return DPN({
        'group': 32,
        'in_channel': 64,
        'mid_channels': (96, 192, 384, 768),
        'out_channels': (256, 512, 1024, 2048),
        'dense_channels': (16, 32, 24, 128),
        'num': (3, 4, 20, 3),
        'classes': n_class,
    })
def DPN98(n_class=10):
    """Build a DPN-98 model with ``n_class`` output classes."""
    return DPN({
        'group': 40,
        'in_channel': 96,
        'mid_channels': (160, 320, 640, 1280),
        'out_channels': (256, 512, 1024, 2048),
        'dense_channels': (16, 32, 32, 128),
        'num': (3, 6, 20, 3),
        'classes': n_class,
    })
四、打印模型查看&运行
1.模型打印
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 112, 112] 9,408
BatchNorm2d-2 [-1, 64, 112, 112] 128
ReLU-3 [-1, 64, 112, 112] 0
MaxPool2d-4 [-1, 64, 55, 55] 0
Conv2d-5 [-1, 96, 55, 55] 6,144
BatchNorm2d-6 [-1, 96, 55, 55] 192
ReLU-7 [-1, 96, 55, 55] 0
Conv2d-8 [-1, 96, 55, 55] 2,592
BatchNorm2d-9 [-1, 96, 55, 55] 192
ReLU-10 [-1, 96, 55, 55] 0
Conv2d-11 [-1, 272, 55, 55] 26,112
BatchNorm2d-12 [-1, 272, 55, 55] 544
Conv2d-13 [-1, 272, 55, 55] 156,672
BatchNorm2d-14 [-1, 272, 55, 55] 544
ReLU-15 [-1, 288, 55, 55] 0
Block-16 [-1, 288, 55, 55] 0
Conv2d-17 [-1, 96, 55, 55] 27,648
BatchNorm2d-18 [-1, 96, 55, 55] 192
ReLU-19 [-1, 96, 55, 55] 0
Conv2d-20 [-1, 96, 55, 55] 2,592
BatchNorm2d-21 [-1, 96, 55, 55] 192
ReLU-22 [-1, 96, 55, 55] 0
Conv2d-23 [-1, 272, 55, 55] 26,112
BatchNorm2d-24 [-1, 272, 55, 55] 544
ReLU-25 [-1, 304, 55, 55] 0
Block-26 [-1, 304, 55, 55] 0
Conv2d-27 [-1, 96, 55, 55] 29,184
BatchNorm2d-28 [-1, 96, 55, 55] 192
ReLU-29 [-1, 96, 55, 55] 0
Conv2d-30 [-1, 96, 55, 55] 2,592
BatchNorm2d-31 [-1, 96, 55, 55] 192
ReLU-32 [-1, 96, 55, 55] 0
Conv2d-33 [-1, 272, 55, 55] 26,112
BatchNorm2d-34 [-1, 272, 55, 55] 544
ReLU-35 [-1, 320, 55, 55] 0
Block-36 [-1, 320, 55, 55] 0
Conv2d-37 [-1, 192, 55, 55] 61,440
BatchNorm2d-38 [-1, 192, 55, 55] 384
ReLU-39 [-1, 192, 55, 55] 0
Conv2d-40 [-1, 192, 28, 28] 10,368
BatchNorm2d-41 [-1, 192, 28, 28] 384
ReLU-42 [-1, 192, 28, 28] 0
Conv2d-43 [-1, 544, 28, 28] 104,448
BatchNorm2d-44 [-1, 544, 28, 28] 1,088
Conv2d-45 [-1, 544, 28, 28] 1,566,720
BatchNorm2d-46 [-1, 544, 28, 28] 1,088
ReLU-47 [-1, 576, 28, 28] 0
Block-48 [-1, 576, 28, 28] 0
Conv2d-49 [-1, 192, 28, 28] 110,592
BatchNorm2d-50 [-1, 192, 28, 28] 384
ReLU-51 [-1, 192, 28, 28] 0
Conv2d-52 [-1, 192, 28, 28] 10,368
BatchNorm2d-53 [-1, 192, 28, 28] 384
ReLU-54 [-1, 192, 28, 28] 0
Conv2d-55 [-1, 544, 28, 28] 104,448
BatchNorm2d-56 [-1, 544, 28, 28] 1,088
ReLU-57 [-1, 608, 28, 28] 0
Block-58 [-1, 608, 28, 28] 0
Conv2d-59 [-1, 192, 28, 28] 116,736
BatchNorm2d-60 [-1, 192, 28, 28] 384
ReLU-61 [-1, 192, 28, 28] 0
Conv2d-62 [-1, 192, 28, 28] 10,368
BatchNorm2d-63 [-1, 192, 28, 28] 384
ReLU-64 [-1, 192, 28, 28] 0
Conv2d-65 [-1, 544, 28, 28] 104,448
BatchNorm2d-66 [-1, 544, 28, 28] 1,088
ReLU-67 [-1, 640, 28, 28] 0
Block-68 [-1, 640, 28, 28] 0
Conv2d-69 [-1, 192, 28, 28] 122,880
BatchNorm2d-70 [-1, 192, 28, 28] 384
ReLU-71 [-1, 192, 28, 28] 0
Conv2d-72 [-1, 192, 28, 28] 10,368
BatchNorm2d-73 [-1, 192, 28, 28] 384
ReLU-74 [-1, 192, 28, 28] 0
Conv2d-75 [-1, 544, 28, 28] 104,448
BatchNorm2d-76 [-1, 544, 28, 28] 1,088
ReLU-77 [-1, 672, 28, 28] 0
Block-78 [-1, 672, 28, 28] 0
Conv2d-79 [-1, 384, 28, 28] 258,048
BatchNorm2d-80 [-1, 384, 28, 28] 768
ReLU-81 [-1, 384, 28, 28] 0
Conv2d-82 [-1, 384, 14, 14] 41,472
BatchNorm2d-83 [-1, 384, 14, 14] 768
ReLU-84 [-1, 384, 14, 14] 0
Conv2d-85 [-1, 1048, 14, 14] 402,432
BatchNorm2d-86 [-1, 1048, 14, 14] 2,096
Conv2d-87 [-1, 1048, 14, 14] 6,338,304
BatchNorm2d-88 [-1, 1048, 14, 14] 2,096
ReLU-89 [-1, 1072, 14, 14] 0
Block-90 [-1, 1072, 14, 14] 0
Conv2d-91 [-1, 384, 14, 14] 411,648
BatchNorm2d-92 [-1, 384, 14, 14] 768
ReLU-93 [-1, 384, 14, 14] 0
Conv2d-94 [-1, 384, 14, 14] 41,472
BatchNorm2d-95 [-1, 384, 14, 14] 768
ReLU-96 [-1, 384, 14, 14] 0
Conv2d-97 [-1, 1048, 14, 14] 402,432
BatchNorm2d-98 [-1, 1048, 14, 14] 2,096
ReLU-99 [-1, 1096, 14, 14] 0
Block-100 [-1, 1096, 14, 14] 0
Conv2d-101 [-1, 384, 14, 14] 420,864
BatchNorm2d-102 [-1, 384, 14, 14] 768
ReLU-103 [-1, 384, 14, 14] 0
Conv2d-104 [-1, 384, 14, 14] 41,472
BatchNorm2d-105 [-1, 384, 14, 14] 768
ReLU-106 [-1, 384, 14, 14] 0
Conv2d-107 [-1, 1048, 14, 14] 402,432
BatchNorm2d-108 [-1, 1048, 14, 14] 2,096
ReLU-109 [-1, 1120, 14, 14] 0
Block-110 [-1, 1120, 14, 14] 0
Conv2d-111 [-1, 384, 14, 14] 430,080
BatchNorm2d-112 [-1, 384, 14, 14] 768
ReLU-113 [-1, 384, 14, 14] 0
Conv2d-114 [-1, 384, 14, 14] 41,472
BatchNorm2d-115 [-1, 384, 14, 14] 768
ReLU-116 [-1, 384, 14, 14] 0
Conv2d-117 [-1, 1048, 14, 14] 402,432
BatchNorm2d-118 [-1, 1048, 14, 14] 2,096
ReLU-119 [-1, 1144, 14, 14] 0
Block-120 [-1, 1144, 14, 14] 0
Conv2d-121 [-1, 384, 14, 14] 439,296
BatchNorm2d-122 [-1, 384, 14, 14] 768
ReLU-123 [-1, 384, 14, 14] 0
Conv2d-124 [-1, 384, 14, 14] 41,472
BatchNorm2d-125 [-1, 384, 14, 14] 768
ReLU-126 [-1, 384, 14, 14] 0
Conv2d-127 [-1, 1048, 14, 14] 402,432
BatchNorm2d-128 [-1, 1048, 14, 14] 2,096
ReLU-129 [-1, 1168, 14, 14] 0
Block-130 [-1, 1168, 14, 14] 0
Conv2d-131 [-1, 384, 14, 14] 448,512
BatchNorm2d-132 [-1, 384, 14, 14] 768
ReLU-133 [-1, 384, 14, 14] 0
Conv2d-134 [-1, 384, 14, 14] 41,472
BatchNorm2d-135 [-1, 384, 14, 14] 768
ReLU-136 [-1, 384, 14, 14] 0
Conv2d-137 [-1, 1048, 14, 14] 402,432
BatchNorm2d-138 [-1, 1048, 14, 14] 2,096
ReLU-139 [-1, 1192, 14, 14] 0
Block-140 [-1, 1192, 14, 14] 0
Conv2d-141 [-1, 384, 14, 14] 457,728
BatchNorm2d-142 [-1, 384, 14, 14] 768
ReLU-143 [-1, 384, 14, 14] 0
Conv2d-144 [-1, 384, 14, 14] 41,472
BatchNorm2d-145 [-1, 384, 14, 14] 768
ReLU-146 [-1, 384, 14, 14] 0
Conv2d-147 [-1, 1048, 14, 14] 402,432
BatchNorm2d-148 [-1, 1048, 14, 14] 2,096
ReLU-149 [-1, 1216, 14, 14] 0
Block-150 [-1, 1216, 14, 14] 0
Conv2d-151 [-1, 384, 14, 14] 466,944
BatchNorm2d-152 [-1, 384, 14, 14] 768
ReLU-153 [-1, 384, 14, 14] 0
Conv2d-154 [-1, 384, 14, 14] 41,472
BatchNorm2d-155 [-1, 384, 14, 14] 768
ReLU-156 [-1, 384, 14, 14] 0
Conv2d-157 [-1, 1048, 14, 14] 402,432
BatchNorm2d-158 [-1, 1048, 14, 14] 2,096
ReLU-159 [-1, 1240, 14, 14] 0
Block-160 [-1, 1240, 14, 14] 0
Conv2d-161 [-1, 384, 14, 14] 476,160
BatchNorm2d-162 [-1, 384, 14, 14] 768
ReLU-163 [-1, 384, 14, 14] 0
Conv2d-164 [-1, 384, 14, 14] 41,472
BatchNorm2d-165 [-1, 384, 14, 14] 768
ReLU-166 [-1, 384, 14, 14] 0
Conv2d-167 [-1, 1048, 14, 14] 402,432
BatchNorm2d-168 [-1, 1048, 14, 14] 2,096
ReLU-169 [-1, 1264, 14, 14] 0
Block-170 [-1, 1264, 14, 14] 0
Conv2d-171 [-1, 384, 14, 14] 485,376
BatchNorm2d-172 [-1, 384, 14, 14] 768
ReLU-173 [-1, 384, 14, 14] 0
Conv2d-174 [-1, 384, 14, 14] 41,472
BatchNorm2d-175 [-1, 384, 14, 14] 768
ReLU-176 [-1, 384, 14, 14] 0
Conv2d-177 [-1, 1048, 14, 14] 402,432
BatchNorm2d-178 [-1, 1048, 14, 14] 2,096
ReLU-179 [-1, 1288, 14, 14] 0
Block-180 [-1, 1288, 14, 14] 0
Conv2d-181 [-1, 384, 14, 14] 494,592
BatchNorm2d-182 [-1, 384, 14, 14] 768
ReLU-183 [-1, 384, 14, 14] 0
Conv2d-184 [-1, 384, 14, 14] 41,472
BatchNorm2d-185 [-1, 384, 14, 14] 768
ReLU-186 [-1, 384, 14, 14] 0
Conv2d-187 [-1, 1048, 14, 14] 402,432
BatchNorm2d-188 [-1, 1048, 14, 14] 2,096
ReLU-189 [-1, 1312, 14, 14] 0
Block-190 [-1, 1312, 14, 14] 0
Conv2d-191 [-1, 384, 14, 14] 503,808
BatchNorm2d-192 [-1, 384, 14, 14] 768
ReLU-193 [-1, 384, 14, 14] 0
Conv2d-194 [-1, 384, 14, 14] 41,472
BatchNorm2d-195 [-1, 384, 14, 14] 768
ReLU-196 [-1, 384, 14, 14] 0
Conv2d-197 [-1, 1048, 14, 14] 402,432
BatchNorm2d-198 [-1, 1048, 14, 14] 2,096
ReLU-199 [-1, 1336, 14, 14] 0
Block-200 [-1, 1336, 14, 14] 0
Conv2d-201 [-1, 384, 14, 14] 513,024
BatchNorm2d-202 [-1, 384, 14, 14] 768
ReLU-203 [-1, 384, 14, 14] 0
Conv2d-204 [-1, 384, 14, 14] 41,472
BatchNorm2d-205 [-1, 384, 14, 14] 768
ReLU-206 [-1, 384, 14, 14] 0
Conv2d-207 [-1, 1048, 14, 14] 402,432
BatchNorm2d-208 [-1, 1048, 14, 14] 2,096
ReLU-209 [-1, 1360, 14, 14] 0
Block-210 [-1, 1360, 14, 14] 0
Conv2d-211 [-1, 384, 14, 14] 522,240
BatchNorm2d-212 [-1, 384, 14, 14] 768
ReLU-213 [-1, 384, 14, 14] 0
Conv2d-214 [-1, 384, 14, 14] 41,472
BatchNorm2d-215 [-1, 384, 14, 14] 768
ReLU-216 [-1, 384, 14, 14] 0
Conv2d-217 [-1, 1048, 14, 14] 402,432
BatchNorm2d-218 [-1, 1048, 14, 14] 2,096
ReLU-219 [-1, 1384, 14, 14] 0
Block-220 [-1, 1384, 14, 14] 0
Conv2d-221 [-1, 384, 14, 14] 531,456
BatchNorm2d-222 [-1, 384, 14, 14] 768
ReLU-223 [-1, 384, 14, 14] 0
Conv2d-224 [-1, 384, 14, 14] 41,472
BatchNorm2d-225 [-1, 384, 14, 14] 768
ReLU-226 [-1, 384, 14, 14] 0
Conv2d-227 [-1, 1048, 14, 14] 402,432
BatchNorm2d-228 [-1, 1048, 14, 14] 2,096
ReLU-229 [-1, 1408, 14, 14] 0
Block-230 [-1, 1408, 14, 14] 0
Conv2d-231 [-1, 384, 14, 14] 540,672
BatchNorm2d-232 [-1, 384, 14, 14] 768
ReLU-233 [-1, 384, 14, 14] 0
Conv2d-234 [-1, 384, 14, 14] 41,472
BatchNorm2d-235 [-1, 384, 14, 14] 768
ReLU-236 [-1, 384, 14, 14] 0
Conv2d-237 [-1, 1048, 14, 14] 402,432
BatchNorm2d-238 [-1, 1048, 14, 14] 2,096
ReLU-239 [-1, 1432, 14, 14] 0
Block-240 [-1, 1432, 14, 14] 0
Conv2d-241 [-1, 384, 14, 14] 549,888
BatchNorm2d-242 [-1, 384, 14, 14] 768
ReLU-243 [-1, 384, 14, 14] 0
Conv2d-244 [-1, 384, 14, 14] 41,472
BatchNorm2d-245 [-1, 384, 14, 14] 768
ReLU-246 [-1, 384, 14, 14] 0
Conv2d-247 [-1, 1048, 14, 14] 402,432
BatchNorm2d-248 [-1, 1048, 14, 14] 2,096
ReLU-249 [-1, 1456, 14, 14] 0
Block-250 [-1, 1456, 14, 14] 0
Conv2d-251 [-1, 384, 14, 14] 559,104
BatchNorm2d-252 [-1, 384, 14, 14] 768
ReLU-253 [-1, 384, 14, 14] 0
Conv2d-254 [-1, 384, 14, 14] 41,472
BatchNorm2d-255 [-1, 384, 14, 14] 768
ReLU-256 [-1, 384, 14, 14] 0
Conv2d-257 [-1, 1048, 14, 14] 402,432
BatchNorm2d-258 [-1, 1048, 14, 14] 2,096
ReLU-259 [-1, 1480, 14, 14] 0
Block-260 [-1, 1480, 14, 14] 0
Conv2d-261 [-1, 384, 14, 14] 568,320
BatchNorm2d-262 [-1, 384, 14, 14] 768
ReLU-263 [-1, 384, 14, 14] 0
Conv2d-264 [-1, 384, 14, 14] 41,472
BatchNorm2d-265 [-1, 384, 14, 14] 768
ReLU-266 [-1, 384, 14, 14] 0
Conv2d-267 [-1, 1048, 14, 14] 402,432
BatchNorm2d-268 [-1, 1048, 14, 14] 2,096
ReLU-269 [-1, 1504, 14, 14] 0
Block-270 [-1, 1504, 14, 14] 0
Conv2d-271 [-1, 384, 14, 14] 577,536
BatchNorm2d-272 [-1, 384, 14, 14] 768
ReLU-273 [-1, 384, 14, 14] 0
Conv2d-274 [-1, 384, 14, 14] 41,472
BatchNorm2d-275 [-1, 384, 14, 14] 768
ReLU-276 [-1, 384, 14, 14] 0
Conv2d-277 [-1, 1048, 14, 14] 402,432
BatchNorm2d-278 [-1, 1048, 14, 14] 2,096
ReLU-279 [-1, 1528, 14, 14] 0
Block-280 [-1, 1528, 14, 14] 0
Conv2d-281 [-1, 768, 14, 14] 1,173,504
BatchNorm2d-282 [-1, 768, 14, 14] 1,536
ReLU-283 [-1, 768, 14, 14] 0
Conv2d-284 [-1, 768, 7, 7] 165,888
BatchNorm2d-285 [-1, 768, 7, 7] 1,536
ReLU-286 [-1, 768, 7, 7] 0
Conv2d-287 [-1, 2176, 7, 7] 1,671,168
BatchNorm2d-288 [-1, 2176, 7, 7] 4,352
Conv2d-289 [-1, 2176, 7, 7] 29,924,352
BatchNorm2d-290 [-1, 2176, 7, 7] 4,352
ReLU-291 [-1, 2304, 7, 7] 0
Block-292 [-1, 2304, 7, 7] 0
Conv2d-293 [-1, 768, 7, 7] 1,769,472
BatchNorm2d-294 [-1, 768, 7, 7] 1,536
ReLU-295 [-1, 768, 7, 7] 0
Conv2d-296 [-1, 768, 7, 7] 165,888
BatchNorm2d-297 [-1, 768, 7, 7] 1,536
ReLU-298 [-1, 768, 7, 7] 0
Conv2d-299 [-1, 2176, 7, 7] 1,671,168
BatchNorm2d-300 [-1, 2176, 7, 7] 4,352
ReLU-301 [-1, 2432, 7, 7] 0
Block-302 [-1, 2432, 7, 7] 0
Conv2d-303 [-1, 768, 7, 7] 1,867,776
BatchNorm2d-304 [-1, 768, 7, 7] 1,536
ReLU-305 [-1, 768, 7, 7] 0
Conv2d-306 [-1, 768, 7, 7] 165,888
BatchNorm2d-307 [-1, 768, 7, 7] 1,536
ReLU-308 [-1, 768, 7, 7] 0
Conv2d-309 [-1, 2176, 7, 7] 1,671,168
BatchNorm2d-310 [-1, 2176, 7, 7] 4,352
ReLU-311 [-1, 2560, 7, 7] 0
Block-312 [-1, 2560, 7, 7] 0
AdaptiveAvgPool2d-313 [-1, 2560, 1, 1] 0
Linear-314 [-1, 4] 10,244
================================================================
Total params: 67,994,324
Trainable params: 67,994,324
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 489.24
Params size (MB): 259.38
Estimated Total Size (MB): 749.20
----------------------------------------------------------------
DPN(
(conv1): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv2): Sequential(
(0): Block(
(conv1): Sequential(
(0): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(96, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Sequential(
(0): Conv2d(64, 272, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(1): Block(
(conv1): Sequential(
(0): Conv2d(288, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(96, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(2): Block(
(conv1): Sequential(
(0): Conv2d(304, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(96, 272, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
)
(conv3): Sequential(
(0): Block(
(conv1): Sequential(
(0): Conv2d(320, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Sequential(
(0): Conv2d(320, 544, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(1): Block(
(conv1): Sequential(
(0): Conv2d(576, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(2): Block(
(conv1): Sequential(
(0): Conv2d(608, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(3): Block(
(conv1): Sequential(
(0): Conv2d(640, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(192, 544, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
)
(conv4): Sequential(
(0): Block(
(conv1): Sequential(
(0): Conv2d(672, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Sequential(
(0): Conv2d(672, 1048, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(1): Block(
(conv1): Sequential(
(0): Conv2d(1072, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(2): Block(
(conv1): Sequential(
(0): Conv2d(1096, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(3): Block(
(conv1): Sequential(
(0): Conv2d(1120, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(4): Block(
(conv1): Sequential(
(0): Conv2d(1144, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(5): Block(
(conv1): Sequential(
(0): Conv2d(1168, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(6): Block(
(conv1): Sequential(
(0): Conv2d(1192, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(7): Block(
(conv1): Sequential(
(0): Conv2d(1216, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(8): Block(
(conv1): Sequential(
(0): Conv2d(1240, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(9): Block(
(conv1): Sequential(
(0): Conv2d(1264, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(10): Block(
(conv1): Sequential(
(0): Conv2d(1288, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(11): Block(
(conv1): Sequential(
(0): Conv2d(1312, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(12): Block(
(conv1): Sequential(
(0): Conv2d(1336, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(13): Block(
(conv1): Sequential(
(0): Conv2d(1360, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(14): Block(
(conv1): Sequential(
(0): Conv2d(1384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(15): Block(
(conv1): Sequential(
(0): Conv2d(1408, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(16): Block(
(conv1): Sequential(
(0): Conv2d(1432, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(17): Block(
(conv1): Sequential(
(0): Conv2d(1456, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(18): Block(
(conv1): Sequential(
(0): Conv2d(1480, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(19): Block(
(conv1): Sequential(
(0): Conv2d(1504, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(384, 1048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
)
(conv5): Sequential(
(0): Block(
(conv1): Sequential(
(0): Conv2d(1528, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(768, 2176, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(2176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Sequential(
(0): Conv2d(1528, 2176, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(2176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(1): Block(
(conv1): Sequential(
(0): Conv2d(2304, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(768, 2176, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(2176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
(2): Block(
(conv1): Sequential(
(0): Conv2d(2432, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv2): Sequential(
(0): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
)
(conv3): Sequential(
(0): Conv2d(768, 2176, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(2176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(relu): ReLU(inplace=True)
)
)
(pool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=2560, out_features=4, bias=True)
)
2. 训练日志
Start training...
[2023-03-03 15:05:47] Epoch: 1, Train_acc:21.9%, Train_loss:1.390, Test_acc:23.0%, Test_loss:1.424, Lr:1.00E-07
acc = 23.0%, saving model to best.pkl
[2023-03-03 15:06:14] Epoch: 2, Train_acc:21.7%, Train_loss:1.398, Test_acc:26.5%, Test_loss:1.405, Lr:1.00E-07
acc = 26.5%, saving model to best.pkl
[2023-03-03 15:06:41] Epoch: 3, Train_acc:22.3%, Train_loss:1.384, Test_acc:25.7%, Test_loss:1.389, Lr:1.00E-07
[2023-03-03 15:07:07] Epoch: 4, Train_acc:23.2%, Train_loss:1.381, Test_acc:25.7%, Test_loss:1.388, Lr:1.00E-07
[2023-03-03 15:07:33] Epoch: 5, Train_acc:22.8%, Train_loss:1.386, Test_acc:24.8%, Test_loss:1.392, Lr:1.00E-07
[2023-03-03 15:07:59] Epoch: 6, Train_acc:24.6%, Train_loss:1.386, Test_acc:27.4%, Test_loss:1.391, Lr:1.00E-07
acc = 27.4%, saving model to best.pkl
[2023-03-03 15:08:28] Epoch: 7, Train_acc:26.8%, Train_loss:1.369, Test_acc:30.1%, Test_loss:1.375, Lr:1.00E-07
acc = 30.1%, saving model to best.pkl
[2023-03-03 15:08:57] Epoch: 8, Train_acc:25.9%, Train_loss:1.387, Test_acc:26.5%, Test_loss:1.371, Lr:1.00E-07
[2023-03-03 15:09:35] Epoch: 9, Train_acc:26.5%, Train_loss:1.375, Test_acc:28.3%, Test_loss:1.395, Lr:1.00E-07
[2023-03-03 15:11:05] Epoch:10, Train_acc:27.4%, Train_loss:1.378, Test_acc:28.3%, Test_loss:1.368, Lr:1.00E-07
Done
EVAL 0.30088, 1.37468