PyTorch实现:经典网络 NiN (Network in Network)
多输入多输出卷积:
一般输入的图像具有 $3 \times h \times w$ 的形状,即通道数为 3、尺寸为 $h \times w$。一般性的,对于形状为 $(c_{in}, h, w)$ 的输入数据,输入的通道数为 $c_{in}$,定义卷积核的输出通道为 $c_{out}$,卷积核的窗口形状为 $k_h \times k_w$,这样卷积核的形状就是 $(c_{in}, c_{out}, k_h, k_w)$(注意:PyTorch 的 `nn.Conv2d` 实际以 $(c_{out}, c_{in}, k_h, k_w)$ 的顺序存储权重)。
卷积层本质上是通过卷积核对图像的聚合操作,可以有效提取相邻像素之间的相关特征,识别元素间相互作用。
1. 1x1卷积
很显然,1×1 卷积只作用于单个像素位置,因此无法实现卷积在空间上对相邻像素的聚合效应。但是,1×1 卷积的作用在于,它可以在同一像素位置跨通道地聚合不同通道的特征,起到降维或升维的作用,从而改变参数规模。
2. NiN 块
在卷积的后面添加 1×1 卷积,加强了同一像素位置上对应特征的重新组合,并降低了维度。
def nin_block(self, in_channels, out_channels, kernel_size, strides, padding):
    r'''
    Build one NiN block: a spatial convolution followed by two 1x1
    convolutions, each followed by a ReLU.

    parameters:
        in_channels: number of input channels
        out_channels: number of output channels of every convolution in the block
        kernel_size: kernel size of the first convolution
        strides: stride of the first convolution
        padding: padding of the first convolution

    returns:
        an nn.Sequential holding the three Conv2d/ReLU pairs (six layers)
    '''
    return nn.Sequential(
        nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            # The stride has to be forwarded explicitly; otherwise Conv2d
            # falls back to its default of 1 and `strides` is ignored.
            stride=strides,
            padding=padding
        ),
        nn.ReLU(),
        # 1x1 convolutions recombine the channels at each pixel position.
        nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU()
    )
3. Network in Network
网络由多个 NiN 块组合而成,并在最后使用 AdaptiveAvgPool2d(全局平均池化),这样最后的分类层没有参数,降低了网络的计算量。同时,这也提升了模型的可解释性:可以将最后的特征图看作各分类目标的 confidence map。
在模型中,添加Dropout层来降低过拟合。
class NIN(nn.Module):
    r"""
    Network in Network classifier.

    Stacks one NiN block plus max pooling per entry of ``num_channels``, then
    applies dropout, a final NiN block with 10 output channels, global average
    pooling and flattening, so the output has shape (batch, 10).
    """

    def __init__(self, in_channels, num_channels):
        r"""
        parameters:
            in_channels: number of channels of the training samples
            num_channels: output channel count of each NiN block, in order
        """
        super(NIN, self).__init__()
        self.in_channels = in_channels
        self.num_channels = num_channels
        self._conv_blk = self.vgg_strc()

    def forward(self, x):
        r"""
        Run ``x`` through every layer in order and return the final tensor.
        """
        # Layers are applied one at a time so intermediate shapes can be
        # inspected by enabling the print below.
        for layer in self._conv_blk:
            x = layer(x)
            # print(layer.__class__.__name__, "output shape: ", x.shape)
        return x

    def vgg_strc(self):
        r"""
        Assemble the full layer stack as a single nn.Sequential.
        """
        layers = []
        in_channels = self.in_channels
        for out_channels in self.num_channels:
            # Stride 1 keeps the feature maps large enough for the 11x11
            # kernels of the later blocks; a larger stride in every block
            # would shrink a 224x224 input below the kernel size.
            layers.append(self.nin_block(in_channels, out_channels, kernel_size=11, strides=1, padding=0))
            layers.append(nn.MaxPool2d(3, stride=2))
            in_channels = out_channels
        layers.append(nn.Dropout(0.5))
        # Final block maps to 10 channels; the global average pooling below
        # collapses each channel into a single value.
        layers.append(self.nin_block(self.num_channels[-1], 10, kernel_size=3, strides=1, padding=1))
        layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        layers.append(nn.Flatten())
        return nn.Sequential(*layers)

    def nin_block(self, in_channels, out_channels, kernel_size, strides, padding):
        r"""
        Build one NiN block: a spatial convolution followed by two 1x1
        convolutions, each followed by a ReLU.

        parameters:
            in_channels: number of input channels
            out_channels: number of output channels of every convolution
            kernel_size: kernel size of the first convolution
            strides: stride of the first convolution
            padding: padding of the first convolution
        """
        return nn.Sequential(
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                # Forward the stride explicitly; Conv2d defaults to 1 when it
                # is omitted, which would make `strides` a no-op.
                stride=strides,
                padding=padding
            ),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU()
        )
# Output channel counts of the three NiN blocks at full scale.
num_channels = (96, 256, 384)
# Limited by the local machine's performance, so shrink the network:
# every channel count is integer-divided by `ratio`.
ratio = 16
small_num_channels = [width // ratio for width in num_channels]
# Single-channel input, reduced channel widths.
net = NIN(1, small_num_channels)
# One random 1x224x224 sample to check the output shape.
x = torch.randn(1, 1, 224, 224)
net(x).shape
输出:
Sequential output shape: torch.Size([1, 6, 214, 214])
MaxPool2d output shape: torch.Size([1, 6, 106, 106])
Sequential output shape: torch.Size([1, 16, 96, 96])
MaxPool2d output shape: torch.Size([1, 16, 47, 47])
Sequential output shape: torch.Size([1, 24, 37, 37])
MaxPool2d output shape: torch.Size([1, 24, 18, 18])
Dropout output shape: torch.Size([1, 24, 18, 18])
Sequential output shape: torch.Size([1, 10, 18, 18])
AdaptiveAvgPool2d output shape: torch.Size([1, 10, 1, 1])
Flatten output shape: torch.Size([1, 10])
4. FashionMNIST数据集上训练测试
def load_datasets_FashionMNIST(batch_size, resize=None):
    """
    Build the FashionMNIST training and test data loaders.

    parameters:
        batch_size: number of samples per batch for both loaders
        resize: optional size passed to transforms.Resize, applied before
            the conversion to tensors; a falsy value skips resizing

    returns:
        a (train_loader, test_loader) tuple; only the training loader shuffles
    """
    steps = [transforms.ToTensor()]
    if resize:
        # list.insert mutates in place and returns None, so nothing is bound.
        steps.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(steps)
    # Both splits are stored under ../data and downloaded when missing.
    train_data = torchvision.datasets.FashionMNIST(root="../data", train=True, transform=trans, download=True)
    test_data = torchvision.datasets.FashionMNIST(root="../data", train=False, transform=trans, download=True)
    print("FashionMNIST 下载完成...")
    return (torch.utils.data.DataLoader(train_data, batch_size, shuffle=True),
            torch.utils.data.DataLoader(test_data, batch_size, shuffle=False))
# Batches of 128 samples, with every image resized to 224 before batching.
train_iter, test_iter = load_datasets_FashionMNIST(batch_size=128, resize=224)