本文是个人对 bilinear-cnn-faster-master 代码的理解。bilinear-cnn-faster-master 是 Hao Zhang 编写的 Bilinear CNN 的改进版。
1 文件结构
文件结构和主要改进如下图示:
2 model.py文件
model的主要改进如下:
代码如下:
# -*- coding: utf-8 -*-
"""Mean field B-CNN model."""
import torch
import torchvision
# Global PyTorch configuration, applied once when this module is imported.
# Make 32-bit float the default for newly created floating-point tensors.
torch.set_default_dtype(torch.float32)
torch.set_default_tensor_type(torch.FloatTensor)
# Seed the CPU generator and the generators of all CUDA devices.
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
# Fixed: this attribute was misspelled `benckmark`, which only created an
# unused attribute and left the cuDNN auto-tuner switched off.
torch.backends.cudnn.benchmark = True
# Public names exported by a star-import of this module.
__all__ = ['BCNN']
# Authorship, licensing and version metadata for this module.
__author__ = 'Hao Zhang'
__copyright__ = '2018 LAMDA'
__date__ = '2018-01-09'
__email__ = 'zhangh0214@gmail.com'
__license__ = 'CC BY-SA 3.0'
__status__ = 'Development'
__updated__ = '2018-05-21'
__version__ = '13.7'
class BCNN(torch.nn.Module):
    """Bilinear CNN (B-CNN) model built on VGG-16.

    The last two modules of the torchvision VGG-16 feature extractor (the
    final ReLU and pool5) are dropped, and the ReLU is re-added as a
    separate, non-inplace layer named ``relu5_3``.

    The B-CNN model is illustrated as follows.
    conv1^2 (64) -> pool1 -> conv2^2 (128) -> pool2 -> conv3^3 (256) -> pool3
    -> conv4^3 (512) -> pool4 -> conv5^3 (512) -> bilinear pooling -> fc.
    The network accepts a 3*448*448 input, and the relu5-3 activation has
    shape 512*28*28 since we down-sample 4 times.

    Attributes:
        _is_all, bool: In the all/fc phase.
        features, torch.nn.Module: Convolution and pooling layers. Only
            created when ``is_all`` is True.
        relu5_3, torch.nn.Module: ReLU applied to the conv5_3 output.
        fc, torch.nn.Module: Linear classifier on the bilinear feature.
    """

    def __init__(self, num_classes, is_all):
        """Declare all needed layers.

        Args:
            num_classes, int.
            is_all, bool: In the all/fc phase. When True the pretrained
                VGG-16 convolutional layers are part of the model; when
                False only ``relu5_3`` and ``fc`` are created.
        """
        super().__init__()
        self._is_all = is_all

        if self._is_all:
            # Convolution and pooling layers of VGG-16.
            self.features = torchvision.models.vgg16(pretrained=True).features
            # Drop the last two children: the final ReLU and pool5.
            self.features = torch.nn.Sequential(
                *list(self.features.children())[:-2])

        # The ReLU removed above, re-added as a non-inplace layer.
        self.relu5_3 = torch.nn.ReLU(inplace=False)

        # Classification layer.
        self.fc = torch.nn.Linear(
            in_features=512 * 512, out_features=num_classes, bias=True)

        if not self._is_all:
            # fc phase: run the initializer over every sub-module.
            self.apply(BCNN._initParameter)

    @staticmethod
    def _initParameter(module):
        """Initialize the weight and bias for each module.

        BatchNorm2d gets weight 1 and bias 0; Conv2d gets Kaiming-normal
        weights and a zero bias; Linear only has its bias zeroed. Other
        module types are left untouched.

        Args:
            module, torch.nn.Module.
        """
        if isinstance(module, torch.nn.BatchNorm2d):
            torch.nn.init.constant_(module.weight, val=1.0)
            torch.nn.init.constant_(module.bias, val=0.0)
        elif isinstance(module, torch.nn.Conv2d):
            torch.nn.init.kaiming_normal_(module.weight, a=0, mode='fan_out',
                                          nonlinearity='relu')
            if module.bias is not None:
                torch.nn.init.constant_(module.bias, val=0.0)
        elif isinstance(module, torch.nn.Linear):
            if module.bias is not None:
                torch.nn.init.constant_(module.bias, val=0.0)

    def forward(self, X):
        """Forward pass of the network.

        Args:
            X, torch.Tensor: N*3*448*448 images when ``is_all`` is True,
                otherwise N*512*28*28 feature maps that are fed directly
                to ``relu5_3``.

        Returns:
            score, torch.Tensor (N*num_classes).
        """
        N = X.size(0)
        if self._is_all:
            # Convolutional trunk: (N, 3, 448, 448) -> (N, 512, 28, 28).
            assert X.size() == (N, 3, 448, 448)
            X = self.features(X)
            assert X.size() == (N, 512, 28, 28)

        # The main branch.
        X = self.relu5_3(X)
        assert X.size() == (N, 512, 28, 28)

        # Classical bilinear pooling: flatten the spatial grid, then take
        # the per-sample Gram matrix averaged over the 28*28 locations.
        X = torch.reshape(X, (N, 512, 28 * 28))
        X = torch.bmm(X, torch.transpose(X, 1, 2)) / (28 * 28)
        assert X.size() == (N, 512, 512)
        X = torch.reshape(X, (N, 512 * 512))

        # Normalization. X is non-negative here (products of post-ReLU
        # activations), so a plain square root is used rather than a signed
        # one; the epsilon keeps the argument strictly positive.
        X = torch.sqrt(X + 1e-5)
        X = torch.nn.functional.normalize(X)

        # Classification.
        X = self.fc(X)
        return X
3 train.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fine-tune all layers for bilinear CNN.
This is the second step.
"""
import os
import time
import torch
import torchvision
import cub200
import model
torch.set_default_dtype(torch.float32)
torch.set_default_tensor_type(torch.FloatTensor)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.benchmark = True
__all__ = ['BCNNManager']
__author__ = 'Hao Zhang'
__copyright__ = '2018 LAMDA'
__date__ = '2018-01-11'
__email__ = 'zhangh0214@gmail.com'
__license__