Faster R-CNN源代码中faster_rcnn文件夹中包含三个文件 faster_rcnn.py,resnet.py,vgg16.py。
1.faster_rcnn.py注释
class _fasterRCNN(nn.Module):
""" faster RCNN """
def __init__(self, classes, class_agnostic):#class-agnostic 方式只回归2类bounding box,即前景和背景
super(_fasterRCNN, self).__init__()
self.classes = classes #类别
self.n_classes = len(classes)#类别数
self.class_agnostic = class_agnostic #前景背景类
# loss 两种loss
self.RCNN_loss_cls = 0
self.RCNN_loss_bbox = 0
# define rpn 定义RPN网络
self.RCNN_rpn = _RPN(self.dout_base_model)
self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)#候选区域对应gt
self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)#POOLING
self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
self.RCNN_roi_crop = _RoICrop()
def forward(self, im_data, im_info, gt_boxes, num_boxes):#图像 图像信息 标注信息 框数目
batch_size = im_data.size(0)
im_info = im_info.data
gt_boxes = gt_boxes.data
num_boxes = num_boxes.data
# feed image data to base model to obtain base feature map
#将图像数据馈送到基础模型以获得基础特征图
base_feat = self.RCNN_base(im_data)
# feed base feature map tp RPN to obtain rois
# 特征图反馈到RPN得到ROIS
rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
# if it is training phrase, then use ground trubut bboxes for refining
#如果是在训练 用ground truth回归
if self.training:
roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
rois_label = Variable(rois_label.view(-1).long())
rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
else:
rois_label = None
rois_target = None
rois_inside_ws = None
rois_outside_ws = None
rpn_loss_cls = 0
rpn_loss_bbox = 0
rois = Variable(rois)
# do roi pooling based on predicted rois
#进行ROI POOLING,下面pooling方式
if cfg.POOLING_MODE == 'crop':
# pdb.set_trace()
# pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
if cfg.CROP_RESIZE_WITH_MAX_POOL:
pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
elif cfg.POOLING_MODE == 'align':
pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
elif cfg.POOLING_MODE == 'pool':
pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
# feed pooled features to top model
#pooling后的特征反馈到上次模型
pooled_feat = self._head_to_tail(pooled_feat)
# compute bbox offset
#计算bounding box的偏移
bbox_pred = self.RCNN_bbox_pred(pooled_feat)
if self.training and not self.class_agnostic:
# select the corresponding columns according to roi labels
# 根据roi标签选择相应的列
bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
bbox_pred = bbox_pred_select.squeeze(1)
# compute object classification probability
# 计算对象分类概率
cls_score = self.RCNN_cls_score(pooled_feat)
cls_prob = F.softmax(cls_score, 1)
RCNN_loss_cls = 0
RCNN_loss_bbox = 0
if self.training:
# classification loss
#分类损失
RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
# bounding box regression L1 loss
#回归损失
RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
#初始化权重
def _init_weights(self):
def normal_init(m, mean, stddev, truncated=False):#均值 标准差
#截断正态 随机正态
"""
weight initalizer: truncated normal and random normal.
"""
# x is a parameter
if truncated:
m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) # not a perfect approximation
else:
m.weight.data.normal_(mean, stddev)
m.bias.data.zero_()
normal_init(self.RCNN_rpn.RPN_Conv, 0, 0.01, cfg.TRAIN.TRUNCATED)
normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED)
normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED)
normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED)
def create_architecture(self):
self._init_modules()
self._init_weights()
【占坑,未完待续…】