from __future__ import absolute_import
import torch
import torch.nn as nn
import numpy as np
import math
import yaml
from model.utils.config import cfg
from .generate_anchors import generate_anchors
from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch
from model.nms.nms_wrapper import nms
import pdb
# Module-level debug switch; nothing in the visible code reads it.
DEBUG = False
class _ProposalLayer(nn.Module):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    The result is a "rois" tensor holding, per image, up to
    RPN_POST_NMS_TOP_N regions of interest. Each row is a 5-tuple
    (n, x1, y1, x2, y2): the image's index n inside the batch followed by
    the rectangle (x1, y1, x2, y2).
    """

    def __init__(self, feat_stride, scales, ratios):
        """Build the base anchors.

        Args:
            feat_stride: factor that converts a feature-map cell index into
                an offset applied to the anchors (cell index * feat_stride).
            scales: anchor scales, forwarded to ``generate_anchors``.
            ratios: anchor aspect ratios, forwarded to ``generate_anchors``.
        """
        super(_ProposalLayer, self).__init__()
        self._feat_stride = feat_stride
        self._anchors = torch.from_numpy(
            generate_anchors(scales=np.array(scales),
                             ratios=np.array(ratios))).float()
        # A: number of base anchors placed at every feature-map cell.
        self._num_anchors = self._anchors.size(0)

    def _shifted_anchors(self, feat_height, feat_width, like):
        """Enumerate the base anchors at every feature-map cell.

        Args:
            feat_height: number of feature-map rows.
            feat_width: number of feature-map columns.
            like: tensor whose type the anchors are converted to
                (via ``type_as``).

        Returns:
            Tensor of shape (K * A, 4) with K = feat_height * feat_width.
            Rows are grouped by cell (row-major, x varying fastest) and,
            inside a cell, follow the order of the base anchors.
        """
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        # Coordinate grids of shape (feat_height, feat_width).
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        # One (x, y, x, y) offset per cell -> shape (K, 4).
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(),
                       shift_x.ravel(), shift_y.ravel())).transpose())
        # The transposed array is not contiguous in memory; make it so
        # before it is reshaped with view().
        shifts = shifts.contiguous().type_as(like).float()

        num_anchors = self._num_anchors
        num_cells = shifts.size(0)

        # Keep the converted anchors on the module so that later calls with
        # the same tensor type do not have to convert them again.
        self._anchors = self._anchors.type_as(like)

        # (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4).
        anchors = (self._anchors.view(1, num_anchors, 4)
                   + shifts.view(num_cells, 1, 4))
        return anchors.view(num_cells * num_anchors, 4)

    def forward(self, input):
        """Turn RPN outputs into a fixed-size batch of proposals.

        Algorithm:
          1. for each (H, W) location generate A anchor boxes and apply the
             predicted bbox deltas of that location to them
          2. clip the predicted boxes to the image
          3. sort all (proposal, score) pairs by score, highest first
          4. take the top pre_nms_topN proposals before NMS
          5. apply NMS with threshold RPN_NMS_THRESH
          6. take the top post_nms_topN proposals after NMS

        Removing boxes smaller than RPN_MIN_SIZE is NOT performed here;
        ``_filter_boxes`` is available but unused by this method.

        Args:
            input: sequence of four items:
                input[0]: 4-D class scores; the first A channels are
                    background probabilities and the remaining channels
                    foreground probabilities (only the latter are used).
                input[1]: 4-D predicted bbox deltas. Presumably
                    (N, 4 * A, H, W) so that they line up with the
                    anchors -- confirm against the RPN head.
                input[2]: image info handed to ``clip_boxes``.
                input[3]: key into ``cfg`` selecting the RPN settings
                    (RPN_PRE_NMS_TOP_N, RPN_POST_NMS_TOP_N,
                    RPN_NMS_THRESH).

        Returns:
            Tensor of shape (N, R, 5). Column 0 is the batch index and
            columns 1-4 the proposal box. R is post_nms_topN when that is
            positive, otherwise the largest number of proposals kept for
            any image. Images with fewer proposals are zero-padded.
        """
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_deltas.size(0)
        feat_height, feat_width = scores.size(2), scores.size(3)

        # Every image in the batch shares the same anchors; expand() only
        # creates a broadcast view over the batch dimension.
        anchors = self._shifted_anchors(feat_height, feat_width, scores)
        num_boxes = anchors.size(0)
        anchors = anchors.view(1, num_boxes, 4).expand(batch_size, num_boxes, 4)

        # Move channels last and flatten so that deltas and scores follow
        # the same (cell, anchor) order as the anchors.
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via the predicted transformations,
        # then clip them so they lie inside the image.
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
        proposals = clip_boxes(proposals, im_info, batch_size)

        # Per-image ranking of the proposals, best score first.
        _, order = torch.sort(scores, 1, True)

        kept = []
        for i in range(batch_size):
            order_single = order[i]
            # Slicing clamps to the available length, so no explicit
            # comparison against the number of proposals is needed.
            if pre_nms_topN > 0:
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals[i][order_single, :]
            scores_single = scores[i][order_single].view(-1, 1)

            # nms is expected to return indices into the rows it was given,
            # best-scoring first -- confirm against model.nms.nms_wrapper.
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            kept.append(proposals_single[keep_idx_i, :])

        # A non-positive post_nms_topN means "no limit"; size the output
        # from what was actually kept instead of allocating an empty (or
        # invalid) buffer that the proposals would not fit into.
        if post_nms_topN > 0:
            num_rows = post_nms_topN
        else:
            num_rows = max([p.size(0) for p in kept] or [0])

        output = scores.new(batch_size, num_rows, 5).zero_()
        for i, proposals_single in enumerate(kept):
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            # Rows past num_proposal stay zero (padding).
            output[i, :num_proposal, 1:] = proposals_single

        return output

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

    def _filter_boxes(self, boxes, min_size):
        """Flag boxes whose width and height both reach ``min_size``.

        Args:
            boxes: tensor indexed as boxes[n, k, c], where c = 0..3 is read
                as (x1, y1, x2, y2).
            min_size: tensor with one minimum side length per image; it is
                reshaped to (-1, 1) and expanded across the boxes.

        Returns:
            Mask with the shape of boxes[:, :, 0]; an entry is set when
            neither side of the box is smaller than the image's min_size.
        """
        # Widths/heights use the inclusive-pixel convention (+1).
        ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
        hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
        min_size = min_size.view(-1, 1)
        keep = (ws >= min_size.expand_as(ws)) & (hs >= min_size.expand_as(hs))
        return keep
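# faster-rcnn-pytorch: notes on important source code -> proposal_layer.py
# Latest recommended article, published 2024-01-13 18:20:16.
# This article walks through the source of the Proposal Layer, which generates
# candidate regions in the PyTorch implementation of Faster R-CNN, and discusses
# how it works and the role it plays in object detection.
# Summary generated by CSDN using AI.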