Faster-RCNN系列 三(Framework代码,python)
Faster-RCNN系列 主要分为两大部分组成,
RPN网络结构
Fast-RCNN网络结构
这里引用@霹雳吧啦Wz的Faster-RCNN流程图
再次分为4大模块:
1、transform(归一化处理,resize)
2、BACKbone(特征提取)
3、RPN(建议框,建议框的损失)
4、roi_head(预测结果和预测损失)
5、transform(将图像映射回原图)
定义FasterRCNNBase类,(目的:定义网络框架)
初始化函数__init__(定义所需要的方法,)
def __init__(self, backbone, rpn, roi_heads, transform):
super(FasterRCNNBase, self).__init__()
self.transform = transform
self.backbone = backbone
self.rpn = rpn
self.roi_heads = roi_heads
# used only on torchscript mode
self._has_warned = False
正向传播(首选判断数据的正确性)
def forward(self, images, targets=None):
# type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
if self.training and targets is None:
raise ValueError("In training mode, targets should be passed")
if self.training:
assert targets is not None
for target in targets: # 进一步判断传入的target的boxes参数是否符合规定
boxes = target["boxes"]
if isinstance(boxes, torch.Tensor):
if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
raise ValueError("Expected target boxes to be a tensor"
"of shape [N, 4], got {:}.".format(
boxes.shape))
else:
raise ValueError("Expected target boxes to be of type "
"Tensor, got {:}.".format(type(boxes)))
original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
for img in images:
val = img.shape[-2:]
assert len(val) == 2 # 防止输入的是个一维向量
original_image_sizes.append((val[0], val[1]))
images, targets = self.transform(images, targets) # 对图像进行预处理
features = self.backbone(images.tensors) # 将图像输入backbone得到特征图
if isinstance(features, torch.Tensor): # 若只在一层特征层上预测,将feature放入有序字典中,并编号为‘0’
features = OrderedDict([('0', features)]) # 若在多层特征层上预测,传入的就是一个有序字典
# 将特征层以及标注target信息传入rpn中
# proposals: List[Tensor], Tensor_shape: [num_proposals, 4],
# 每个proposals是绝对坐标,且为(x1, y1, x2, y2)格式
proposals, proposal_losses = self.rpn(images, features, targets)
# 将rpn生成的数据以及标注target信息传入fast rcnn后半部分
detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
# 对网络的预测结果进行后处理(主要将bboxes还原到原图像尺度上)
detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
losses = {}
losses.update(detector_losses)
losses.update(proposal_losses)
if torch.jit.is_scripting():
if not self._has_warned:
warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
self._has_warned = True
return losses, detections
else:
return self.eager_outputs(losses, detections)
定义Faster RCNN 网络框架(继承FasterRCNNBase)
class FasterRCNN(FasterRCNNBase):
初始化参数
class FasterRCNN(FasterRCNNBase):
def __init__(self, backbone, num_classes=None,
# transform parameter
min_size=800, max_size=1000, # 预处理resize时限制的最小尺寸与最大尺寸
image_mean=None, image_std=None, # 预处理normalize时使用的均值和方差
# RPN parameters
rpn_anchor_generator=None, rpn_head=None,
rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, # rpn中在nms处理前保留的proposal数(根据score)
rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, # rpn中在nms处理后保留的proposal数
rpn_nms_thresh=0.7, # rpn中进行nms处理时使用的iou阈值
rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, # rpn计算损失时,采集正负样本设置的阈值
rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, # rpn计算损失时采样的样本数,以及正样本占总样本的比例
# Box parameters
box_roi_pool=None, box_head=None, box_predictor=None,
# 移除低目标概率 fast rcnn中进行nms处理的阈值 对预测结果根据score排序取前100个目标
box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, # fast rcnn计算误差时,采集正负样本设置的阈值
box_batch_size_per_image=512, box_positive_fraction=0.25, # fast rcnn计算误差时采样的样本数,以及正样本占所有样本的比例
bbox_reg_weights=None):
if not hasattr(backbone, "out_channels"):
raise ValueError(
"backbone should contain an attribute out_channels"
"specifying the number of output channels (assumed to be the"
"same for all the levels"
)
assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))
assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
if num_classes is not None:
if box_predictor is not None:
raise ValueError("num_classes should be None when box_predictor "
"is specified")
else:
if box_predictor is None:
raise ValueError("num_classes should not be None when box_predictor "
"is not specified")
# 预测特征层的channels
out_channels = backbone.out_channels
# 若anchor生成器为空,则自动生成针对resnet50_fpn的anchor生成器
if rpn_anchor_generator is None:
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorsGenerator(
anchor_sizes, aspect_ratios
)
# 生成RPN通过滑动窗口预测网络部分
if rpn_head is None:
rpn_head = RPNHead(
out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
)
# 默认rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,
# 默认rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000,
rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
# 定义整个RPN框架
rpn = RegionProposalNetwork(
rpn_anchor_generator, rpn_head,
rpn_fg_iou_thresh, rpn_bg_iou_thresh,
rpn_batch_size_per_image, rpn_positive_fraction,
rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
# Multi-scale RoIAlign pooling
if box_roi_pool is None:
box_roi_pool = MultiScaleRoIAlign(
featmap_names=['0', '1', '2', '3'], # 在哪些特征层进行roi pooling
output_size=[7, 7],
sampling_ratio=2)
# fast RCNN中roi pooling后的展平处理两个全连接层部分
if box_head is None:
resolution = box_roi_pool.output_size[0] # 默认等于7
representation_size = 1024
box_head = TwoMLPHead(
out_channels * resolution ** 2,
representation_size
)
# 在box_head的输出上预测部分
if box_predictor is None:
representation_size = 1024
box_predictor = FastRCNNPredictor(
representation_size,
num_classes)
# 将roi pooling, box_head以及box_predictor结合在一起
roi_heads = RoIHeads(
# box
box_roi_pool, box_head, box_predictor,
box_fg_iou_thresh, box_bg_iou_thresh, # 0.5 0.5
box_batch_size_per_image, box_positive_fraction, # 512 0.25
bbox_reg_weights,
box_score_thresh, box_nms_thresh, box_detections_per_img) # 0.05 0.5 100
if image_mean is None:
image_mean = [0.485, 0.456, 0.406]
if image_std is None:
image_std = [0.229, 0.224, 0.225]
# 对数据进行标准化,缩放,打包成batch等处理部分
transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)