Faster-RCNN系列三（Framework代码，python）

最新推荐文章于 2024-01-23 14:50:24 发布

一直小老虎

最新推荐文章于 2024-01-23 14:50:24 发布

阅读量1k

点赞数 1

文章标签：深度学习 python

本文链接：https://blog.csdn.net/weixin_45344267/article/details/113753403

版权

Faster-RCNN系列三（Framework代码，python）
Faster-RCNN主要由以下两大部分组成：
RPN网络结构
Fast-RCNN网络结构
这里引用@霹雳吧啦Wz的Faster-RCNN流程图
在这里插入图片描述
整个流程可进一步分为以下5个步骤：
1、transform（归一化处理，resize）
2、backbone（特征提取）
3、RPN（建议框，建议框的损失）
4、roi_head（预测结果和预测损失）
5、transform（将预测结果映射回原图尺度）

定义FasterRCNNBase类（目的：定义网络框架）
初始化函数__init__（保存后续正向传播所需的各个子模块）

def __init__(self, backbone, rpn, roi_heads, transform):
    """Store the four components that ``forward`` chains together.

    Args:
        backbone: called on the batched image tensors to produce features.
        rpn: called with the images, features and targets to produce
            proposals and proposal losses.
        roi_heads: called with the features and proposals to produce
            detections and detector losses.
        transform: called on the raw inputs before the backbone; its
            ``postprocess`` is applied to the final detections.
    """
    super(FasterRCNNBase, self).__init__()
    # Assignment order is kept as transform -> backbone -> rpn -> roi_heads.
    self.transform, self.backbone = transform, backbone
    self.rpn, self.roi_heads = rpn, roi_heads
    # Only consulted in TorchScript mode, so the warning is emitted once.
    self._has_warned = False

正向传播（首先判断输入数据的正确性）

def forward(self, images, targets=None):
    # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
    """Run transform -> backbone -> RPN -> RoI heads -> postprocess.

    Args:
        images: list of image tensors; the last two dimensions of each
            tensor are recorded as its original size.
        targets: list of per-image dicts. In training mode it is required
            and every dict must hold a ``"boxes"`` tensor of shape [N, 4].

    Returns:
        When scripting, the tuple ``(losses, detections)``; otherwise the
        result of ``self.eager_outputs(losses, detections)``.

    Raises:
        ValueError: in training mode, if ``targets`` is None, or if a
            target's ``"boxes"`` is not a tensor of shape [N, 4].
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    if self.training:
        # Kept as an assert (not folded into the check above) so the
        # Optional type is narrowed before iterating.
        assert targets is not None
        for target in targets:  # validate the "boxes" entry of every target
            boxes = target["boxes"]
            if isinstance(boxes, torch.Tensor):
                if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                    # Fix: the original adjacent literals had no separating
                    # space and rendered as "a tensorof shape".
                    raise ValueError("Expected target boxes to be a tensor "
                                     "of shape [N, 4], got {:}.".format(
                                      boxes.shape))
            else:
                raise ValueError("Expected target boxes to be of type "
                                 "Tensor, got {:}.".format(type(boxes)))

    # Remember each image's size before the transform resizes it, so the
    # detections can be mapped back afterwards.
    original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2  # guards against a one-dimensional input
        original_image_sizes.append((val[0], val[1]))

    images, targets = self.transform(images, targets)  # pre-process the images
    features = self.backbone(images.tensors)  # feature map(s) from the backbone
    if isinstance(features, torch.Tensor):
        # Single feature level: wrap it in an ordered dict under key '0'.
        # With multiple levels the backbone already returns an ordered dict.
        features = OrderedDict([('0', features)])

    # Feed the features and the target annotations to the RPN.
    # proposals: List[Tensor], each of shape [num_proposals, 4], in absolute
    # (x1, y1, x2, y2) coordinates.
    proposals, proposal_losses = self.rpn(images, features, targets)
    # Feed the RPN output and the targets to the second (Fast R-CNN) stage.
    detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
    # Post-process the predictions (mainly mapping boxes back to the
    # original image scale).
    detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)

    if torch.jit.is_scripting():
        if not self._has_warned:
            warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
            self._has_warned = True
        return losses, detections
    else:
        return self.eager_outputs(losses, detections)

定义Faster RCNN 网络框架（继承FasterRCNNBase）
class FasterRCNN(FasterRCNNBase):
初始化参数

class FasterRCNN(FasterRCNNBase):
    """Faster R-CNN assembled from a backbone plus default RPN / RoI-head parts.

    Every component that is left as ``None`` is replaced by a default built
    in ``__init__``; the assembled transform, RPN and RoI heads are then
    handed to ``FasterRCNNBase``.
    """

    def __init__(self, backbone, num_classes=None,
                 # transform parameter
                 min_size=800, max_size=1000,      # min / max size enforced by the resize step
                 image_mean=None, image_std=None,  # mean / std used by the normalize step
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,    # proposals kept before NMS (by score)
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,  # proposals kept after NMS
                 rpn_nms_thresh=0.7,  # IoU threshold used by NMS inside the RPN
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,  # IoU thresholds for positive / negative RPN samples
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,  # RPN loss sample count and positive fraction
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 # drop low-score detections / NMS threshold in Fast R-CNN / keep the top 100 detections by score
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,   # IoU thresholds for positive / negative Fast R-CNN samples
                 box_batch_size_per_image=512, box_positive_fraction=0.25,  # Fast R-CNN loss sample count and positive fraction
                 bbox_reg_weights=None):
        """Build the transform, RPN and RoI heads and pass them to the base class.

        Args:
            backbone: feature extractor; must expose an ``out_channels``
                attribute.
            num_classes: number of classes for the default box predictor.
                Must be given exactly when ``box_predictor`` is None.
            The remaining parameters are forwarded to the transform, RPN
            and RoI-head constructors as annotated in the signature.

        Raises:
            ValueError: if ``backbone`` has no ``out_channels`` attribute,
                or if ``num_classes`` and ``box_predictor`` are both given
                or both omitted.
        """
        if not hasattr(backbone, "out_channels"):
            # Fix: the original adjacent literals had no separating spaces
            # ("out_channelsspecifying", "thesame") and an unclosed parenthesis.
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        assert isinstance(rpn_anchor_generator, (AnchorsGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        # num_classes and box_predictor are mutually exclusive: exactly one
        # of them must be supplied.
        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError("num_classes should be None when box_predictor "
                                 "is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should not be None when box_predictor "
                                 "is not specified")

        # Channel count of the prediction feature maps.
        out_channels = backbone.out_channels

        # If no anchor generator is given, build the default one intended
        # for resnet50_fpn.
        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorsGenerator(
                anchor_sizes, aspect_ratios
            )

        # The RPN's sliding-window prediction head.
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
            )

        # Defaults: rpn_pre_nms_top_n_train = 2000, rpn_pre_nms_top_n_test = 1000,
        #           rpn_post_nms_top_n_train = 2000, rpn_post_nms_top_n_test = 1000.
        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        # The complete RPN.
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

        # Multi-scale RoIAlign pooling.
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],  # feature levels used for RoI pooling
                output_size=[7, 7],
                sampling_ratio=2)

        # Flatten + two fully connected layers applied after RoI pooling.
        if box_head is None:
            resolution = box_roi_pool.output_size[0]  # 7 by default
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size
            )

        # Prediction layer on top of the box_head output.
        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                num_classes)

        # Combine RoI pooling, box_head and box_predictor.
        roi_heads = RoIHeads(
            # box
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,  # 0.5  0.5
            box_batch_size_per_image, box_positive_fraction,  # 512  0.25
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)  # 0.05  0.5  100

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]

        # Normalization, resizing and batching of the input data.
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)

一直小老虎

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Faster-RCNN系列三（Framework代码，python）

Faster-RCNN系列三（Framework代码，python）Faster-RCNN系列主要分为两大部分组成，RPN网络结构Fast-RCNN网络结构这里引用@霹雳吧啦Wz的Faster-RCNN流程图再次分为4大模块：1、transform（归一化处理，resize）2、BACKbone（特征提取）3、RPN（建议框，建议框的损失）4、roi_head(预测结果和预测损失)5、transform（将图像映射回原图）定义FasterRCNNBase类，（目的：定义网络框架）
复制链接

扫一扫