Understanding the Faster R-CNN Source Code (Part 1): SmoothL1LossLayer, Connecting the Paper and the Code

Source code (the smooth L1 loss layer from the Fast R-CNN Caffe fork):

```cpp

// ------------------------------------------------------------------
// Fast R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// ------------------------------------------------------------------

#include "caffe/fast_rcnn_layers.hpp"

namespace caffe {

template <typename Dtype>
__global__ void SmoothL1Forward(const int n, const Dtype* in, Dtype* out,
    Dtype sigma2) {
  // f(x) = 0.5 * (sigma * x)^2          if |x| < 1 / sigma / sigma
  //        |x| - 0.5 / sigma / sigma    otherwise
  CUDA_KERNEL_LOOP(index, n) {
    Dtype val = in[index];
    Dtype abs_val = abs(val);
    if (abs_val < 1.0 / sigma2) {
      out[index] = 0.5 * val * val * sigma2;
    } else {
      out[index] = abs_val - 0.5 / sigma2;
    }
  }
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  caffe_gpu_sub(
      count,
      bottom[0]->gpu_data(),
      bottom[1]->gpu_data(),
      diff_.mutable_gpu_data());    // d := b0 - b1
  if (has_weights_) {
    // apply "inside" weights
    caffe_gpu_mul(
        count,
        bottom[2]->gpu_data(),
        diff_.gpu_data(),
        diff_.mutable_gpu_data());  // d := w_in * (b0 - b1)
  }
  SmoothL1Forward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, diff_.gpu_data(), errors_.mutable_gpu_data(), sigma2_);
  CUDA_POST_KERNEL_CHECK;

  if (has_weights_) {
    // apply "outside" weights
    caffe_gpu_mul(
        count,
        bottom[3]->gpu_data(),
        errors_.gpu_data(),
        errors_.mutable_gpu_data());  // e := w_out * SmoothL1(w_in * (b0 - b1))
  }

  Dtype loss;
  caffe_gpu_dot(count, ones_.gpu_data(), errors_.gpu_data(), &loss);
  top[0]->mutable_cpu_data()[0] = loss / bottom[0]->num();
}

template <typename Dtype>
__global__ void SmoothL1Backward(const int n, const Dtype* in, Dtype* out,
    Dtype sigma2) {
  // f'(x) = sigma * sigma * x         if |x| < 1 / sigma / sigma
  //       = sign(x)                   otherwise
  CUDA_KERNEL_LOOP(index, n) {
    Dtype val = in[index];
    Dtype abs_val = abs(val);
    if (abs_val < 1.0 / sigma2) {
      out[index] = sigma2 * val;
    } else {
      out[index] = (Dtype(0) < val) - (val < Dtype(0));
    }
  }
}

template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // after forwards, diff_ holds w_in * (b0 - b1)
  int count = diff_.count();
  SmoothL1Backward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, diff_.gpu_data(), diff_.mutable_gpu_data(), sigma2_);
  CUDA_POST_KERNEL_CHECK;
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
      caffe_gpu_axpby(
          count,                           // count
          alpha,                           // alpha
          diff_.gpu_data(),                // x
          Dtype(0),                        // beta
          bottom[i]->mutable_gpu_diff());  // y
      if (has_weights_) {
        // Scale by "inside" weight
        caffe_gpu_mul(
            count,
            bottom[2]->gpu_data(),
            bottom[i]->gpu_diff(),
            bottom[i]->mutable_gpu_diff());
        // Scale by "outside" weight
        caffe_gpu_mul(
            count,
            bottom[3]->gpu_data(),
            bottom[i]->gpu_diff(),
            bottom[i]->mutable_gpu_diff());
      }
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(SmoothL1LossLayer);

}  // namespace caffe
```

SmoothL1LossLayer computes the regression loss for one image, which corresponds to the second term (the part to the right of the plus sign) in the multi-task loss from the paper:

$$
L(\{p_i\},\{t_i\}) = \frac{1}{N_{cls}}\sum_i L_{cls}(p_i, p_i^*) \;+\; \lambda\,\frac{1}{N_{reg}}\sum_i p_i^*\,L_{reg}(t_i, t_i^*)
$$

i is the index of an anchor in a mini-batch.

p_i is the predicted probability that anchor i contains an object.

p_i* is the ground-truth label: 1 when the anchor contains an object, 0 otherwise.

t_i is a vector holding the 4 parameterized coordinates of the predicted bounding box.

t_i* is the corresponding vector of parameterized coordinates of the ground-truth box.
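
For reference, the paper parameterizes these 4-vectors relative to the anchor box, where x, y denote the box center and w, h its width and height; x_a belongs to the anchor and x* to the ground-truth box, and the other coordinates are defined analogously:

$$
t_x = (x - x_a)/w_a,\quad t_y = (y - y_a)/h_a,\quad t_w = \log(w/w_a),\quad t_h = \log(h/h_a)
$$

$$
t_x^* = (x^* - x_a)/w_a,\quad t_y^* = (y^* - y_a)/h_a,\quad t_w^* = \log(w^*/w_a),\quad t_h^* = \log(h^*/h_a)
$$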

 

bottom[0] holds the predicted coordinates, corresponding to t_i.

bottom[1] holds the target coordinates, corresponding to t_i*.

bottom[2] holds the "inside" weights: 1 when the anchor contains an object (fg), 0 otherwise, corresponding to p_i*.

bottom[3] holds the "outside" weights: 0 for anchors that are neither foreground (fg) nor background (bg), and 1/(N_fg + N_bg) for the rest, corresponding to the coefficient in front of the regression term. (To be honest, I still do not fully understand this part: the paper describes fixed coefficients such as λ = 10, and the original code has kept being updated, so the normalization scheme has probably been revised. In any case, in the current code the outside weights are multiplied into the result, as sketched below.)
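
To make the data flow concrete, here is a minimal NumPy sketch of what Forward_gpu computes. The shapes, weight values, and sigma are illustrative assumptions (py-faster-rcnn configures sigma = 3.0 for the RPN regression loss), not the layer's actual blob layout:

```python
import numpy as np

def smooth_l1(x, sigma2):
    # f(x) = 0.5 * (sigma * x)^2        if |x| < 1 / sigma^2
    #        |x| - 0.5 / sigma^2        otherwise
    abs_x = np.abs(x)
    return np.where(abs_x < 1.0 / sigma2,
                    0.5 * sigma2 * x * x,
                    abs_x - 0.5 / sigma2)

sigma2 = 3.0 ** 2                    # sigma = 3.0, so sigma2_ = 9.0
pred   = np.random.randn(8, 4)       # bottom[0]: predicted t_i for 8 anchors
target = np.random.randn(8, 4)       # bottom[1]: target t_i*
w_in   = np.zeros((8, 4))            # bottom[2]: p_i*, 1 only for fg anchors
w_in[:3] = 1.0                       # pretend the first 3 anchors are fg
w_out  = np.full((8, 4), 1.0 / 8)    # bottom[3]: 1/(N_fg + N_bg), illustrative

diff   = w_in * (pred - target)           # d := w_in * (b0 - b1)
errors = w_out * smooth_l1(diff, sigma2)  # e := w_out * SmoothL1(d)
loss   = errors.sum() / 1                 # Caffe divides by bottom[0]->num(), 1 here
print(loss)
```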

 

The formula for L_reg is the smooth L1 function below, where we let x = t_i - t_i*:

$$
\mathrm{smooth}_{L_1}(x) =
\begin{cases}
0.5\,(\sigma x)^2 & \text{if } |x| < 1/\sigma^2 \\
|x| - 0.5/\sigma^2 & \text{otherwise}
\end{cases}
$$

(The paper writes out the σ = 1 case; the code generalizes it through the sigma2_ parameter.)

The factor p_i* in p_i* L_reg(t_i, t_i*) means that only foreground anchors (fg, i.e. one of the 20 object classes) incur a regression loss; for all other anchors the term is zero.
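
Similarly, here is a minimal self-contained sketch of what Backward_gpu computes; diff, w_in, w_out, and loss_weight are illustrative stand-ins for diff_, bottom[2], bottom[3], and top[0]->cpu_diff()[0]:

```python
import numpy as np

def smooth_l1_grad(x, sigma2):
    # f'(x) = sigma^2 * x     if |x| < 1 / sigma^2
    #         sign(x)         otherwise
    return np.where(np.abs(x) < 1.0 / sigma2, sigma2 * x, np.sign(x))

sigma2 = 3.0 ** 2
diff   = np.random.randn(8, 4)     # stands for diff_ = w_in * (b0 - b1)
w_in   = np.ones((8, 4))           # bottom[2], illustrative
w_out  = np.full((8, 4), 1.0 / 8)  # bottom[3], illustrative
loss_weight = 1.0                  # top[0]->cpu_diff()[0]

g = smooth_l1_grad(diff, sigma2)   # the kernel overwrites diff_ with f'(diff_) in place
# sign is +1 for bottom[0] and -1 for bottom[1]; divide by bottom[i]->num() (= 1 here)
grad_pred   = (+1.0 * loss_weight / 1) * g * w_in * w_out
grad_target = (-1.0 * loss_weight / 1) * g * w_in * w_out
```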



To replace the smooth L1 loss in Faster R-CNN with an IoU loss, you can try an implementation like the following:

```python
import torch

def iou_loss(pred_bbox, gt_bbox, eps=1e-6):
    """
    Compute IoU loss between predicted bboxes and ground truth bboxes.

    Args:
        pred_bbox: predicted bboxes, shape [N, 4]
        gt_bbox: ground truth bboxes, shape [N, 4]
        eps: epsilon to avoid divide by zero

    Returns:
        iou_loss: IoU loss between predicted bboxes and ground truth bboxes, shape [N]
    """
    # compute IoU of the intersection over the union of each box pair
    x1 = torch.max(pred_bbox[:, 0], gt_bbox[:, 0])
    y1 = torch.max(pred_bbox[:, 1], gt_bbox[:, 1])
    x2 = torch.min(pred_bbox[:, 2], gt_bbox[:, 2])
    y2 = torch.min(pred_bbox[:, 3], gt_bbox[:, 3])
    w = torch.clamp(x2 - x1, min=0)
    h = torch.clamp(y2 - y1, min=0)
    inter = w * h
    a1 = (pred_bbox[:, 2] - pred_bbox[:, 0]) * (pred_bbox[:, 3] - pred_bbox[:, 1])
    a2 = (gt_bbox[:, 2] - gt_bbox[:, 0]) * (gt_bbox[:, 3] - gt_bbox[:, 1])
    union = a1 + a2 - inter
    iou = inter / (union + eps)

    # compute IoU loss (note: this penalizes squared deviation from a threshold;
    # the more common form is simply 1 - iou)
    threshold = 0.5
    iou_loss = torch.pow(iou - threshold, 2)
    return iou_loss

# example usage
pred_bbox = torch.tensor([[2.0, 3.0, 5.0, 6.0], [1.0, 2.0, 4.0, 5.0]])
gt_bbox = torch.tensor([[1.0, 2.0, 4.0, 5.0], [2.0, 3.0, 5.0, 6.0]])
loss = iou_loss(pred_bbox, gt_bbox)
print(loss)
```

Then replace the smooth L1 loss in Faster R-CNN with the IoU loss, for example:

```python
import torch
import torch.nn as nn

def iou_loss(pred_bbox, gt_bbox, eps=1e-6):
    """
    Compute IoU loss between predicted bboxes and ground truth bboxes.

    Args:
        pred_bbox: predicted bboxes, shape [N, 4]
        gt_bbox: ground truth bboxes, shape [N, 4]
        eps: epsilon to avoid divide by zero

    Returns:
        iou_loss: scalar mean IoU loss over all box pairs
    """
    # compute IoU
    x1 = torch.max(pred_bbox[:, 0], gt_bbox[:, 0])
    y1 = torch.max(pred_bbox[:, 1], gt_bbox[:, 1])
    x2 = torch.min(pred_bbox[:, 2], gt_bbox[:, 2])
    y2 = torch.min(pred_bbox[:, 3], gt_bbox[:, 3])
    w = torch.clamp(x2 - x1, min=0)
    h = torch.clamp(y2 - y1, min=0)
    inter = w * h
    a1 = (pred_bbox[:, 2] - pred_bbox[:, 0]) * (pred_bbox[:, 3] - pred_bbox[:, 1])
    a2 = (gt_bbox[:, 2] - gt_bbox[:, 0]) * (gt_bbox[:, 3] - gt_bbox[:, 1])
    union = a1 + a2 - inter
    iou = inter / (union + eps)

    # compute IoU loss
    threshold = 0.5
    iou_loss = torch.pow(iou - threshold, 2)
    return iou_loss.mean()

class FasterRCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        self.backbone = ...
        self.rpn = ...
        self.roi_head = ...
        self.bbox_head = nn.Linear(4096, 4 * self.num_classes)
        self.cls_head = nn.Linear(4096, self.num_classes)

    def forward(self, x, gt_bbox=None):
        # backbone
        x = self.backbone(x)
        # RPN
        rpn_cls, rpn_bbox = self.rpn(x)
        # RoI pooling
        rois = self.roi_head(x, rpn_bbox)
        # bbox regression
        bbox_pred = self.bbox_head(rois)
        bbox_pred = bbox_pred.reshape(-1, 4)
        # classification
        cls_score = self.cls_head(rois)
        cls_score = cls_score.reshape(-1, self.num_classes)
        cls_prob = nn.functional.softmax(cls_score, dim=1)
        # test or train
        if self.training:
            # compute loss
            rpn_loss, roi_loss = ...
            bbox_loss = iou_loss(bbox_pred, gt_bbox)
            cls_loss = ...
            total_loss = rpn_loss + roi_loss + bbox_loss + cls_loss
            return total_loss
        else:
            # inference
            result = ...
            return result
```

Note that an IoU loss can run into exploding or vanishing gradients, so some extra handling is needed, for example a progressive training schedule or an additional regularization term.