caffe学习记录(四)：如何配置caffe忽略掉某些不参与训练的标签

最新推荐文章于 2023-05-29 19:53:54 发布

wwww1244

最新推荐文章于 2023-05-29 19:53:54 发布

阅读量2.5k

点赞数

分类专栏：深度学习实践

本文链接：https://blog.csdn.net/wwww1244/article/details/81383647

版权

深度学习实践专栏收录该内容

10 篇文章 0 订阅

订阅专栏

考虑MTCNN的训练场景，要求负样本只参与分类训练，而part样本只参与回归训练，正样本则要同时参与分类和回归训练。为此，我们在数据集中将不需要参与训练的部分标注为-1，如下图所示：

但是，如果直接使用这样的数据集进行训练的话，这些label=-1的样本也会参与计算梯度，这样显然是不对的。

那么，如何配置caffe使其忽略掉这些label=-1的数据呢？换句话说，我们需要caffe在回传梯度时将这些label=-1的样本梯度设置为0，这样，这些样本就不会参与训练了。

事实上，caffe中的LossLayer层提供了ignore_label参数，当该参数被设置时，程序会忽略ignore_label对应项的loss。caffe中所有loss层都是LossLayer层的子类，但并不是所有loss层都实现了ignore_label功能，SoftmaxWithLoss层可以直接使用这一功能，但EuclideanLoss层就没有这一功能。

我们的所有修改都在prototxt文件中的Loss层部分，训练过程中的其它步骤，和之前文章中提到的多标签分类/回归训练过程完全一致。

一、SoftmaxWithLoss层的ignore_label

调用方法如下：

# Classification loss. Setting loss_param.ignore_label makes the layer skip
# every sample whose label equals that value (-1 here).
layer { 
  name: "loss_cls" 
  type: "SoftmaxWithLoss" 
  bottom: "conv4-1" 
  bottom: "label_cls" 
  top: "loss_cls"
  loss_weight: 1
  loss_param{
    ignore_label: -1
  }
}

显然，只需要在loss_param中添加一个ignore_label项就可以解决问题。

二、EuclideanLoss层的ignore_label

接下来，我们需要修改caffe源码，来为EuclideanLoss层添加ignore_label，添加后的调用方法应该和SoftmaxWithLoss层的调用方法完全一致。

总的来说，我们需要修改以下几个文件：

在EuclideanLoss层的基础上添加新层：EuclideanSpIgnoreLossLayer，将include/caffe/layers/euclidean_loss_layer.hpp、src/caffe/layers/euclidean_loss_layer.cpp和src/caffe/layers/euclidean_loss_layer.cu分别拷贝一份，命名为include/caffe/layers/euclidean_sp_ignore_loss_layer.hpp、src/caffe/layers/euclidean_sp_ignore_loss_layer.cpp和src/caffe/layers/euclidean_sp_ignore_loss_layer.cu。
修改include/caffe/layers/euclidean_sp_ignore_loss_layer.hpp文件
修改src/caffe/layers/euclidean_sp_ignore_loss_layer.cpp文件
修改src/caffe/layers/euclidean_sp_ignore_loss_layer.cu文件

接下来就是这些文件的修改版，其中，//###表示修改的部分，主要改动就是添加根据label将diff设置为0的代码：

1. include/caffe/layers/euclidean_sp_ignore_loss_layer.hpp：

在类声明中添加如下部分：

  //###
  /// Whether to ignore instances with a certain label.
  bool has_ignore_label_;
  /// The label indicating that an instance should be ignored.
  int ignore_label_;
  /// Number of samples that were NOT ignored in the most recent forward pass.
  /// Written by Forward_cpu/Forward_gpu and read again by the backward pass,
  /// where it normalizes the loss and the gradient.
  int num_nonzero_;

2. src/caffe/layers/euclidean_sp_ignore_loss_layer.cpp：

可以对照下面的文件进行修改。

#include <vector>

#include "caffe/layers/euclidean_sp_ignore_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void EuclideanSpIgnoreLossLayer<Dtype>::Reshape(
  const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Delegate the common loss-layer reshaping to the base class.
  LossLayer<Dtype>::Reshape(bottom, top);

  // Both inputs must hold the same number of values per sample.
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";

  // diff_ stores bottom[0] - bottom[1] element-wise, so it takes the shape
  // of bottom[0].
  diff_.ReshapeLike(*bottom[0]);

  //### Cache the optional ignore_label setting from loss_param.
  has_ignore_label_ = this->layer_param_.loss_param().has_ignore_label();
  if (!has_ignore_label_) {
    // ignore_label_ is only read when has_ignore_label_ is true.
    return;
  }
  ignore_label_ = this->layer_param_.loss_param().ignore_label();
}

/**
 * CPU forward pass: Euclidean loss over the samples that are not ignored.
 *
 * bottom[0] and bottom[1] are subtracted element-wise into diff_. When
 * ignore_label is configured, a sample is ignored only if ALL of its label
 * values (truncated to int) equal ignore_label_; its diff entries are zeroed
 * so it contributes neither loss nor gradient.
 *
 * The loss is sum(diff^2) / (2 * N), where N is the number of samples that
 * were kept. N is stored in num_nonzero_ for the backward pass.
 */
template <typename Dtype>
void EuclideanSpIgnoreLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();  // total number of elements
  const int num = bottom[0]->num();      // batch size
  // Values per sample. count(1) is the quantity Reshape() validates, and it
  // stays correct when the blob has spatial extent, unlike channels().
  const int dim = bottom[0]->count(1);

  //###
  caffe_sub(                             // element-wise bottom[0] - bottom[1]
      count,
      bottom[0]->cpu_data(),
      bottom[1]->cpu_data(),
      diff_.mutable_cpu_data());

  num_nonzero_ = num;                    // start by assuming every sample counts
  if (has_ignore_label_) {
    const Dtype* label_data = bottom[1]->cpu_data();
    Dtype* diff_data = diff_.mutable_cpu_data();
    for (int i = 0; i < num; ++i) {
      const int offset = i * dim;

      // A single value different from ignore_label_ keeps the sample.
      bool ignore = true;
      for (int j = 0; j < dim; ++j) {
        const int label_value = static_cast<int>(label_data[offset + j]);
        if (label_value != ignore_label_) {
          ignore = false;
          break;
        }
      }

      if (ignore) {
        for (int j = 0; j < dim; ++j) {
          diff_data[offset + j] = 0;
        }
        --num_nonzero_;
      }
    }
  }

  // If every sample was ignored, diff_ is all zeros; dividing by zero would
  // turn the loss into NaN, so normalize by at least 1 (loss becomes 0).
  const int normalizer = num_nonzero_ > 0 ? num_nonzero_ : 1;

  // Sum of squared differences.
  const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
  // The unmodified layer divided by bottom[0]->num(); here only the kept
  // samples are counted.
  const Dtype loss = dot / Dtype(2.0 * normalizer);
  top[0]->mutable_cpu_data()[0] = loss;
}

/**
 * CPU backward pass.
 *
 * For each bottom that requests a gradient, writes
 *   sign * top_diff / N * diff_
 * into its diff, with sign = +1 for bottom[0] and -1 for bottom[1], and
 * N = num_nonzero_ (the number of samples kept by the forward pass).
 * Ignored samples have zero entries in diff_, so they receive zero gradient.
 */
template <typename Dtype>
void EuclideanSpIgnoreLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

  // When the whole batch was ignored num_nonzero_ is 0. Dividing by it would
  // make alpha infinite and inf * 0 would fill the gradient with NaN, so
  // normalize by at least 1 (the gradient is then exactly zero).
  const int normalizer = num_nonzero_ > 0 ? num_nonzero_ : 1;

  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;

      //###
      // The unmodified layer divided by bottom[i]->num(); here only the kept
      // samples are counted.
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / normalizer;

      caffe_cpu_axpby(
          bottom[i]->count(),              // count
          alpha,                              // alpha
          diff_.cpu_data(),                   // a
          Dtype(0),                           // beta
          bottom[i]->mutable_cpu_diff());  // b
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(EuclideanSpIgnoreLossLayer);
#endif

INSTANTIATE_CLASS(EuclideanSpIgnoreLossLayer);
REGISTER_LAYER_CLASS(EuclideanSpIgnoreLoss);

}  // namespace caffe

3. src/caffe/layers/euclidean_sp_ignore_loss_layer.cu：

可以对照下面的文件进行修改。

#include <vector>

#include "caffe/layers/euclidean_sp_ignore_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/**
 * GPU forward pass: Euclidean loss over the samples that are not ignored.
 *
 * The subtraction and the dot product run on the device. The ignore mask is
 * applied on the host: the loop below dereferences the label and diff
 * pointers from CPU code, so it must use the cpu_data()/mutable_cpu_data()
 * accessors. Pointers from gpu_data() refer to device memory and must not be
 * read or written in a host loop. The Blob is relied on to keep its host and
 * device copies in sync across these accessor calls.
 *
 * The loss is sum(diff^2) / (2 * N), where N is the number of samples that
 * were kept. N is stored in num_nonzero_ for the backward pass.
 */
template <typename Dtype>
void EuclideanSpIgnoreLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();  // total number of elements
  const int num = bottom[0]->num();      // batch size
  // Values per sample. count(1) is the quantity Reshape() validates, and it
  // stays correct when the blob has spatial extent, unlike channels().
  const int dim = bottom[0]->count(1);

  //###
  caffe_gpu_sub(                         // element-wise bottom[0] - bottom[1]
      count,
      bottom[0]->gpu_data(),
      bottom[1]->gpu_data(),
      diff_.mutable_gpu_data());

  num_nonzero_ = num;                    // start by assuming every sample counts
  if (has_ignore_label_) {
    // Host-side views: this loop executes on the CPU.
    const Dtype* label_data = bottom[1]->cpu_data();
    Dtype* diff_data = diff_.mutable_cpu_data();
    for (int i = 0; i < num; ++i) {
      const int offset = i * dim;

      // A single value different from ignore_label_ keeps the sample.
      bool ignore = true;
      for (int j = 0; j < dim; ++j) {
        const int label_value = static_cast<int>(label_data[offset + j]);
        if (label_value != ignore_label_) {
          ignore = false;
          break;
        }
      }

      if (ignore) {
        for (int j = 0; j < dim; ++j) {
          diff_data[offset + j] = 0;
        }
        --num_nonzero_;
      }
    }
  }

  // If every sample was ignored, diff_ is all zeros; dividing by zero would
  // turn the loss into NaN, so normalize by at least 1 (loss becomes 0).
  const int normalizer = num_nonzero_ > 0 ? num_nonzero_ : 1;

  // Sum of squared differences, computed on the device from the masked diff_.
  Dtype dot;
  caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
  // The unmodified layer divided by bottom[0]->num(); here only the kept
  // samples are counted.
  const Dtype loss = dot / Dtype(2.0 * normalizer);
  top[0]->mutable_cpu_data()[0] = loss;
}

/**
 * GPU backward pass.
 *
 * For each bottom that requests a gradient, writes
 *   sign * top_diff / N * diff_
 * into its diff on the device, with sign = +1 for bottom[0] and -1 for
 * bottom[1], and N = num_nonzero_ (the number of samples kept by the forward
 * pass). Ignored samples have zero entries in diff_, so they receive zero
 * gradient.
 */
template <typename Dtype>
void EuclideanSpIgnoreLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // When the whole batch was ignored num_nonzero_ is 0. Dividing by it would
  // make alpha infinite and inf * 0 would fill the gradient with NaN, so
  // normalize by at least 1 (the gradient is then exactly zero).
  const int normalizer = num_nonzero_ > 0 ? num_nonzero_ : 1;

  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;

      //###
      // The unmodified layer divided by bottom[i]->num(); here only the kept
      // samples are counted. The scalar top diff is read through the host
      // accessor.
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / normalizer;

      caffe_gpu_axpby(
          bottom[i]->count(),              // count
          alpha,                              // alpha
          diff_.gpu_data(),                   // a
          Dtype(0),                           // beta
          bottom[i]->mutable_gpu_diff());  // b
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(EuclideanSpIgnoreLossLayer);

}  // namespace caffe

到这里为止，程序就修改完成了，编译caffe，不出问题的话，此时EuclideanSpIgnoreLoss层就可以支持ignore_label的添加了。

调用方法如下：

# Bounding-box regression loss using the custom EuclideanSpIgnoreLoss layer.
# A sample is skipped only when all of its label values equal ignore_label
# (-1 here).
layer { 
  name: "loss_box" 
  type: "EuclideanSpIgnoreLoss" 
  bottom: "conv4-2"
  bottom: "label_box" 
  top: "loss_box"
  loss_weight: 0.5
  loss_param{
    ignore_label: -1
  }
}

意为：如果某个样本在该层的所有标签值均为-1，则忽略这个样本的loss（该样本既不计入loss，也不回传梯度）；只要有一个标签值不为-1，该样本就会正常参与训练。

wwww1244

关注

0
点赞
踩
7

收藏

觉得还不错? 一键收藏
1
评论
caffe学习记录(四)：如何配置caffe忽略掉某些不参与训练的标签

考虑MTCNN的训练场景，要求负样本只参与分类训练，而part样本只参与回归训练，正样本则要同时参与分类和回归训练，我们在数据集将不需要参与训练的部分标注为-1，如下图所示：但是，如果直接使用这样的数据集进行训练的话，这些label=-1的样本也会参与计算梯度，这样显然是不对的。那么，如何配置caffe使其忽略掉这些label=-1的数据呢？换句话说，我们需要caffe在回传梯度时将这...
复制链接

扫一扫