Reading the Caffe Source Code (1) -- Loss Layers: softmax_loss_layer.cpp

This post walks through the source code of softmax_loss_layer.cpp, the layer Caffe uses to compute the loss, explains its role in training deep learning models, and helps build an understanding of how Caffe works internally.

I recently needed to write a loss layer of my own, so I spent some time studying the Caffe source code. I am recording my notes here for others and for my own future review.
Let's start with how Caffe runs forward propagation, namely the ForwardFromTo function in net.cpp:
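A paraphrased sketch of that function (the exact code varies a bit between Caffe versions, but the structure is the same):

template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  for (int i = start; i <= end; ++i) {
    // Each layer runs Forward on its own bottom/top blobs; any loss the
    // layer reports is accumulated into the net-level loss.
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]);
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }
  }
  return loss;
}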
As you can see, this function uses a for loop to run forward propagation on each layer in turn. The per-layer Forward function itself is defined in layer.hpp:
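Again a paraphrased sketch (only the CPU branch is shown; the GPU branch is analogous):

template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype loss = 0;
  Reshape(bottom, top);        // let the layer resize its top blobs first
  switch (Caffe::mode()) {
  case Caffe::CPU:
    Forward_cpu(bottom, top);  // the layer-specific computation
    // For loss layers, each top blob's data is weighted by its loss weight
    // (stored in the blob's diff) and added to the returned loss.
    for (int top_id = 0; top_id < top.size(); ++top_id) {
      if (!this->loss(top_id)) { continue; }
      const int count = top[top_id]->count();
      const Dtype* data = top[top_id]->cpu_data();
      const Dtype* loss_weights = top[top_id]->cpu_diff();
      loss += caffe_cpu_dot(count, data, loss_weights);
    }
    break;
  case Caffe::GPU:
    Forward_gpu(bottom, top);
    // ... the same loss accumulation, done on the GPU ...
    break;
  default:
    LOG(FATAL) << "Unknown caffe mode.";
  }
  return loss;
}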
The key steps in this function are the calls to Reshape and Forward_cpu; the rest is bookkeeping that we can ignore for now. Since Reshape and Forward_cpu are virtual functions, they are implemented by the concrete subclasses, in our case in softmax_loss_layer.cpp. Let's now take a closer look at softmax_loss_layer.cpp:
 
namespace caffe {

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  LayerParameter softmax_param(this->layer_param_);
  softmax_param.set_type("Softmax");
  // The softmax loss involves two stages of computation: first the softmax
  // function turns the raw scores into per-class probabilities, then the loss
  // is computed from those probabilities. Caffe splits these two steps across
  // two layers, softmax_layer and softmax_loss_layer. Here an internal
  // softmax_layer is created to compute the probabilities.
  softmax_layer_ = LayerRegistry<Dtype>::CreateLayer(softmax_param);
  softmax_bottom_vec_.clear();
  softmax_bottom_vec_.push_back(bottom[0]);
  softmax_top_vec_.clear();
  softmax_top_vec_.push_back(&prob_);
  softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);

  has_ignore_label_ = this->layer_param_.loss_param().has_ignore_label();
  if (has_ignore_label_) {
    ignore_label_ = this->layer_param_.loss_param().ignore_label();
  }
  if (!this->layer_param_.loss_param().has_normalization() &&
      this->layer_param_.loss_param().has_normalize()) {
    normalization_ = this->layer_param_.loss_param().normalize() ?
                     LossParameter_NormalizationMode_VALID :
                     LossParameter_NormalizationMode_BATCH_SIZE;
  } else {
    normalization_ = this->layer_param_.loss_param().normalization();
  }
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_);
  softmax_axis_ =
      bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis());
  outer_num_ = bottom[0]->count(0, softmax_axis_);
  inner_num_ = bottom[0]->count(softmax_axis_ + 1);
  CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count())
      << "Number of labels must match number of predictions; "
      << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), "
      << "label count (number of labels) must be N*H*W, "
      << "with integer values in {0, 1, ..., C-1}.";
  if (top.size() >= 2) {
    // softmax output
    top[1]->ReshapeLike(*bottom[0]);
  }
}

template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::get_normalizer(
    LossParameter_NormalizationMode normalization_mode, int valid_count) {
  Dtype normalizer;
  switch (normalization_mode) {
    case LossParameter_NormalizationMode_FULL:
      normalizer = Dtype(outer_num_ * inner_num_);
      break;
    case LossParameter_NormalizationMode_VALID:
      if (valid_count == -1) {
        normalizer = Dtype(outer_num_ * inner_num_);
      } else {
        normalizer = Dtype(valid_count);
      }
      break;
    case LossParameter_NormalizationMode_BATCH_SIZE:
      normalizer = Dtype(outer_num_);
      break;
    case LossParameter_NormalizationMode_NONE:
      normalizer = Dtype(1);
      break;
    default:
      LOG(FATAL) << "Unknown normalization mode: "
          << LossParameter_NormalizationMode_Name(normalization_mode);
  }
  // Some users will have no labels for some examples in order to 'turn off' a
  // particular loss in a multi-task setup. The max prevents NaNs in that case.
  return std::max(Dtype(1.0), normalizer);
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  int dim = prob_.count() / outer_num_;
  int count = 0;
  Dtype loss = 0;
  // Sum -log(probability assigned to the ground-truth class) over every
  // position, skipping positions whose label equals ignore_label_.
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; j++) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, prob_.shape(softmax_axis_));
      loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
                           Dtype(FLT_MIN)));
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    // Gradient w.r.t. the pre-softmax input: start from the probabilities,
    // then subtract 1 at the ground-truth class (dL/dz_c = p_c - 1{c == label}).
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();
    int dim = prob_.count() / outer_num_;
    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
          ++count;
        }
      }
    }
    // Scale gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] /
                        get_normalizer(normalization_, count);
    caffe_scal(prob_.count(), loss_weight, bottom_diff);
  }
}

#ifdef CPU_ONLY
STUB_GPU(SoftmaxWithLossLayer);
#endif

INSTANTIATE_CLASS(SoftmaxWithLossLayer);
REGISTER_LAYER_CLASS(SoftmaxWithLoss);

}  // namespace caffe
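For completeness, the math that Forward_cpu and Backward_cpu implement is the standard softmax cross-entropy loss and its gradient (a well-known identity, stated here for reference):

\[
p_c = \frac{e^{z_c}}{\sum_k e^{z_k}}, \qquad
L = -\log p_y, \qquad
\frac{\partial L}{\partial z_c} = p_c - \mathbf{1}[c = y]
\]

This is exactly why Backward_cpu starts by copying prob_ into bottom_diff and then subtracts 1 at the ground-truth class index: the gradient with respect to the pre-softmax inputs is just the predicted probability minus the one-hot label. Positions marked with the ignore label get a zero gradient and are excluded from count, and the whole gradient is finally scaled by top[0]->cpu_diff()[0] / get_normalizer(normalization_, count), i.e. the loss weight divided by the chosen normalizer.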

 
