参考
在Caffe源码中,各Loss层的反向传播函数里都能看到如下代码:loss_weight被赋值为top[0]->cpu_diff()[0]的值。
// Scale gradient.
// top[0]->cpu_diff()[0] was pre-filled with this layer's loss_weight by
// SetLossWeights during SetUp (it is NOT a gradient computed upstream);
// dividing by get_normalizer(...) rescales the per-element gradient
// before it is applied to bottom_diff.
Dtype loss_weight = top[0]->cpu_diff()[0] /
get_normalizer(normalization_, count);
// In-place BLAS scal: bottom_diff *= loss_weight for all prob_.count() elements.
caffe_scal(prob_.count(), loss_weight, bottom_diff);
但是在源码中查找top[0]->cpu_diff()却发现Loss层中此值并未初始化或者被赋值。
那么,top[0]->cpu_diff()里面到底存储的是什么值呢?
- 其实,在Loss层中,top[0]->cpu_diff()[0]的默认值对应于Loss层中loss_weight的值,一般设置成1,能够在Caffe源码中找到出处。
- 从layer.hpp中的SetLossWeights函数可以看出,通过this->set_loss()设置对应Layer层的私有变量loss_,其值来自layer_param_.loss_weight参数。
/**
 * Called by SetUp to initialize the weights associated with any top blobs in
 * the loss function. Store non-zero loss weights in the diff blob.
 */
inline void SetLossWeights(const vector<Blob<Dtype>*>& top) {
const int num_loss_weights = layer_param_.loss_weight_size();
if (num_loss_weights) {
CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
"unspecified or specified once per top blob.";
for (int top_id = 0; top_id < top.size(); ++top_id) {
// Read the loss_weight value for this top blob from the layer's proto param.
const Dtype loss_weight = layer_param_.loss_weight(top_id);
// A zero weight means this top does not contribute to the loss; skip it.
if (loss_weight == Dtype(0)) { continue; }
// Remember the weight in the layer's private loss_ member for top_id.
this->set_loss(top_id, loss_weight);
const int count = top[top_id]->count();
Dtype* loss_multiplier = top[top_id]->mutable_cpu_diff();
// Fill the top blob's entire cpu_diff() with loss_weight — this is why
// Backward can read the weight from top[0]->cpu_diff()[0].
caffe_set(count, loss_weight, loss_multiplier);
}
}
}
- 然后进入net.cpp,在Init()函数中找到如下所示代码,可以看到Net的私有变量blob_loss_weights_的相应项被设置成了layer->loss(top_id)。
// For each top blob produced by this layer, copy the layer's per-top loss
// weight (layer->loss(top_id), the private loss_ member) into the Net-level
// blob_loss_weights_ vector, and account for the blob's memory footprint.
for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
// Grow on demand; newly added entries default to a loss weight of 0.
blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
}
// Set this blob's loss weight from the layer's private loss_ member
// (which SetLossWeights filled from the loss_weight proto parameter).
blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
LOG_IF(INFO, Caffe::root_solver())
<< "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
// Only log the weight when it is non-zero, i.e. the blob contributes to loss.
if (layer->loss(top_id)) {
LOG_IF(INFO, Caffe::root_solver())
<< " with loss weight " << layer->loss(top_id);
}
memory_used_ += top_vecs_[layer_id][top_id]->count();
}