#include <vector>
#include "caffe/layers/center_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe{
template<typename Dtype>
void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top){
  // Standard loss-layer setup (configures the scalar loss output).
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // bottom[0]: feature vectors, bottom[1]: labels — batch sizes must agree.
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());
  const int feature_dim = bottom[0]->channels();
  const int batch_size = bottom[0]->num();
  // Read this layer's hyper-parameters from the prototxt definition.
  alpha = this->layer_param_.center_loss_param().alpha();
  lossWeight = this->layer_param_.center_loss_param().loss_weight();
  clusterNum = this->layer_param_.center_loss_param().cluster_num();
  // One center per class (clusterNum rows), each of length feature_dim.
  // Centers live in center_info_'s data; their pending updates in its diff.
  center_info_.Reshape(clusterNum, feature_dim, 1, 1);
  // Per-sample residual (feature - center), recomputed every Forward pass.
  center_loss_.Reshape(batch_size, feature_dim, 1, 1);
  center_update_count_.resize(clusterNum);
  // All class centers start at the origin.
  caffe_set(clusterNum * feature_dim, Dtype(0.0), center_info_.mutable_cpu_data());
}
template<typename Dtype>
void CenterLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*> &bottom,
    const vector<Blob<Dtype>*> &top){
  // bottom[0]: per-sample feature vectors; bottom[1]: per-sample class labels.
  const Dtype *feature = bottom[0]->cpu_data();
  const Dtype *label = bottom[1]->cpu_data();
  const int num = bottom[0]->num();            // batch size
  const int channels = bottom[0]->channels();  // feature vector length
  // Center updates are accumulated in center_info_'s diff; clear it first.
  caffe_set(clusterNum * channels, Dtype(0.0), center_info_.mutable_cpu_diff());
  // Counts start at 1 so each center's divisor below is (1 + #samples of that
  // class), which matches the center-update rule of the center-loss paper and
  // avoids division by zero for classes absent from this batch.
  for(int i = 0; i < clusterNum; ++i){
    center_update_count_[i] = 1;
  }
  Dtype dot_sum = 0;
  for(int i = 0; i < num; ++i){
    const int targetLabel = static_cast<int>(label[i]);
    // Guard against out-of-range labels, which would otherwise index past the
    // end of center_info_ (silent heap corruption in the original).
    CHECK_GE(targetLabel, 0) << "negative label at sample " << i;
    CHECK_LT(targetLabel, clusterNum) << "label exceeds cluster_num at sample " << i;
    // Residual for this sample: x_i - c_{y_i}. Cached in center_loss_ because
    // it is also the gradient w.r.t. the features used in Backward.
    caffe_sub(channels, feature + i * channels,
        center_info_.cpu_data() + targetLabel * channels,
        center_loss_.mutable_cpu_data() + i * channels);
    // Accumulate the residual into this class center's pending update.
    caffe_add(channels, center_loss_.cpu_data() + i * channels,
        center_info_.cpu_diff() + targetLabel * channels,
        center_info_.mutable_cpu_diff() + targetLabel * channels);
    center_update_count_[targetLabel]++;
    // ||x_i - c_{y_i}||^2; the constant scale factor is applied once after
    // the loop instead of per sample as the original did.
    dot_sum += caffe_cpu_dot(channels, center_loss_.cpu_data() + i * channels,
        center_loss_.cpu_data() + i * channels);
  }
  // loss = (lossWeight / 2N) * sum_i ||x_i - c_{y_i}||^2
  top[0]->mutable_cpu_data()[0] =
      dot_sum * lossWeight / Dtype(2.0) / static_cast<Dtype>(num);
  // Turn each accumulated residual sum into a scaled update step.
  // Blob::Update() performs data -= diff, so the step is negated here to move
  // centers *toward* the mean of their assigned features.
  // NOTE(review): centers are updated inside Forward, so they also move during
  // test/eval passes — preserved as-is from the original implementation.
  for(int i = 0; i < clusterNum; ++i){
    Dtype scale = -alpha * lossWeight / Dtype(center_update_count_[i]);
    caffe_scal(channels, scale, center_info_.mutable_cpu_diff() + i * channels);
  }
  center_info_.Update();
}
template<typename Dtype>
void CenterLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*> &top,
    const vector<bool> &propagate_down,
    const vector<Blob<Dtype>*> &bottom){
  // No gradient is defined with respect to the label input.
  if (propagate_down.size() > 1 && propagate_down[1]) {
    LOG(FATAL) << this->type() << " Layer cannot backpropagate to label inputs.";
  }
  // Honor propagate_down (the original unconditionally wrote the diff).
  if (!propagate_down[0]) { return; }
  const int num = bottom[0]->num();
  const int channels = bottom[0]->channels();
  // d(loss)/d(x_i) = lossWeight * (x_i - c_{y_i}); the residual was cached in
  // center_loss_ during Forward. caffe_cpu_scale writes the scaled result
  // straight into the bottom diff WITHOUT mutating the cached residual — the
  // original scaled center_loss_ in place, so a second Backward call would
  // have applied lossWeight twice.
  caffe_cpu_scale(num * channels, lossWeight, center_loss_.cpu_data(),
      bottom[0]->mutable_cpu_diff());
}
// No CUDA kernel is provided for this layer: the GPU forward pass simply
// falls back to the CPU implementation (data is synced by cpu_data() calls).
template <typename Dtype>
void CenterLossLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top){
Forward_cpu(bottom, top);
}
// GPU backward pass also delegates to the CPU implementation; no CUDA
// kernel exists for this layer.
template<typename Dtype>
void CenterLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){
Backward_cpu(top, propagate_down,bottom);
}
// Instantiate the float and double template specializations and register the
// layer with Caffe's factory under the prototxt type name "CenterLoss".
INSTANTIATE_CLASS(CenterLossLayer);
REGISTER_LAYER_CLASS(CenterLoss);
}  // namespace caffe
// center loss code commentary (a custom layer newly added to Caffe)
// Latest recommended article published 2022-08-17 14:15:16