公式
参数:两个输入bottom[0]、bottom[1],一个输出top[0]。
bottom[0]: N*C*1*1 预测值;
bottom[1]: N*1*1*1 真实值;
p: 范数的阶,可选 L1、L2 范数;
δ{l_n=k}: 示性函数,如果第 n 个样本的真实标签为 k,则取 +1,否则取 −1;
t_nk: bottom[0] 中第 n 个样本、第 k 类的预测值。

损失公式:
E = (1/N) · Σ_{n=1}^{N} Σ_{k=1}^{K} [ max(0, 1 − δ{l_n=k} · t_nk) ]^p
代码
(1)Forward
template <typename Dtype>
void HingeLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Hinge loss forward pass.
  // bottom[0]: N x C x 1 x 1 predicted scores t_nk.
  // bottom[1]: N x 1 x 1 x 1 ground-truth labels (integer class indices).
  // top[0]:    scalar loss E = (1/N) sum_n sum_k [max(0, 1 - delta*t_nk)]^p.
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const Dtype* label = bottom[1]->cpu_data();
  const int num = bottom[0]->num();        // N: batch size
  const int count = bottom[0]->count();    // N * C: total score entries
  const int dim = count / num;             // C: number of classes

  // Stage the raw scores in the diff buffer; the hinge terms are computed
  // in place below and reused by Backward_cpu.
  caffe_copy(count, bottom_data, bottom_diff);

  // Flip the sign of the true-class score of each sample, i.e. fold in
  // delta_{l_n=k} (+1 for the true class, -1 otherwise).
  for (int n = 0; n < num; ++n) {
    const int gt = static_cast<int>(label[n]);
    bottom_diff[n * dim + gt] = -bottom_diff[n * dim + gt];
  }

  // hinge_nk = max(0, 1 - delta_{l_n=k} * t_nk), stored in bottom_diff.
  for (int n = 0; n < num; ++n) {
    for (int k = 0; k < dim; ++k) {
      Dtype* cell = bottom_diff + n * dim + k;
      *cell = std::max(Dtype(0), 1 + *cell);
    }
  }

  // Reduce the hinge terms according to the configured norm.
  Dtype* loss = top[0]->mutable_cpu_data();
  switch (this->layer_param_.hinge_loss_param().norm()) {
  case HingeLossParameter_Norm_L1:
    // L1: mean of |hinge| over the batch (hinge is already non-negative).
    loss[0] = caffe_cpu_asum(count, bottom_diff) / num;
    break;
  case HingeLossParameter_Norm_L2:
    // L2: mean of hinge^2 over the batch (dot product with itself).
    loss[0] = caffe_cpu_dot(count, bottom_diff, bottom_diff) / num;
    break;
  default:
    LOG(FATAL) << "Unknown Norm";
  }
}
(2)Backward
bottom[1] 是 label 的 groundtruth,不需要进行反向传播运算,只需要对 bottom[0] 进行反向传播运算。记 hinge = max(0, 1 − δ{l_n=k} · t_nk),反向传播即求损失 E 对 t_nk 的偏导:
L1 范数:∂E/∂t_nk = (1/N) · sign(hinge) · ∂hinge/∂t_nk
L2 范数:∂E/∂t_nk = (2/N) · hinge · ∂hinge/∂t_nk
其中:
∂hinge/∂t_nk = 0 (当 hinge = 0 时);∂hinge/∂t_nk = −δ{l_n=k} (当 hinge > 0 时)
template <typename Dtype>
void HingeLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Hinge loss backward pass: compute dE/dt_nk into bottom[0]'s diff.
  // Gradients w.r.t. the label input are undefined, so reject them outright.
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // bottom_diff still holds hinge_nk = max(0, 1 - delta*t_nk) as written
    // by Forward_cpu; the gradient is derived from it in place.
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* label = bottom[1]->cpu_data();
    const int num = bottom[0]->num();
    const int count = bottom[0]->count();
    const int dim = count / num;

    // Fold in -delta_{l_n=k}: d(hinge)/d(t_nk) flips the sign for the
    // true class, so negate those entries again.
    for (int n = 0; n < num; ++n) {
      const int gt = static_cast<int>(label[n]);
      bottom_diff[n * dim + gt] = -bottom_diff[n * dim + gt];
    }

    // Chain in the loss weight dE_total/dE supplied by the layer above.
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    switch (this->layer_param_.hinge_loss_param().norm()) {
    case HingeLossParameter_Norm_L1:
      // L1: gradient is sign(hinge) scaled by loss_weight / N.
      caffe_cpu_sign(count, bottom_diff, bottom_diff);
      caffe_scal(count, loss_weight / num, bottom_diff);
      break;
    case HingeLossParameter_Norm_L2:
      // L2: gradient is 2 * hinge scaled by loss_weight / N.
      caffe_scal(count, loss_weight * 2 / num, bottom_diff);
      break;
    default:
      LOG(FATAL) << "Unknown Norm";
    }
  }
}