Information Gain Loss (InfogainLoss)
Information entropy quantifies information: it is a measure of the information carried by a system. The entropy is the system's average information content, determined by the probabilities with which its possible messages occur.
Information gain is the reduction in entropy obtained by conditioning on an additional variable, i.e. the difference between the original entropy and the conditional entropy; the standard definitions are recalled below.
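For reference (background only; the layer's code does not compute these directly), with $p(x)$ the probability of outcome $x$:

$$H(X) = -\sum_{x} p(x)\,\log p(x), \qquad \mathrm{IG}(X, A) = H(X) - H(X \mid A).$$

The layer itself is specified as follows.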
Input:
- Shape $(N \times C \times H \times W)$: the predictions $\hat{p}$, a blob of values in $[0, 1]$ giving the predicted probability of each of the $K = CHW$ classes. Each prediction vector $\hat{p}_n$ sums to 1: $\sum_{k=1}^{K} \hat{p}_{nk} = 1$.
- Shape $(N \times 1 \times 1 \times 1)$: the labels $l$, where each $l_n$ is an integer in $\{0, 1, \dots, K-1\}$ indexing the correct class among the $K$ classes.
- Shape $(1 \times 1 \times K \times K)$: (optional) the information gain matrix $H$, given as the third input blob. If $H = I$, the layer is equivalent to the multinomial logistic loss (MultinomialLogisticLossLayer).
Output:
- Shape $(1 \times 1 \times 1 \times 1)$.
- Computed as $E = -\frac{1}{N} \sum_{n=1}^{N} H_{l_n} \log(\hat{p}_n) = -\frac{1}{N} \sum_{n=1}^{N} \sum_{k=1}^{K} H_{l_n,k} \log(\hat{p}_{n,k})$, where $H_{l_n}$ denotes row $l_n$ of $H$.
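As a quick illustration (the numbers here are chosen for exposition, not taken from the source): let $N = 1$, $K = 3$, $\hat{p}_1 = (0.2, 0.3, 0.5)$, and $l_1 = 2$ (zero-based). With $H = I$, row $l_1$ of $H$ is $(0, 0, 1)$ and

$$E = -\log \hat{p}_{1,2} = -\log 0.5 \approx 0.693,$$

which is exactly the multinomial logistic loss. With a weighted row such as $H_{2} = (2, 0, 1)$,

$$E = -(2 \log 0.2 + 0 \cdot \log 0.3 + 1 \cdot \log 0.5) \approx 3.219 + 0.693 = 3.912.$$

A nonzero $H_{l,k}$ with $k \neq l$ asks the model to also place probability on class $k$ when the true label is $l$, while a zero entry makes the probability of class $k$ irrelevant to the loss. The Caffe CPU implementation (infogain_loss_layer.cpp) follows.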
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"
// A generalization of MultinomialLogisticLossLayer that takes an "information gain"
// (infogain) matrix specifying the "value" of all label pairs.
// loss = -sum(infogain_mat * log(prob)) / N;
namespace caffe {
template <typename Dtype>
void InfogainLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // If the infogain matrix H is not supplied as a third bottom blob, it must
  // be loaded from the file named by the layer's infogain_loss_param.
  if (bottom.size() < 3) {
    CHECK(this->layer_param_.infogain_loss_param().has_source())
        << "Infogain matrix source must be specified.";
    BlobProto blob_proto;
    ReadProtoFromBinaryFile(
        this->layer_param_.infogain_loss_param().source(), &blob_proto);
    infogain_.FromProto(blob_proto);
  }
}
template <typename Dtype>
void InfogainLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  Blob<Dtype>* infogain = NULL;
  if (bottom.size() < 3) {
    infogain = &infogain_;
  } else {
    infogain = bottom[2];
  }
  // The labels must be a single integer per example.
  CHECK_EQ(bottom[1]->channels(), 1);
  CHECK_EQ(bottom[1]->height(), 1);
  CHECK_EQ(bottom[1]->width(), 1);
  const int num = bottom[0]->num();
  const int dim = bottom[0]->count() / num;
  // The infogain matrix H must be K x K, where K = dim is the class count.
  CHECK_EQ(infogain->num(), 1);
  CHECK_EQ(infogain->channels(), 1);
  CHECK_EQ(infogain->height(), dim);
  CHECK_EQ(infogain->width(), dim);
}
template <typename Dtype>
void InfogainLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* bottom_label = bottom[1]->cpu_data();
  const Dtype* infogain_mat = NULL;
  if (bottom.size() < 3) {
    infogain_mat = infogain_.cpu_data();
  } else {
    infogain_mat = bottom[2]->cpu_data();
  }
  int num = bottom[0]->num();
  int dim = bottom[0]->count() / bottom[0]->num();
  Dtype loss = 0;
  for (int i = 0; i < num; ++i) {
    const int label = static_cast<int>(bottom_label[i]);
    for (int j = 0; j < dim; ++j) {
      // kLOG_THRESHOLD (1e-20) clamps the probability so log() never sees 0.
      Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
      loss -= infogain_mat[label * dim + j] * log(prob);
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}
template <typename Dtype>
void InfogainLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down.size() > 2 && propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to infogain inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    const Dtype* infogain_mat = NULL;
    if (bottom.size() < 3) {
      infogain_mat = infogain_.cpu_data();
    } else {
      infogain_mat = bottom[2]->cpu_data();
    }
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    int num = bottom[0]->num();
    int dim = bottom[0]->count() / bottom[0]->num();
    const Dtype scale = - top[0]->cpu_diff()[0] / num;
    for (int i = 0; i < num; ++i) {
      const int label = static_cast<int>(bottom_label[i]);
      for (int j = 0; j < dim; ++j) {
        // loss = -sum(infogain_mat[label] * log(prob)) / N, so
        // d(loss)/d(prob_j) = -top_diff * infogain_mat[label][j] / (N * prob_j);
        // 'scale' already folds in -top_diff / N.
        Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
        bottom_diff[i * dim + j] = scale * infogain_mat[label * dim + j] / prob;
      }
    }
  }
}
INSTANTIATE_CLASS(InfogainLossLayer);
REGISTER_LAYER_CLASS(InfogainLoss);
} // namespace caffe
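LayerSetUp above reads $H$ from the file named in infogain_loss_param when no third bottom blob is given; that file is a serialized BlobProto. Below is a minimal sketch of how such a file could be produced with Caffe's own Blob and I/O helpers. It is a hypothetical standalone tool, not part of the layer source: the class count K and the output file name are illustrative assumptions, and the identity matrix written here reduces the layer to the plain multinomial logistic loss.
#include "caffe/blob.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
int main() {
  const int K = 4;  // assumed number of classes
  // Reshape() requires H to be a 1 x 1 x K x K blob.
  caffe::Blob<float> H(1, 1, K, K);
  float* data = H.mutable_cpu_data();
  for (int i = 0; i < K; ++i) {
    for (int j = 0; j < K; ++j) {
      data[i * K + j] = (i == j) ? 1.0f : 0.0f;  // identity matrix
    }
  }
  // Serialize the blob and write it as a binary proto file.
  caffe::BlobProto proto;
  H.ToProto(&proto);
  caffe::WriteProtoToBinaryFile(proto, "infogain.binaryproto");
  return 0;
}
The network prototxt then points the layer at this file through infogain_loss_param { source: "infogain.binaryproto" }.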