caffe infogain_loss: the information entropy cost function

Information Gain Loss (InfogainLoss)

Information entropy quantifies information: it measures how much information a system carries. The entropy is the system's average information content, i.e. the expected value of $-\log p(x)$ over the outcomes $x$. For a discrete variable $X$:

$$H(X) = -\sum_{x} p(x) \log p(x).$$

Information gain is the reduction in entropy obtained by conditioning on an additional variable, i.e. the difference between the original entropy and the conditional entropy:

$$IG(Y, X) = H(Y) - H(Y \mid X).$$
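A small worked example (numbers chosen for illustration): a fair coin $X$ has

$$H(X) = -\tfrac{1}{2}\log_2\tfrac{1}{2} - \tfrac{1}{2}\log_2\tfrac{1}{2} = 1\ \text{bit},$$

and if a variable $Y$ fully determines the outcome, then $H(X \mid Y) = 0$, so the information gain is $IG(X, Y) = H(X) - H(X \mid Y) = 1$ bit.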

Inputs:

  1. Shape: $(N \times C \times H \times W)$. The predictions $\hat{p}$, a blob of values in $[0, 1]$ giving the predicted probability of each of the $K = CHW$ classes; each prediction vector $\hat{p}_n$ sums to 1: $\sum_{k=1}^{K} \hat{p}_{nk} = 1$.

  2. Shape: $(N \times 1 \times 1 \times 1)$. The labels $l$; each $l_n$ is an integer in $\{0, 1, \dots, K-1\}$ indexing the true class among the $K$ classes.

  3. Shape: $(1 \times 1 \times K \times K)$ (optional). The infogain matrix $H$, passed as a third bottom blob (otherwise it is loaded from a file). If $H = I$, the identity matrix, the layer is equivalent to the multinomial logistic loss.

Output:

Shape: $(1 \times 1 \times 1 \times 1)$

Computed as:

$$E = -\frac{1}{N} \sum_{n=1}^{N} H_{l_n} \log(\hat{p}_n) = -\frac{1}{N} \sum_{n=1}^{N} \sum_{k=1}^{K} H_{l_n, k} \log(\hat{p}_{n, k}),$$

where $H_{l_n}$ denotes row $l_n$ of $H$.
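A quick numeric check (values invented for illustration): with $N = 1$, $K = 2$, $H = I$, prediction $\hat{p} = (0.7, 0.3)$ and label $l = 0$, the loss is

$$E = -\sum_{k} H_{0,k} \log \hat{p}_{0,k} = -\log 0.7 \approx 0.357,$$

exactly the multinomial logistic loss for that sample; a non-identity row of $H$ would additionally weight the probability mass placed on specific wrong classes.

The Caffe implementation (infogain_loss_layer.cpp) follows: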


#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

// A generalization of MultinomialLogisticLossLayer that takes an "information gain" 
// (infogain) matrix specifying the "value" of all label pairs.
// loss = -sum(infogain_mat * log(prob))/N ;
namespace caffe {

template <typename Dtype>
void InfogainLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // check the infogain matrix source.
  if (bottom.size() < 3) {
    CHECK(this->layer_param_.infogain_loss_param().has_source())
        << "Infogain matrix source must be specified.";
    BlobProto blob_proto;
    ReadProtoFromBinaryFile(
      this->layer_param_.infogain_loss_param().source(), &blob_proto);
    infogain_.FromProto(blob_proto);
  }
}

template <typename Dtype>
void InfogainLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  Blob<Dtype>* infogain = NULL;
  if (bottom.size() < 3) {
    infogain = &infogain_;
  } else {
    infogain = bottom[2];
  }
  CHECK_EQ(bottom[1]->channels(), 1);
  CHECK_EQ(bottom[1]->height(), 1);
  CHECK_EQ(bottom[1]->width(), 1);
  const int num = bottom[0]->num();
  const int dim = bottom[0]->count() / num;
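  // The infogain matrix must be 1 x 1 x K x K, where K = dim is the number of classes.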
  CHECK_EQ(infogain->num(), 1);
  CHECK_EQ(infogain->channels(), 1);
  CHECK_EQ(infogain->height(), dim);
  CHECK_EQ(infogain->width(), dim);
}


template <typename Dtype>
void InfogainLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* bottom_label = bottom[1]->cpu_data();
  const Dtype* infogain_mat = NULL;
  if (bottom.size() < 3) {
    infogain_mat = infogain_.cpu_data();
  } else {
    infogain_mat = bottom[2]->cpu_data();
  }
  int num = bottom[0]->num();
  int dim = bottom[0]->count() / bottom[0]->num();
  Dtype loss = 0;
  for (int i = 0; i < num; ++i) {
    int label = static_cast<int>(bottom_label[i]);
    for (int j = 0; j < dim; ++j) {
      // Clamp the probability to kLOG_THRESHOLD (1e-20) so log() stays finite.
      Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
      loss -= infogain_mat[label * dim + j] * log(prob);
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}

template <typename Dtype>
void InfogainLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down.size() > 2 && propagate_down[2]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to infogain inputs.";
  }
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* bottom_label = bottom[1]->cpu_data();
    const Dtype* infogain_mat = NULL;
    if (bottom.size() < 3) {
      infogain_mat = infogain_.cpu_data();
    } else {
      infogain_mat = bottom[2]->cpu_data();
    }
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    int num = bottom[0]->num();
    int dim = bottom[0]->count() / bottom[0]->num();
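    // Fold the top gradient and the 1/N normalization into one negative factor.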
    const Dtype scale = - top[0]->cpu_diff()[0] / num;
    for (int i = 0; i < num; ++i) {
      const int label = static_cast<int>(bottom_label[i]);
      for (int j = 0; j < dim; ++j) {
        // loss = -sum(infogain_mat * log(prob)) / N
        // loss' = -infogain_mat * top_diff / (N * prob)
        Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD));
        bottom_diff[i * dim + j] = scale * infogain_mat[label * dim + j] / prob;
      }
    }
  }
}

INSTANTIATE_CLASS(InfogainLossLayer);
REGISTER_LAYER_CLASS(InfogainLoss);
}  // namespace caffe
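For reference, here is a minimal usage sketch (the layer/blob names and file path below are placeholders, not from the original post). The infogain matrix can either be loaded from infogain_loss_param.source or passed as a third bottom blob; note that bottom[0] must already contain probabilities (e.g. the output of a Softmax layer), since the layer takes log() of its inputs directly:

layer {
  name: "loss"
  type: "InfogainLoss"
  bottom: "prob"   # predicted probabilities (bottom[0])
  bottom: "label"  # integer class labels (bottom[1])
  top: "loss"
  infogain_loss_param {
    source: "infogain.binaryproto"  # serialized BlobProto holding H
  }
}

And a hedged sketch of producing that file with Caffe's own I/O helpers; the identity matrix below is illustrative (it reduces the layer to the multinomial logistic loss):

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"

// Serialize a K x K infogain matrix H as a 1 x 1 x K x K BlobProto,
// the shape InfogainLossLayer::Reshape expects.
int main() {
  const int K = 3;  // number of classes (illustrative)
  caffe::BlobProto blob;
  blob.set_num(1);
  blob.set_channels(1);
  blob.set_height(K);
  blob.set_width(K);
  for (int i = 0; i < K; ++i) {
    for (int j = 0; j < K; ++j) {
      blob.add_data(i == j ? 1.0f : 0.0f);  // H = I
    }
  }
  caffe::WriteProtoToBinaryFile(blob, "infogain.binaryproto");
  return 0;
}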

