Reference 1: https://blog.csdn.net/kkk584520/article/details/52721838
Reference 2: https://blog.csdn.net/seven_first/article/details/47378697#1-caffecpugemm-%E5%87%BD%E6%95%B0
System: Ubuntu 16.04
Environment: Caffe
Background: familiarity with the basic components of Caffe, the ability to read C++, and knowing how to debug with GDB
Principle: whenever a function from reference 2 does the job, use it instead of writing your own; hand-rolled versions quickly become a headache.
1. Background
1) Get familiar with how the functions in reference 2 are used. Here caffe_add() serves as the example:
template <>
void caffe_add<float>(const int n, const float* a, const float* b,
    float* y) {
  vsAdd(n, a, b, y);
}
This function computes y = a + b element-wise, where n is the number of elements to add.
In practice: the header /caffe/util/math_functions.hpp also declares templated versions of these functions that operate on Caffe's generic Dtype:
template <typename Dtype>
void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
This means the current layer's inputs can be summed directly. For two inputs bottom[0]->cpu_data() and bottom[1]->cpu_data(), the addition looks like this:
caffe_add(bottom[0]->count(), bottom[0]->cpu_data(), bottom[1]->cpu_data(), top[0]->mutable_cpu_data());
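For intuition, the call above behaves like the following naive loop (shown purely for illustration, and assumed to run inside a layer's Forward_cpu so that Dtype, bottom, and top are in scope; the real caffe_add dispatches to optimized routines such as MKL's vsAdd, as the listing above shows):
const Dtype* a = bottom[0]->cpu_data();
const Dtype* b = bottom[1]->cpu_data();
Dtype* y = top[0]->mutable_cpu_data();
for (int i = 0; i < bottom[0]->count(); ++i) {
  y[i] = a[i] + b[i];  // element-wise sum
}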
Note: because this adds two Dtype buffers element by element, the two inputs must match exactly in size and shape. Before the call, the batch size and element count of the two inputs can be verified like this:
CHECK_EQ(bottom[1]->num(), bottom[0]->num());
CHECK_EQ(bottom[1]->count(), bottom[0]->count());
2) For a new layer to connect to multiple bottom (input) or top (output) blobs, declare the following two functions; without these overrides, the base class used below (NeuronLayer) fixes both counts at 1:
virtual inline int ExactNumBottomBlobs() const { return 2; }  // exactly two bottom blobs
virtual inline int ExactNumTopBlobs() const { return 1; }
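If the layer should instead accept a variable number of inputs, the Layer base class also offers range-style overrides; a minimal sketch (the counts 2 and 3 are arbitrary illustration values):
virtual inline int MinBottomBlobs() const { return 2; }  // accept at least two bottom blobs
virtual inline int MaxBottomBlobs() const { return 3; }  // and at most three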
2. Implementation steps:
Step 1: write the header file for the custom layer (Caffe layers are defined as classes; the class mainly declares methods such as type, Forward, and Backward);
Step 2: write the source file for the custom layer (implement the methods declared in the Step 1 header, and be sure to register the layer at the end with INSTANTIATE_CLASS and REGISTER_LAYER_CLASS);
Step 3: in caffe.proto, add a field ID for the layer parameter (inside message LayerParameter) and define the custom layer's parameter message.
The code follows.
Header file /caffe/include/caffe/layers/conv_mix_layer.hpp:
#ifndef CAFFE_CONV_MIX_LAYER_HPP_
#define CAFFE_CONV_MIX_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/math_functions.hpp"
#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

template <typename Dtype>
class ConvolutionMixLayer : public NeuronLayer<Dtype> {
 public:
  explicit ConvolutionMixLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "ConvolutionMix"; }
  virtual inline int ExactNumBottomBlobs() const { return 2; }  // exactly two bottom blobs
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  Dtype key_scale;  // scale factor applied to the element-wise sum
};

} // namespace caffe
#endif // CAFFE_CONV_MIX_LAYER_HPP_
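A side note on the base class: NeuronLayer is really meant for single-input, single-output layers (it fixes both blob counts at 1), which is exactly why the two ExactNum* overrides above are needed; deriving directly from Layer<Dtype> would work just as well here.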
Source file /caffe/src/caffe/layers/conv_mix_layer.cpp:
#include <vector>

#include "caffe/layers/conv_mix_layer.hpp"

namespace caffe {

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ConvolutionMixParameter& convolution_mix_param =
      this->layer_param_.convolution_mix_param();
  CHECK(convolution_mix_param.has_key_scale())
      << "**Please set the key_scale value of convolution_mix_param.**";
  top[0]->Reshape(bottom[0]->shape());
}

// Reshape is declared in the header, so it must also be defined here;
// otherwise linking fails with an undefined reference (it is not merely a
// warning). The body can stay empty because top[0] is already shaped in
// LayerSetUp, although reshaping top[0] here would be the more idiomatic
// choice, since Reshape is re-run whenever the bottom shapes change.
template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
}

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ConvolutionMixParameter& convolution_mix_param =
      this->layer_param_.convolution_mix_param();
  const Dtype* bottom_data1 = bottom[0]->cpu_data();
  const Dtype* bottom_data2 = bottom[1]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  key_scale = static_cast<Dtype>(convolution_mix_param.key_scale());
  CHECK_EQ(bottom[1]->num(), bottom[0]->num());
  CHECK_EQ(bottom[1]->count(), bottom[0]->count());
  const int count = bottom[0]->count();
  // Two equivalent alternatives:
  // caffe_axpy(count, key_scale, bottom_data1, top_data);  // top_data += key_scale * bottom_data1 (top_data must be zeroed first)
  // caffe_axpy(count, key_scale, bottom_data2, top_data);
  // or a plain loop:
  // for (int i = 0; i < count; ++i) {
  //   top_data[i] = (bottom_data1[i] + bottom_data2[i]) * key_scale;
  // }
  caffe_add(count, bottom_data1, bottom_data2, top_data);  // top = bottom1 + bottom2
  caffe_scal(count, key_scale, top_data);                  // top *= key_scale; with key_scale = 0.5 this is the element-wise mean
}

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  // Read the user-specified gradient scale; note the parameter is named key_scale.
  key_scale = this->layer_param_.convolution_mix_param().key_scale();
  // Each bottom receives the same gradient, key_scale * top_diff; check
  // propagate_down separately for each bottom.
  if (propagate_down[0]) {
    caffe_cpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    caffe_cpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[1]->mutable_cpu_diff());
  }
}

#ifdef CPU_ONLY
STUB_GPU(ConvolutionMixLayer);
#endif

INSTANTIATE_CLASS(ConvolutionMixLayer);
REGISTER_LAYER_CLASS(ConvolutionMix);

} // namespace caffe
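Why the backward pass looks this way: the forward pass computes top = key_scale * (bottom[0] + bottom[1]), so the partial derivative of top with respect to either input is simply key_scale. By the chain rule, bottom_diff = key_scale * top_diff for both bottoms, which is exactly what the caffe_cpu_axpby calls produce (the beta = 0 argument overwrites any stale diff rather than accumulating into it).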
Add the layer field and parameter message in /caffe/src/caffe/proto/caffe.proto:
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob
  optional ConvolutionMixParameter convolution_mix_param = 149;
  // ... other fields omitted ...
}

message ConvolutionMixParameter {
  optional float key_scale = 1 [default = 1];
}
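Note: the field number (149 here) must not collide with an ID already used inside LayerParameter; caffe.proto keeps a "next available layer-specific ID" comment at the top of the message that is worth updating. After editing caffe.proto, rebuild Caffe (make clean && make all with the standard Makefile build) so that protoc regenerates caffe.pb.h/caffe.pb.cc with the ConvolutionMixParameter accessors used above.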
Usage, in /caffe/examples/mnist/zhou_test.prototxt:
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# add
# 2_1
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# 2_2
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Deconvolution
layer {
  name: "deconv"
  type: "Deconvolution"
  bottom: "conv2_2"
  top: "deconv"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# The layers above are arranged so that conv2_1 and deconv end up with the
# same output shape: conv2_2 shrinks the feature map with its 5x5 kernel and
# deconv grows it back.
# mix
# Added to train_test.prototxt; conv2_1 and deconv must match in both batch
# size and dimensions.
layer {
  name: "convmix"
  type: "ConvolutionMix"
  bottom: "conv2_1"
  bottom: "deconv"
  top: "convmix"
  convolution_mix_param {
    key_scale: 0.5
  }
}
# The fully connected layers follow.
Solver file /caffe/examples/mnist/solver.prototxt:
# The train/test net protocol buffer definition
net: "examples/mnist/train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "examples/mnist/test"
# solver mode: CPU or GPU
solver_mode: CPU
Once all of the above is in place, compile and run (e.g. ./build/tools/caffe train --solver=examples/mnist/solver.prototxt); this only gives CPU training. The next step is converting the layer for GPU training:
Point 1: replace every CPU-related call in the CPU source with its GPU counterpart;
Point 2: caffe_add becomes caffe_gpu_add, and caffe_scal becomes caffe_gpu_scal;
Point 3: in the .cu file, instantiate with INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionMixLayer) instead of INSTANTIATE_CLASS/REGISTER_LAYER_CLASS;
GPU implementation /caffe/src/caffe/layers/conv_mix_layer.cu:
#include <vector>

#include "caffe/layers/conv_mix_layer.hpp"

namespace caffe {

// LayerSetUp and Reshape are already defined in conv_mix_layer.cpp and must
// not be defined a second time here (that risks multiple-definition errors);
// the .cu file only provides the GPU forward and backward passes.

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ConvolutionMixParameter& convolution_mix_param =
      this->layer_param_.convolution_mix_param();
  const Dtype* bottom_data1 = bottom[0]->gpu_data();
  const Dtype* bottom_data2 = bottom[1]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  key_scale = static_cast<Dtype>(convolution_mix_param.key_scale());
  CHECK_EQ(bottom[1]->num(), bottom[0]->num());
  CHECK_EQ(bottom[1]->count(), bottom[0]->count());
  const int count = bottom[0]->count();
  caffe_gpu_add(count, bottom_data1, bottom_data2, top_data);  // top = bottom1 + bottom2
  caffe_gpu_scal(count, key_scale, top_data);                  // top *= key_scale
}

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->gpu_diff();
  // Read the user-specified gradient scale; note the parameter is named key_scale.
  key_scale = this->layer_param_.convolution_mix_param().key_scale();
  // Same gradient as the CPU path: bottom_diff = key_scale * top_diff.
  if (propagate_down[0]) {
    caffe_gpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[0]->mutable_gpu_diff());
  }
  if (propagate_down[1]) {
    caffe_gpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[1]->mutable_gpu_diff());
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionMixLayer);

} // namespace caffe
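INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionMixLayer) explicitly instantiates Forward_gpu and Backward_gpu for float and double; the class registration proper (INSTANTIATE_CLASS / REGISTER_LAYER_CLASS) already lives in the .cpp file and is not repeated here.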
With GPU acceleration enabled, training now runs at full speed.
As it turns out, the Caffe source already ships an Eltwise layer covering exactly these element-wise operations (SUM, MAX, PROD); eltwise_layer.cpp is reproduced below:
#include <cfloat>
#include <vector>

#include "caffe/layers/eltwise_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK(this->layer_param().eltwise_param().coeff_size() == 0
      || this->layer_param().eltwise_param().coeff_size() == bottom.size()) <<
      "Eltwise Layer takes one coefficient per bottom blob.";
  CHECK(!(this->layer_param().eltwise_param().operation()
      == EltwiseParameter_EltwiseOp_PROD
      && this->layer_param().eltwise_param().coeff_size())) <<
      "Eltwise layer only takes coefficients for summation.";
  op_ = this->layer_param_.eltwise_param().operation();
  // Blob-wise coefficients for the elementwise operation.
  coeffs_ = vector<Dtype>(bottom.size(), 1);
  if (this->layer_param().eltwise_param().coeff_size()) {
    for (int i = 0; i < bottom.size(); ++i) {
      coeffs_[i] = this->layer_param().eltwise_param().coeff(i);
    }
  }
  stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  for (int i = 1; i < bottom.size(); ++i) {
    CHECK(bottom[i]->shape() == bottom[0]->shape());
  }
  top[0]->ReshapeLike(*bottom[0]);
  // If max operation, we will initialize the vector index part.
  if (this->layer_param_.eltwise_param().operation() ==
      EltwiseParameter_EltwiseOp_MAX && top.size() == 1) {
    max_idx_.Reshape(bottom[0]->shape());
  }
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int* mask = NULL;
  const Dtype* bottom_data_a = NULL;
  const Dtype* bottom_data_b = NULL;
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  switch (op_) {
  case EltwiseParameter_EltwiseOp_PROD:
    caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
    for (int i = 2; i < bottom.size(); ++i) {
      caffe_mul(count, top_data, bottom[i]->cpu_data(), top_data);
    }
    break;
  case EltwiseParameter_EltwiseOp_SUM:
    caffe_set(count, Dtype(0), top_data);
    // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
    for (int i = 0; i < bottom.size(); ++i) {
      caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
    }
    break;
  case EltwiseParameter_EltwiseOp_MAX:
    // Initialize
    mask = max_idx_.mutable_cpu_data();
    caffe_set(count, -1, mask);
    caffe_set(count, Dtype(-FLT_MAX), top_data);
    // bottom 0 & 1
    bottom_data_a = bottom[0]->cpu_data();
    bottom_data_b = bottom[1]->cpu_data();
    for (int idx = 0; idx < count; ++idx) {
      if (bottom_data_a[idx] > bottom_data_b[idx]) {
        top_data[idx] = bottom_data_a[idx];  // maxval
        mask[idx] = 0;  // maxid
      } else {
        top_data[idx] = bottom_data_b[idx];  // maxval
        mask[idx] = 1;  // maxid
      }
    }
    // bottom 2++
    for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
      bottom_data_b = bottom[blob_idx]->cpu_data();
      for (int idx = 0; idx < count; ++idx) {
        if (bottom_data_b[idx] > top_data[idx]) {
          top_data[idx] = bottom_data_b[idx];  // maxval
          mask[idx] = blob_idx;  // maxid
        }
      }
    }
    break;
  default:
    LOG(FATAL) << "Unknown elementwise operation.";
  }
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < bottom.size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        if (stable_prod_grad_) {
          bool initialized = false;
          for (int j = 0; j < bottom.size(); ++j) {
            if (i == j) { continue; }
            if (!initialized) {
              caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
              initialized = true;
            } else {
              caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                        bottom_diff);
            }
          }
        } else {
          caffe_div(count, top_data, bottom_data, bottom_diff);
        }
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        for (int index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(EltwiseLayer);
#endif

INSTANTIATE_CLASS(EltwiseLayer);
REGISTER_LAYER_CLASS(Eltwise);

} // namespace caffe
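In other words, the custom ConvolutionMix layer above could have been built from the stock Eltwise layer alone. A minimal sketch of the equivalent prototxt, using SUM with per-blob coefficients of 0.5 to reproduce key_scale: 0.5:
layer {
  name: "convmix"
  type: "Eltwise"
  bottom: "conv2_1"
  bottom: "deconv"
  top: "convmix"
  eltwise_param {
    operation: SUM
    coeff: 0.5
    coeff: 0.5
  }
}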