Reference 1: https://blog.csdn.net/kkk584520/article/details/52721838
Reference 2: https://blog.csdn.net/seven_first/article/details/47378697#1-caffecpugemm-%E5%87%BD%E6%95%B0
System: Ubuntu 16.04
Environment: Caffe
Background: familiarity with the basic components of Caffe, the ability to read C++, and knowing how to debug with GDB
Principle: whenever a function from reference 2 does the job, use it instead of writing your own; hand-rolled versions quickly become a headache.
1. Background
1) Get familiar with how the functions in reference 2 are used. Here caffe_add() serves as the example:
template <>
void caffe_add<float>(const int n, const float* a, const float* b,
    float* y) {
  vsAdd(n, a, b, y);
}
This function computes y = a + b element-wise, where n is the number of elements to add.
In practice: the header /caffe/util/math_functions.hpp also declares templated versions of these functions that operate on Caffe's generic Dtype:
template <typename Dtype>
void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
This means the current layer's inputs can be summed directly. For two inputs bottom[0]->cpu_data() and bottom[1]->cpu_data(), the addition looks like this:
caffe_add(bottom[0]->count(), bottom[0]->cpu_data(), bottom[1]->cpu_data(), top[0]->mutable_cpu_data());
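For intuition, the call above behaves like the following naive loop (shown purely for illustration, and assumed to run inside a layer's Forward_cpu so that Dtype, bottom, and top are in scope; the real caffe_add dispatches to optimized routines such as MKL's vsAdd, as the listing above shows):
const Dtype* a = bottom[0]->cpu_data();
const Dtype* b = bottom[1]->cpu_data();
Dtype* y = top[0]->mutable_cpu_data();
for (int i = 0; i < bottom[0]->count(); ++i) {
  y[i] = a[i] + b[i];  // element-wise sum
}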
Note: because this adds two Dtype buffers element by element, the two inputs must match exactly in size and shape. Before the call, the batch size and element count of the two inputs can be verified like this:
CHECK_EQ(bottom[1]->num(), bottom[0]->num());
CHECK_EQ(bottom[1]->count(), bottom[0]->count());
2) For a new layer to connect to multiple bottom (input) or top (output) blobs, declare the following two functions; without these overrides, the base class used below (NeuronLayer) fixes both counts at 1:
virtual inline int ExactNumBottomBlobs() const { return 2; }  // exactly two bottom blobs
virtual inline int ExactNumTopBlobs() const { return 1; }
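If the layer should instead accept a variable number of inputs, the Layer base class also offers range-style overrides; a minimal sketch (the counts 2 and 3 are arbitrary illustration values):
virtual inline int MinBottomBlobs() const { return 2; }  // accept at least two bottom blobs
virtual inline int MaxBottomBlobs() const { return 3; }  // and at most three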
2. Implementation steps:
Step 1: write the header file for the custom layer (Caffe layers are defined as classes; the class mainly declares methods such as type, Forward, and Backward);
Step 2: write the source file for the custom layer (implement the methods declared in the Step 1 header, and be sure to register the layer at the end with INSTANTIATE_CLASS and REGISTER_LAYER_CLASS);
Step 3: in caffe.proto, add a field ID for the layer parameter (inside message LayerParameter) and define the custom layer's parameter message.
The code follows.
Header file /caffe/include/caffe/layers/conv_mix_layer.hpp:
#ifndef CAFFE_CONV_MIX_LAYER_HPP_
#define CAFFE_CONV_MIX_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/math_functions.hpp"
#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

template <typename Dtype>
class ConvolutionMixLayer : public NeuronLayer<Dtype> {
 public:
  explicit ConvolutionMixLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "ConvolutionMix"; }
  virtual inline int ExactNumBottomBlobs() const { return 2; }  // exactly two bottom blobs
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  Dtype key_scale;  // scale factor applied to the element-wise sum
};

} // namespace caffe
#endif // CAFFE_CONV_MIX_LAYER_HPP_
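A side note on the base class: NeuronLayer is really meant for single-input, single-output layers (it fixes both blob counts at 1), which is exactly why the two ExactNum* overrides above are needed; deriving directly from Layer<Dtype> would work just as well here.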
Source file /caffe/src/caffe/layers/conv_mix_layer.cpp:
#include <vector>

#include "caffe/layers/conv_mix_layer.hpp"

namespace caffe {

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ConvolutionMixParameter& convolution_mix_param =
      this->layer_param_.convolution_mix_param();
  CHECK(convolution_mix_param.has_key_scale())
      << "**Please set the key_scale value of convolution_mix_param.**";
  top[0]->Reshape(bottom[0]->shape());
}

// Reshape is declared in the header, so it must also be defined here;
// otherwise linking fails with an undefined reference (it is not merely a
// warning). The body can stay empty because top[0] is already shaped in
// LayerSetUp, although reshaping top[0] here would be the more idiomatic
// choice, since Reshape is re-run whenever the bottom shapes change.
template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
}

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ConvolutionMixParameter& convolution_mix_param =
      this->layer_param_.convolution_mix_param();
  const Dtype* bottom_data1 = bottom[0]->cpu_data();
  const Dtype* bottom_data2 = bottom[1]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  key_scale = static_cast<Dtype>(convolution_mix_param.key_scale());
  CHECK_EQ(bottom[1]->num(), bottom[0]->num());
  CHECK_EQ(bottom[1]->count(), bottom[0]->count());
  const int count = bottom[0]->count();
  // Two equivalent alternatives:
  // caffe_axpy(count, key_scale, bottom_data1, top_data);  // top_data += key_scale * bottom_data1 (top_data must be zeroed first)
  // caffe_axpy(count, key_scale, bottom_data2, top_data);
  // or a plain loop:
  // for (int i = 0; i < count; ++i) {
  //   top_data[i] = (bottom_data1[i] + bottom_data2[i]) * key_scale;
  // }
  caffe_add(count, bottom_data1, bottom_data2, top_data);  // top = bottom1 + bottom2
  caffe_scal(count, key_scale, top_data);                  // top *= key_scale; with key_scale = 0.5 this is the element-wise mean
}

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  // Read the user-specified gradient scale; note the parameter is named key_scale.
  key_scale = this->layer_param_.convolution_mix_param().key_scale();
  // Each bottom receives the same gradient, key_scale * top_diff; check
  // propagate_down separately for each bottom.
  if (propagate_down[0]) {
    caffe_cpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    caffe_cpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[1]->mutable_cpu_diff());
  }
}

#ifdef CPU_ONLY
STUB_GPU(ConvolutionMixLayer);
#endif

INSTANTIATE_CLASS(ConvolutionMixLayer);
REGISTER_LAYER_CLASS(ConvolutionMix);

} // namespace caffe
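Why the backward pass looks this way: the forward pass computes top = key_scale * (bottom[0] + bottom[1]), so the partial derivative of top with respect to either input is simply key_scale. By the chain rule, bottom_diff = key_scale * top_diff for both bottoms, which is exactly what the caffe_cpu_axpby calls produce (the beta = 0 argument overwrites any stale diff rather than accumulating into it).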
Add the layer field and parameter message in /caffe/src/caffe/proto/caffe.proto:
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob
  optional ConvolutionMixParameter convolution_mix_param = 149;
  // ... other fields omitted ...
}

message ConvolutionMixParameter {
  optional float key_scale = 1 [default = 1];
}
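Note: the field number (149 here) must not collide with an ID already used inside LayerParameter; caffe.proto keeps a "next available layer-specific ID" comment at the top of the message that is worth updating. After editing caffe.proto, rebuild Caffe (make clean && make all with the standard Makefile build) so that protoc regenerates caffe.pb.h/caffe.pb.cc with the ConvolutionMixParameter accessors used above.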
Usage, in /caffe/examples/mnist/zhou_test.prototxt:
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# add
# 2_1
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# 2_2
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Deconvolution
layer {
  name: "deconv"
  type: "Deconvolution"
  bottom: "conv2_2"
  top: "deconv"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# The layers above are arranged so that conv2_1 and deconv end up with the
# same output shape: conv2_2 shrinks the feature map with its 5x5 kernel and
# deconv grows it back.
# mix
# Added to train_test.prototxt; conv2_1 and deconv must match in both batch
# size and dimensions.
layer {
  name: "convmix"
  type: "ConvolutionMix"
  bottom: "conv2_1"
  bottom: "deconv"
  top: "convmix"
  convolution_mix_param {
    key_scale: 0.5
  }
}
# The fully connected layers follow.
Solver file /caffe/examples/mnist/solver.prototxt:
# The train/test net protocol buffer definition
net: "examples/mnist/train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "examples/mnist/test"
# solver mode: CPU or GPU
solver_mode: CPU
Once all of the above is in place, compile and run (e.g. ./build/tools/caffe train --solver=examples/mnist/solver.prototxt); this only gives CPU training. The next step is converting the layer for GPU training:
Point 1: replace every CPU-related call in the CPU source with its GPU counterpart;
Point 2: caffe_add becomes caffe_gpu_add, and caffe_scal becomes caffe_gpu_scal;
Point 3: in the .cu file, instantiate with INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionMixLayer) instead of INSTANTIATE_CLASS/REGISTER_LAYER_CLASS;
GPU implementation /caffe/src/caffe/layers/conv_mix_layer.cu:
#include <vector>

#include "caffe/layers/conv_mix_layer.hpp"

namespace caffe {

// LayerSetUp and Reshape are already defined in conv_mix_layer.cpp and must
// not be defined a second time here (that risks multiple-definition errors);
// the .cu file only provides the GPU forward and backward passes.

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const ConvolutionMixParameter& convolution_mix_param =
      this->layer_param_.convolution_mix_param();
  const Dtype* bottom_data1 = bottom[0]->gpu_data();
  const Dtype* bottom_data2 = bottom[1]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  key_scale = static_cast<Dtype>(convolution_mix_param.key_scale());
  CHECK_EQ(bottom[1]->num(), bottom[0]->num());
  CHECK_EQ(bottom[1]->count(), bottom[0]->count());
  const int count = bottom[0]->count();
  caffe_gpu_add(count, bottom_data1, bottom_data2, top_data);  // top = bottom1 + bottom2
  caffe_gpu_scal(count, key_scale, top_data);                  // top *= key_scale
}

template <typename Dtype>
void ConvolutionMixLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->gpu_diff();
  // Read the user-specified gradient scale; note the parameter is named key_scale.
  key_scale = this->layer_param_.convolution_mix_param().key_scale();
  // Same gradient as the CPU path: bottom_diff = key_scale * top_diff.
  if (propagate_down[0]) {
    caffe_gpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[0]->mutable_gpu_diff());
  }
  if (propagate_down[1]) {
    caffe_gpu_axpby(count, key_scale, top_diff, Dtype(0),
        bottom[1]->mutable_gpu_diff());
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionMixLayer);

} // namespace caffe
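INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionMixLayer) explicitly instantiates Forward_gpu and Backward_gpu for float and double; the class registration proper (INSTANTIATE_CLASS / REGISTER_LAYER_CLASS) already lives in the .cpp file and is not repeated here.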
With GPU acceleration enabled, training now runs at full speed.
As it turns out, the Caffe source already ships an Eltwise layer covering exactly these element-wise operations (SUM, MAX, PROD); eltwise_layer.cpp is reproduced below:
#include <cfloat>
#include <vector>

#include "caffe/layers/eltwise_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK(this->layer_param().eltwise_param().coeff_size() == 0
      || this->layer_param().eltwise_param().coeff_size() == bottom.size()) <<
      "Eltwise Layer takes one coefficient per bottom blob.";
  CHECK(!(this->layer_param().eltwise_param().operation()
      == EltwiseParameter_EltwiseOp_PROD
      && this->layer_param().eltwise_param().coeff_size())) <<
      "Eltwise layer only takes coefficients for summation.";
  op_ = this->layer_param_.eltwise_param().operation();
  // Blob-wise coefficients for the elementwise operation.
  coeffs_ = vector<Dtype>(bottom.size(), 1);
  if (this->layer_param().eltwise_param().coeff_size()) {
    for (int i = 0; i < bottom.size(); ++i) {
      coeffs_[i] = this->layer_param().eltwise_param().coeff(i);
    }
  }
  stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  for (int i = 1; i < bottom.size(); ++i) {
    CHECK(bottom[i]->shape() == bottom[0]->shape());
  }
  top[0]->ReshapeLike(*bottom[0]);
  // If max operation, we will initialize the vector index part.
  if (this->layer_param_.eltwise_param().operation() ==
      EltwiseParameter_EltwiseOp_MAX && top.size() == 1) {
    max_idx_.Reshape(bottom[0]->shape());
  }
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  int* mask = NULL;
  const Dtype* bottom_data_a = NULL;
  const Dtype* bottom_data_b = NULL;
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  switch (op_) {
  case EltwiseParameter_EltwiseOp_PROD:
    caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
    for (int i = 2; i < bottom.size(); ++i) {
      caffe_mul(count, top_data, bottom[i]->cpu_data(), top_data);
    }
    break;
  case EltwiseParameter_EltwiseOp_SUM:
    caffe_set(count, Dtype(0), top_data);
    // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
    for (int i = 0; i < bottom.size(); ++i) {
      caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
    }
    break;
  case EltwiseParameter_EltwiseOp_MAX:
    // Initialize
    mask = max_idx_.mutable_cpu_data();
    caffe_set(count, -1, mask);
    caffe_set(count, Dtype(-FLT_MAX), top_data);
    // bottom 0 & 1
    bottom_data_a = bottom[0]->cpu_data();
    bottom_data_b = bottom[1]->cpu_data();
    for (int idx = 0; idx < count; ++idx) {
      if (bottom_data_a[idx] > bottom_data_b[idx]) {
        top_data[idx] = bottom_data_a[idx];  // maxval
        mask[idx] = 0;  // maxid
      } else {
        top_data[idx] = bottom_data_b[idx];  // maxval
        mask[idx] = 1;  // maxid
      }
    }
    // bottom 2++
    for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
      bottom_data_b = bottom[blob_idx]->cpu_data();
      for (int idx = 0; idx < count; ++idx) {
        if (bottom_data_b[idx] > top_data[idx]) {
          top_data[idx] = bottom_data_b[idx];  // maxval
          mask[idx] = blob_idx;  // maxid
        }
      }
    }
    break;
  default:
    LOG(FATAL) << "Unknown elementwise operation.";
  }
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < bottom.size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        if (stable_prod_grad_) {
          bool initialized = false;
          for (int j = 0; j < bottom.size(); ++j) {
            if (i == j) { continue; }
            if (!initialized) {
              caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
              initialized = true;
            } else {
              caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                        bottom_diff);
            }
          }
        } else {
          caffe_div(count, top_data, bottom_data, bottom_diff);
        }
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        for (int index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(EltwiseLayer);
#endif

INSTANTIATE_CLASS(EltwiseLayer);
REGISTER_LAYER_CLASS(Eltwise);

} // namespace caffe
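In other words, the custom ConvolutionMix layer above could have been built from the stock Eltwise layer alone. A minimal sketch of the equivalent prototxt, using SUM with per-blob coefficients of 0.5 to reproduce key_scale: 0.5:
layer {
  name: "convmix"
  type: "Eltwise"
  bottom: "conv2_1"
  bottom: "deconv"
  top: "convmix"
  eltwise_param {
    operation: SUM
    coeff: 0.5
    coeff: 0.5
  }
}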