SigmoidLayer 的定义
neuron_layer.h
/**
 * Sigmoid activation layer, declared as a NeuronLayer<Dtype> subclass.
 *
 * Only the interface is declared here; the forward and backward passes are
 * defined out of line, once for the CPU and once for the GPU.
 */
template <typename Dtype>
class SigmoidLayer : public NeuronLayer<Dtype> {
 public:
  /// Hands the layer configuration straight to the NeuronLayer constructor.
  explicit SigmoidLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  /// Returns the type name of this layer, the literal "Sigmoid".
  virtual const char* type() const { return "Sigmoid"; }

 protected:
  // Forward and backward propagation, each in a CPU and a GPU flavour.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
                           const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
                            const vector<bool>& propagate_down,
                            const vector<Blob<Dtype>*>& bottom);
};
sigmoid_layer.cpp
/**
 * Logistic function: returns 1 / (1 + e^(-x)).
 *
 * Declared inline so the compiler may substitute the body at each call site
 * instead of emitting a function call.
 */
template <typename Dtype>
inline Dtype sigmoid(Dtype x) {
  // The literals are doubles, so the quotient is narrowed to Dtype on store.
  const Dtype value = 1. / (exp(-x) + 1.);
  return value;
}
/**
 * CPU forward pass.
 *
 * For every element of bottom[0], stores sigmoid(element) at the same
 * position of top[0].
 *
 * @param bottom input blobs; only bottom[0] is read.
 * @param top    output blobs; only top[0] is written.
 */
template <typename Dtype>
void SigmoidLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* src = bottom[0]->cpu_data();
  Dtype* dst = top[0]->mutable_cpu_data();
  // Advance through both buffers together; each value is read before the
  // matching slot is written.
  for (int remaining = bottom[0]->count(); remaining > 0; --remaining) {
    const Dtype activated = sigmoid(*src);
    *dst = activated;
    ++src;
    ++dst;
  }
}
/**
 * CPU backward pass.
 *
 * With y = top[0] data (the sigmoid output stored by the forward pass), the
 * gradient written to bottom[0] is
 *   bottom_diff[i] = top_diff[i] * y[i] * (1 - y[i]).
 *
 * @param top            output blobs; top[0] supplies both data and diff.
 * @param propagate_down nothing is computed unless propagate_down[0] is set.
 * @param bottom         input blobs; bottom[0]'s diff is overwritten.
 */
template <typename Dtype>
void SigmoidLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Back-propagate only when propagate_down[0] is set.
  if (propagate_down[0]) {
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    for (int i = 0; i < count; ++i) {
      // top_data already holds sigmoid(x), so exp() is not evaluated again.
      const Dtype sigmoid_x = top_data[i];
      bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x);
    }
  }
}  // This closing brace of the function body was missing.
sigmoid_layer.cu
/**
 * Element-wise sigmoid kernel: out[i] = 1 / (1 + exp(-in[i])) for 0 <= i < n.
 *
 * Iteration is delegated to CUDA_KERNEL_LOOP, which is defined outside this
 * file. The definition quoted in the original notes is (line continuations
 * omitted):
 *
 *   #define CUDA_KERNEL_LOOP(i, n)
 *     for (int i = blockIdx.x * blockDim.x + threadIdx.x;
 *          i < (n);
 *          i += blockDim.x * gridDim.x)
 *
 * Under that definition each thread starts at its global thread index and
 * advances by the total number of launched threads, so any 1-D launch
 * configuration covers all n elements.
 *
 * @param n   number of elements to process.
 * @param in  input values, read at indices below n.
 * @param out output values, written at indices below n.
 */
template <typename Dtype>
__global__ void SigmoidForward(const int n, const Dtype* in, Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    // Dtype(1) rather than the double literal 1. keeps the float
    // instantiation from being promoted to double arithmetic.
    out[index] = Dtype(1) / (Dtype(1) + exp(-in[index]));
  }
}
/**
 * GPU forward pass: launches SigmoidForward over every element of bottom[0]
 * and stores the result in top[0].
 *
 * @param bottom input blobs; only bottom[0] is read.
 * @param top    output blobs; only top[0] is written.
 */
template <typename Dtype>
void SigmoidLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  const int count = bottom[0]->count();
  // NOLINT_NEXT_LINE(whitespace/operators)
  SigmoidForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, top_data);
  // Check whether the launch reported an error.
  CUDA_POST_KERNEL_CHECK;
  // Reference definitions quoted in the original notes (line continuations
  // omitted). They are kept as line comments because the inner block comment
  // would otherwise terminate an enclosing block comment early.
  //
  //   #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError())
  //
  //   #define CUDA_CHECK(condition)
  //     /* Code block avoids redefinition of cudaError_t error */
  //     do {
  //       cudaError_t error = condition;
  //       CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error);
  //     } while (0)
  //
  // The status is compared with cudaSuccess; on a mismatch the text returned
  // by cudaGetErrorString is appended to the failure message.
  //
  // Optional extra diagnostics, left disabled:
  // << " count: " << count << " bottom_data: "
  // << (unsigned long)bottom_data
  // << " top_data: " << (unsigned long)top_data
  // << " blocks: " << CAFFE_GET_BLOCKS(count)
  // << " threads: " << CAFFE_CUDA_NUM_THREADS;
}
/**
 * Element-wise sigmoid gradient kernel.
 *
 * For each index i visited by CUDA_KERNEL_LOOP (bounded by n):
 *   out_diff[i] = in_diff[i] * y * (1 - y),  where y = out_data[i].
 *
 * @param n        number of elements to process.
 * @param in_diff  incoming gradient values.
 * @param out_data values used as y in the formula above.
 * @param out_diff destination for the computed gradient.
 */
template <typename Dtype>
__global__ void SigmoidBackward(const int n, const Dtype* in_diff,
                                const Dtype* out_data, Dtype* out_diff) {
  CUDA_KERNEL_LOOP(index, n) {
    const Dtype y = out_data[index];
    const Dtype one_minus_y = 1 - y;
    // Same evaluation order as before: (in_diff * y) * (1 - y).
    out_diff[index] = in_diff[index] * y * one_minus_y;
  }
}
/**
 * GPU backward pass: launches SigmoidBackward to fill bottom[0]'s diff from
 * top[0]'s data and diff. Returns immediately when propagate_down[0] is
 * false.
 *
 * @param top            output blobs; top[0] supplies data and diff.
 * @param propagate_down gate for the whole computation (index 0 only).
 * @param bottom         input blobs; bottom[0]'s diff is written.
 */
template <typename Dtype>
void SigmoidLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_data = top[0]->gpu_data();
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  const int count = bottom[0]->count();
  // Launch configuration, per the definitions quoted in the original notes:
  //
  //   #if __CUDA_ARCH__ >= 200
  //   const int CAFFE_CUDA_NUM_THREADS = 1024;
  //   #else
  //   const int CAFFE_CUDA_NUM_THREADS = 512;
  //   #endif
  //   inline int CAFFE_GET_BLOCKS(const int N) {
  //     return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
  //   }
  //
  // Both the grid and the block are one-dimensional, and the block count is
  // count divided by the block size, rounded up.
  // NOLINT_NEXT_LINE(whitespace/operators)
  SigmoidBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, top_diff, top_data, bottom_diff);
  CUDA_POST_KERNEL_CHECK;
}