GitHub 地址:https://github.com/ydwen/caffe-face
CenterLoss_layer 可以在原有分类损失的基础上(在一定程度上)把准确率提升几个点(博主实测提升了 6 个点),而且不增加前向时间,好东西啊!
原理这里就不介绍了,网上应有尽有!这里只介绍如何在 caffe 中添加 CenterLoss_layer 这样的新层!
第一步:修改 caffe.proto,添加该层的参数消息(message)
1、在message LayerParameter{}中添加如下代码:
- optional CenterLossParameter center_loss_param = 147; // Parameters read by the CenterLoss layer. NOTE(review): field number 147 must not already be used inside LayerParameter -- confirm against your caffe.proto.
2、在 caffe.proto 文件末尾添加:
- message CenterLossParameter {
- optional uint32 num_output = 1; // Number of centers (rows of the center blob); labels index into these rows. Has no default, so it must be set.
- optional FillerParameter center_filler = 2; // The filler used to initialize the centers
- // The first axis of the feature bottom to be flattened into the per-sample
- // feature vector; its flattened size is the length of each center.
- // May be negative to index from the end (e.g., -1 for the last axis).
- optional int32 axis = 3 [default = 1];
- }
第二步:添加头文件 center_loss_layer.hpp(按下文 #include "caffe/layers/center_loss_layer.hpp" 的路径,应放在 include/caffe/layers/ 目录下)
- #ifndef CAFFE_CENTER_LOSS_LAYER_HPP_
- #define CAFFE_CENTER_LOSS_LAYER_HPP_
- #include <vector>
- #include "caffe/blob.hpp"
- #include "caffe/layer.hpp"
- #include "caffe/proto/caffe.pb.h"
- #include "caffe/layers/loss_layer.hpp"
- namespace caffe {
- /**
-  * @brief Center loss: penalizes the squared distance between each sample's
-  *        feature vector and the center associated with that sample's label.
-  *
-  * Expects exactly two bottoms: features (bottom[0]) and labels (bottom[1]).
-  * The centers are kept in the layer's single parameter blob, shaped N_ x K_.
-  */
- template <typename Dtype>
- class CenterLossLayer : public LossLayer<Dtype> {
-  public:
-   explicit CenterLossLayer(const LayerParameter& param)
-       : LossLayer<Dtype>(param) {}
-   /// Layer type string used for registration and in log messages.
-   virtual const char* type() const { return "CenterLoss"; }
-   /// Exactly two bottoms are required.
-   virtual int ExactNumBottomBlobs() const { return 2; }
-   /// -1 means no exact top count is enforced.
-   virtual int ExactNumTopBlobs() const { return -1; }
-   /// Reads num_output / axis and creates and fills the center blob if absent.
-   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-       const vector<Blob<Dtype>*>& top);
-   /// Updates the batch size and resizes the internal buffers.
-   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
-       const vector<Blob<Dtype>*>& top);
-  protected:
-   /// Computes distance_ = x - c_y and the scalar loss (CPU / GPU variants).
-   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-       const vector<Blob<Dtype>*>& top);
-   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-       const vector<Blob<Dtype>*>& top);
-   /// Computes gradients w.r.t. the centers and the features (CPU / GPU variants).
-   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
-       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
-   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
-       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
-   int M_;  ///< batch size, taken from bottom[0]->num()
-   int K_;  ///< flattened feature length, i.e. the length of one center
-   int N_;  ///< number of centers (num_output)
-   Blob<Dtype> distance_;       ///< per-sample difference x_i - c_{y_i}, shaped like bottom[0]
-   Blob<Dtype> variation_sum_;  ///< per-center accumulator, shaped like the center blob
- };
- } // namespace caffe
- #endif // CAFFE_CENTER_LOSS_LAYER_HPP_
第三步:添加 center_loss_layer.cpp 以及 center_loss_layer.cu(下面先给出 .cpp 的内容;从第二个 #include <vector> 开始是 .cu 的内容)
- #include <vector>
- #include "caffe/filler.hpp"
- #include "caffe/layers/center_loss_layer.hpp"
- #include "caffe/util/math_functions.hpp"
- namespace caffe {
- // One-time setup: reads num_output (number of centers, N_) and axis from
- // center_loss_param, derives the flattened feature length K_, and creates and
- // fills the N_ x K_ center blob unless parameter blobs are already present.
- template <typename Dtype>
- void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const CenterLossParameter& center_param =
-       this->layer_param_.center_loss_param();
-   N_ = center_param.num_output();
-   // num_output has no default in the proto; an unset value would create an
-   // empty center blob and make every label index read out of bounds.
-   CHECK_GT(N_, 0)
-       << "center_loss_param.num_output must be set to a positive value.";
-   const int axis = bottom[0]->CanonicalAxisIndex(center_param.axis());
-   // Dimensions starting from "axis" are "flattened" into a single
-   // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
-   // and axis == 1, each sample is treated as a feature vector of length CHW.
-   K_ = bottom[0]->count(axis);
-   // Check if we need to set up the centers
-   if (this->blobs_.size() > 0) {
-     LOG(INFO) << "Skipping parameter initialization";
-   } else {
-     this->blobs_.resize(1);
-     // Initialize the centers: one row of length K_ per center.
-     vector<int> center_shape(2);
-     center_shape[0] = N_;
-     center_shape[1] = K_;
-     this->blobs_[0].reset(new Blob<Dtype>(center_shape));
-     // fill the centers
-     shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>(
-         center_param.center_filler()));
-     center_filler->Fill(this->blobs_[0].get());
-   }  // parameter initialization
-   this->param_propagate_down_.resize(this->blobs_.size(), true);
- }
- // Validates the bottoms, records the batch size M_, and sizes the internal
- // buffers: distance_ like the feature bottom, variation_sum_ like the centers.
- template <typename Dtype>
- void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   // The label bottom must hold a single value per sample.
-   CHECK_EQ(bottom[1]->channels(), 1);
-   CHECK_EQ(bottom[1]->height(), 1);
-   CHECK_EQ(bottom[1]->width(), 1);
-   M_ = bottom[0]->num();
-   // The centers were sized for K_ features in LayerSetUp; a bottom that no
-   // longer matches would make the row arithmetic below read out of bounds.
-   const int axis = bottom[0]->CanonicalAxisIndex(
-       this->layer_param_.center_loss_param().axis());
-   CHECK_EQ(K_, bottom[0]->count(axis))
-       << "Input size incompatible with center dimension.";
-   // Forward/Backward address the features as M_ rows of K_ values.
-   CHECK_EQ(bottom[0]->count(), M_ * K_)
-       << "Features must flatten to (batch size) x (center dimension).";
-   LossLayer<Dtype>::Reshape(bottom, top);
-   distance_.ReshapeLike(*bottom[0]);
-   variation_sum_.ReshapeLike(*this->blobs_[0]);
- }
- // CPU forward pass: stores D(i,:) = X(i,:) - C(y(i),:) in distance_ and writes
- // loss = sum(D .* D) / M_ / 2 to top[0].
- template <typename Dtype>
- void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const Dtype* bottom_data = bottom[0]->cpu_data();
-   const Dtype* label = bottom[1]->cpu_data();
-   const Dtype* center = this->blobs_[0]->cpu_data();
-   Dtype* distance_data = distance_.mutable_cpu_data();
-   // the i-th distance_data
-   for (int i = 0; i < M_; ++i) {
-     const int label_value = static_cast<int>(label[i]);
-     // The label selects a row of the center blob; reject values that would
-     // read outside of it.
-     CHECK_GE(label_value, 0) << "Negative label for sample " << i;
-     CHECK_LT(label_value, N_) << "Label for sample " << i
-         << " is not smaller than num_output";
-     // D(i,:) = X(i,:) - C(y(i),:)
-     caffe_sub(K_, bottom_data + i * K_, center + label_value * K_,
-         distance_data + i * K_);
-   }
-   const Dtype dot =
-       caffe_cpu_dot(M_ * K_, distance_.cpu_data(), distance_.cpu_data());
-   const Dtype loss = dot / M_ / Dtype(2);
-   top[0]->mutable_cpu_data()[0] = loss;
- }
- // CPU backward pass.
- //  - Centers: for every center j, accumulates
- //      -(sum of distance_ rows with label j) / (1 + number of such rows)
- //    into the center blob's diff.
- //  - Features: bottom diff = distance_ * (top diff / M_).
- //  - Labels: backpropagation is not supported and aborts.
- template <typename Dtype>
- void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-     const vector<bool>& propagate_down,
-     const vector<Blob<Dtype>*>& bottom) {
-   // Gradient with respect to centers
-   if (this->param_propagate_down_[0]) {
-     const Dtype* label = bottom[1]->cpu_data();
-     const Dtype* distance_data = distance_.cpu_data();
-     Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
-     Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
-     caffe_set(N_ * K_, Dtype(0), variation_sum_data);
-     // Single pass over the batch instead of rescanning every label for every
-     // center. Samples are still visited in increasing order, so each center
-     // sees the same summation order as before.
-     vector<int> count(N_, 0);
-     for (int m = 0; m < M_; ++m) {
-       const int label_value = static_cast<int>(label[m]);
-       if (label_value < 0 || label_value >= N_) {
-         continue;  // matches no center, contributes nothing
-       }
-       ++count[label_value];
-       // \sum_{y_i==j} -D(i,:)
-       caffe_sub(K_, variation_sum_data + label_value * K_,
-           distance_data + m * K_, variation_sum_data + label_value * K_);
-     }
-     for (int n = 0; n < N_; ++n) {
-       caffe_axpy(K_, Dtype(1) / (count[n] + Dtype(1)),
-           variation_sum_data + n * K_, center_diff + n * K_);
-     }
-   }
-   // Gradient with respect to bottom data
-   if (propagate_down[0]) {
-     caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
-     caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_,
-         bottom[0]->mutable_cpu_diff());
-   }
-   if (propagate_down[1]) {
-     LOG(FATAL) << this->type()
-                << " Layer cannot backpropagate to label inputs.";
-   }
- }
- #ifdef CPU_ONLY // Only when building without GPU support.
- STUB_GPU(CenterLossLayer); // NOTE(review): presumably supplies placeholder Forward_gpu/Backward_gpu definitions -- confirm against the macro.
- #endif
- INSTANTIATE_CLASS(CenterLossLayer); // NOTE(review): presumably instantiates the template for the supported Dtype values -- confirm.
- REGISTER_LAYER_CLASS(CenterLoss); // Registers the layer under the name "CenterLoss", matching type().
- } // namespace caffe
- #include <vector>
- #include "caffe/filler.hpp"
- #include "caffe/layers/center_loss_layer.hpp"
- #include "caffe/util/math_functions.hpp"
- namespace caffe {
- // Element-wise kernel: for flat element `index` of the M x K feature matrix,
- // writes distance = feature - center of the owning sample's label.
- //   nthreads  total number of elements to process
- //   K         length of one feature row / one center row
- //   bottom    features, K values per sample
- //   label     one label per sample, used as the center row index
- //   center    centers, K values per row
- //   distance  output, same layout as bottom
- template <typename Dtype>
- __global__ void Compute_distance_data_gpu(int nthreads, const int K, const Dtype* bottom,
-     const Dtype* label, const Dtype* center, Dtype* distance) {
-   CUDA_KERNEL_LOOP(index, nthreads) {
-     const int sample = index / K;
-     const int feature = index - sample * K;  // same value as index % K
-     const Dtype* center_row = center + static_cast<int>(label[sample]) * K;
-     // distance(i) = x(i) - c_{y(i)}
-     distance[index] = bottom[index] - center_row[feature];
-   }
- }
- // Per-center kernel: each `index` handles one center row. It subtracts the
- // distance rows of all samples labelled `index` from variation_sum and then
- // adds variation_sum / (1 + number of such samples) to center_diff.
- //   nthreads       number of centers to process
- //   M              number of samples
- //   K              length of one row
- //   label          one label per sample
- //   distance       per-sample distances, K values per sample
- //   variation_sum  scratch accumulator, K values per center; the kernel only
- //                  subtracts from it, so the caller must zero it beforehand
- //   center_diff    gradient of the centers, accumulated into
- template <typename Dtype>
- __global__ void Compute_center_diff_gpu(int nthreads, const int M, const int K,
-     const Dtype* label, const Dtype* distance, Dtype* variation_sum,
-     Dtype* center_diff) {
-   CUDA_KERNEL_LOOP(index, nthreads) {
-     int count = 0;
-     for (int m = 0; m < M; m++) {
-       const int label_value = static_cast<int>(label[m]);
-       if (label_value == index) {
-         count++;
-         for (int k = 0; k < K; k++) {
-           variation_sum[index * K + k] -= distance[m * K + k];
-         }
-       }
-     }
-     for (int k = 0; k < K; k++) {
-       // Accumulate rather than overwrite, as the CPU path does with
-       // caffe_axpy, so repeated backward passes add up in the param diff.
-       center_diff[index * K + k] +=
-           variation_sum[index * K + k] / (count + (Dtype)1.);
-     }
-   }
- }
- // GPU forward pass: fills distance_ with x_i - c_{y_i} on the device and writes
- // loss = sum(distance_ .* distance_) / M_ / 2 to top[0].
- template <typename Dtype>
- void CenterLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const int nthreads = M_ * K_;
-   Compute_distance_data_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
-       CAFFE_CUDA_NUM_THREADS>>>(nthreads, K_, bottom[0]->gpu_data(),
-       bottom[1]->gpu_data(), this->blobs_[0]->gpu_data(),
-       distance_.mutable_gpu_data());
-   // Surface launch failures here instead of at some later, unrelated call.
-   CUDA_POST_KERNEL_CHECK;
-   Dtype dot;
-   caffe_gpu_dot(M_ * K_, distance_.gpu_data(), distance_.gpu_data(), &dot);
-   const Dtype loss = dot / M_ / Dtype(2);
-   top[0]->mutable_cpu_data()[0] = loss;
- }
- // GPU backward pass.
- //  - Centers: zeroes variation_sum_ and launches Compute_center_diff_gpu with
- //    one work item per center, writing into the center blob's diff.
- //  - Features: bottom diff = distance_ * (top diff / M_).
- //  - Labels: backpropagation is not supported and aborts.
- template <typename Dtype>
- void CenterLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
-     const vector<bool>& propagate_down,
-     const vector<Blob<Dtype>*>& bottom) {
-   // Gradient with respect to centers (skipped when disabled, as on the CPU).
-   if (this->param_propagate_down_[0]) {
-     const int nthreads = N_;
-     // Device code needs the device-side pointer of the scratch buffer; the
-     // host pointer from mutable_cpu_data() is not valid for it.
-     Dtype* variation_sum_data = variation_sum_.mutable_gpu_data();
-     caffe_gpu_set(N_ * K_, (Dtype)0., variation_sum_data);
-     Compute_center_diff_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
-         CAFFE_CUDA_NUM_THREADS>>>(nthreads, M_, K_, bottom[1]->gpu_data(),
-         distance_.gpu_data(), variation_sum_data,
-         this->blobs_[0]->mutable_gpu_diff());
-     CUDA_POST_KERNEL_CHECK;
-   }
-   // Gradient with respect to bottom data
-   if (propagate_down[0]) {
-     caffe_gpu_scale(M_ * K_, top[0]->cpu_diff()[0] / M_,
-         distance_.gpu_data(), bottom[0]->mutable_gpu_diff());
-   }
-   if (propagate_down[1]) {
-     LOG(FATAL) << this->type()
-                << " Layer cannot backpropagate to label inputs.";
-   }
- }
- INSTANTIATE_LAYER_GPU_FUNCS(CenterLossLayer); // NOTE(review): presumably instantiates Forward_gpu/Backward_gpu for the supported Dtype values -- confirm against the macro.
- } // namespace caffe
第四步:重新编译caffe