A Brief Look at the LRN Layer in Caffe

In an interview yesterday I was asked about the BN layer and the LRN layer. I have always used BN, so at the time I only had a faint impression of LRN. Afterwards I filled in this gap, and below is my brief understanding of the LRN layer; corrections are welcome if anything here is misleading.

The LRN layer normalizes over a local region: relatively large responses become even larger relative to their neighborhood, while relatively small responses are suppressed. Let's first look at the Caffe layer definition.

Four parameters are involved: norm_region, local_size, alpha, and beta.

layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param {
    // norm_region: ACROSS_CHANNELS (the default) sums and normalizes over adjacent
    // channels, so the local region has shape local_size x 1 x 1; WITHIN_CHANNEL
    // sums and normalizes inside a single channel, so the local region has shape
    // 1 x local_size x local_size.
    local_size: 5   // default 5; across channels it is the number of channels summed over,
                    // within a channel it is the side length of the square summing region
    alpha: 0.0001   // scaling parameter in the formula, default 1
    beta: 0.75      // exponent in the formula, default 0.75
  }
}

During normalization, each input value is divided by $\left(1 + \frac{\alpha}{n}\sum_{i} x_i^2\right)^{\beta}$, where $n$ is local_size, the sum runs over the local region, and $\alpha$ and $\beta$ are the parameters set above. In later Caffe versions the constant 1 is generalized into another parameter $k$. Let's look at the source in caffe/src/caffe/layers/lrn_layer.cpp; the explanations are given as inline comments in the code.
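
Before diving into the optimized implementation, here is a minimal standalone sketch of what ACROSS_CHANNELS LRN computes at a single spatial position. This is my own illustration with made-up names (NaiveLRNAcrossChannels, input, etc.), not Caffe code:

#include <algorithm>
#include <cmath>
#include <vector>

// Naive ACROSS_CHANNELS LRN at one spatial position (x, y) of one image.
// input[c] is the activation of channel c at that position; the window of
// local_size channels is clipped at the channel borders, which has the same
// effect as Caffe's zero padding.
std::vector<float> NaiveLRNAcrossChannels(const std::vector<float>& input,
                                          int local_size, float alpha,
                                          float beta, float k) {
  const int channels = static_cast<int>(input.size());
  const int half = (local_size - 1) / 2;  // pre_pad_ in Caffe
  std::vector<float> output(channels);
  for (int c = 0; c < channels; ++c) {
    float sum_sq = 0.f;
    for (int j = std::max(0, c - half); j <= std::min(channels - 1, c + half); ++j) {
      sum_sq += input[j] * input[j];  // sum of squares over the local window
    }
    const float scale = k + alpha / local_size * sum_sq;  // k + (alpha/n) * sum
    output[c] = input[c] / std::pow(scale, beta);         // divide by scale^beta
  }
  return output;
}

With the prototxt above this would be called with local_size = 5, alpha = 0.0001, beta = 0.75, and k = 1.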

#include <vector>

#include "caffe/layers/lrn_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void LRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  size_ = this->layer_param_.lrn_param().local_size();  // read local_size
  CHECK_EQ(size_ % 2, 1) << "LRN only supports odd values for local_size";  // local_size must be odd
  pre_pad_ = (size_ - 1) / 2;  // padding: half the window is zero-padded on each side
  alpha_ = this->layer_param_.lrn_param().alpha();  // read alpha
  beta_ = this->layer_param_.lrn_param().beta();  // read beta
  k_ = this->layer_param_.lrn_param().k();  // read k
  if (this->layer_param_.lrn_param().norm_region() ==  // WITHIN_CHANNEL mode also needs a chain of internal helper layers, set up below
      LRNParameter_NormRegion_WITHIN_CHANNEL) {
    // Set up split_layer_ to use inputs in the numerator and denominator.
    split_top_vec_.clear();
    split_top_vec_.push_back(&product_input_);
    split_top_vec_.push_back(&square_input_);
    LayerParameter split_param;
    split_layer_.reset(new SplitLayer<Dtype>(split_param));
    split_layer_->SetUp(bottom, split_top_vec_);
    // Set up square_layer_ to square the inputs.
    square_bottom_vec_.clear();
    square_top_vec_.clear();
    square_bottom_vec_.push_back(&square_input_);
    square_top_vec_.push_back(&square_output_);
    LayerParameter square_param;
    square_param.mutable_power_param()->set_power(Dtype(2));
    square_layer_.reset(new PowerLayer<Dtype>(square_param));
    square_layer_->SetUp(square_bottom_vec_, square_top_vec_);
    // Set up pool_layer_ to sum over square neighborhoods of the input.
    pool_top_vec_.clear();
    pool_top_vec_.push_back(&pool_output_);
    LayerParameter pool_param;
    pool_param.mutable_pooling_param()->set_pool(
        PoolingParameter_PoolMethod_AVE);
    pool_param.mutable_pooling_param()->set_pad(pre_pad_);
    pool_param.mutable_pooling_param()->set_kernel_size(size_);
    pool_layer_.reset(new PoolingLayer<Dtype>(pool_param));
    pool_layer_->SetUp(square_top_vec_, pool_top_vec_);
    // Set up power_layer_ to compute (1 + alpha_/N^2 s)^-beta_, where s is
    // the sum of a squared neighborhood (the output of pool_layer_).
    power_top_vec_.clear();
    power_top_vec_.push_back(&power_output_);
    LayerParameter power_param;
    power_param.mutable_power_param()->set_power(-beta_);
    power_param.mutable_power_param()->set_scale(alpha_);
    power_param.mutable_power_param()->set_shift(Dtype(1));
    power_layer_.reset(new PowerLayer<Dtype>(power_param));
    power_layer_->SetUp(pool_top_vec_, power_top_vec_);
    // Set up a product_layer_ to compute outputs by multiplying inputs by the
    // inverse demoninator computed by the power layer.
    product_bottom_vec_.clear();
    product_bottom_vec_.push_back(&product_input_);
    product_bottom_vec_.push_back(&power_output_);
    LayerParameter product_param;
    EltwiseParameter* eltwise_param = product_param.mutable_eltwise_param();
    eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD);
    product_layer_.reset(new EltwiseLayer<Dtype>(product_param));
    product_layer_->SetUp(product_bottom_vec_, top);
  }
}
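
To summarize what this WITHIN_CHANNEL chain (split → square → pool → power → product) computes, based on my own reading of the setup above: since pool_layer_ uses AVE pooling over an $N \times N$ window ($N$ = local_size), its output already divides the sum of squares by $N^2$, so after power_layer_ and product_layer_ each element becomes

$$ b_{x,y} = a_{x,y}\left(1 + \frac{\alpha}{N^2}\sum_{(x',y')\,\in\,N\times N\ \text{window}} a_{x',y'}^2\right)^{-\beta} $$

which matches the "(1 + alpha_/N^2 s)^-beta" noted in the inline comment above.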

template <typename Dtype>
void LRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  num_ = bottom[0]->num();
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();  // cache num_/channels_/height_/width_
  switch (this->layer_param_.lrn_param().norm_region()) {  // dispatch on norm_region
  case LRNParameter_NormRegion_ACROSS_CHANNELS:  // across-channel normalization
    top[0]->Reshape(num_, channels_, height_, width_);  // top gets the same shape as bottom, so at
    // normalization time multiplying bottom by the per-element scale_ directly yields the top values
    scale_.Reshape(num_, channels_, height_, width_);
    break;
  case LRNParameter_NormRegion_WITHIN_CHANNEL:  // within-channel (spatial) normalization
    split_layer_->Reshape(bottom, split_top_vec_);
    square_layer_->Reshape(square_bottom_vec_, square_top_vec_);
    pool_layer_->Reshape(square_top_vec_, pool_top_vec_);
    power_layer_->Reshape(pool_top_vec_, power_top_vec_);
    product_layer_->Reshape(product_bottom_vec_, top);
    break;
  }
}

template <typename Dtype>
void LRNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  switch (this->layer_param_.lrn_param().norm_region()) {
  case LRNParameter_NormRegion_ACROSS_CHANNELS:
    CrossChannelForward_cpu(bottom, top);
    break;
  case LRNParameter_NormRegion_WITHIN_CHANNEL:
    WithinChannelForward(bottom, top);
    break;
  default:
    LOG(FATAL) << "Unknown normalization region.";
  }
}

template <typename Dtype>
void LRNLayer<Dtype>::CrossChannelForward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  Dtype* scale_data = scale_.mutable_cpu_data();  // raw pointers to each blob's memory for the operations below
  // start with the constant value
  for (int i = 0; i < scale_.count(); ++i) {  // initialize scale_ to the constant 1 (newer Caffe versions use k_ here)
    scale_data[i] = 1.;
  }
  Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);  // zero-padded buffer; its channel dimension is size_ - 1 larger than bottom's
  Dtype* padded_square_data = padded_square.mutable_cpu_data();
  caffe_set(padded_square.count(), Dtype(0), padded_square_data);  // clear the padded buffer first
  Dtype alpha_over_size = alpha_ / size_;  // precompute alpha/n from the formula
  // go through the images
  for (int n = 0; n < num_; ++n) {  // normalize each of the num_ images in the batch separately
    // compute the padded square
    caffe_sqr(channels_ * height_ * width_,
        bottom_data + bottom[0]->offset(n),
        padded_square_data + padded_square.offset(0, pre_pad_));  // square bottom into padded_square; the first pre_pad_ channels stay zero
    // Create the first channel scale
    for (int c = 0; c < size_; ++c) {  // sum the squares of the first size_ channels, scaled by alpha/n, into scale_ (computing 1 + (alpha/n) * sum_i x_i^2 for channel 0)
      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
          padded_square_data + padded_square.offset(0, c),
          scale_data + scale_.offset(n, 0));
    }
    for (int c = 1; c < channels_; ++c) {  // compute the remaining scale_ values with a FIFO-like sliding window: shift the window by one channel, add the new head and subtract the old tail to avoid re-summing
      // copy previous scale
      caffe_copy<Dtype>(height_ * width_,
          scale_data + scale_.offset(n, c - 1),
          scale_data + scale_.offset(n, c));
      // add head
      caffe_axpy<Dtype>(height_ * width_, alpha_over_size,
          padded_square_data + padded_square.offset(0, c + size_ - 1),
          scale_data + scale_.offset(n, c));
      // subtract tail
      caffe_axpy<Dtype>(height_ * width_, -alpha_over_size,
          padded_square_data + padded_square.offset(0, c - 1),
          scale_data + scale_.offset(n, c));
    }
  }

  // In the end, compute output
  caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, top_data);  // raise scale_ to -beta; the division is turned into a multiplication, hence the negated exponent
  caffe_mul<Dtype>(scale_.count(), top_data, bottom_data, top_data);  // top = bottom .* scale_^(-beta)
}
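
The add-head / subtract-tail update in the loop above is just a sliding-window running sum. As a minimal standalone illustration of the idea (hypothetical names, not Caffe code):

#include <vector>

// Sliding-window sums: instead of re-summing 'win' elements at every position
// (O(n*win) work), reuse the previous window's sum, add the element entering the
// window and subtract the element leaving it (O(n) work). The channel loop above
// applies the same trick to whole height*width planes.
std::vector<float> SlidingWindowSums(const std::vector<float>& values, int win) {
  std::vector<float> sums;
  if (win <= 0 || static_cast<int>(values.size()) < win) return sums;
  float running = 0.f;
  for (int i = 0; i < win; ++i) running += values[i];  // first window, summed directly
  sums.push_back(running);
  for (std::size_t i = win; i < values.size(); ++i) {
    running += values[i];        // add head
    running -= values[i - win];  // subtract tail
    sums.push_back(running);
  }
  return sums;
}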

template <typename Dtype>
void LRNLayer<Dtype>::WithinChannelForward(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  split_layer_->Forward(bottom, split_top_vec_);
  square_layer_->Forward(square_bottom_vec_, square_top_vec_);
  pool_layer_->Forward(square_top_vec_, pool_top_vec_);
  power_layer_->Forward(pool_top_vec_, power_top_vec_);
  product_layer_->Forward(product_bottom_vec_, top);
}

template <typename Dtype>
void LRNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  switch (this->layer_param_.lrn_param().norm_region()) {
  case LRNParameter_NormRegion_ACROSS_CHANNELS:
    CrossChannelBackward_cpu(top, propagate_down, bottom);
    break;
  case LRNParameter_NormRegion_WITHIN_CHANNEL:
    WithinChannelBackward(top, propagate_down, bottom);
    break;
  default:
    LOG(FATAL) << "Unknown normalization region.";
  }
}

template <typename Dtype>
void LRNLayer<Dtype>::CrossChannelBackward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* scale_data = scale_.cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  Blob<Dtype> padded_ratio(1, channels_ + size_ - 1, height_, width_);
  Blob<Dtype> accum_ratio(1, 1, height_, width_);
  Dtype* padded_ratio_data = padded_ratio.mutable_cpu_data();
  Dtype* accum_ratio_data = accum_ratio.mutable_cpu_data();
  // We hack a little bit by using the diff() to store an additional result
  Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff();
  caffe_set(padded_ratio.count(), Dtype(0), padded_ratio_data);
  Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;

  caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, bottom_diff);
  caffe_mul<Dtype>(scale_.count(), top_diff, bottom_diff, bottom_diff);

  // go through individual data
  int inverse_pre_pad = size_ - (size_ + 1) / 2;
  for (int n = 0; n < num_; ++n) {
    int block_offset = scale_.offset(n);
    // first, compute diff_i * y_i / s_i
    caffe_mul<Dtype>(channels_ * height_ * width_,
        top_diff + block_offset, top_data + block_offset,
        padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
    caffe_div<Dtype>(channels_ * height_ * width_,
        padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad),
        scale_data + block_offset,
        padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
    // Now, compute the accumulated ratios and the bottom diff
    caffe_set(accum_ratio.count(), Dtype(0), accum_ratio_data);
    for (int c = 0; c < size_ - 1; ++c) {
      caffe_axpy<Dtype>(height_ * width_, 1.,
          padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
    }
    for (int c = 0; c < channels_; ++c) {
      caffe_axpy<Dtype>(height_ * width_, 1.,
          padded_ratio_data + padded_ratio.offset(0, c + size_ - 1),
          accum_ratio_data);
      // compute bottom diff
      caffe_mul<Dtype>(height_ * width_,
          bottom_data + top[0]->offset(n, c),
          accum_ratio_data, accum_ratio_times_bottom);
      caffe_axpy<Dtype>(height_ * width_, -cache_ratio_value,
          accum_ratio_times_bottom, bottom_diff + top[0]->offset(n, c));
      caffe_axpy<Dtype>(height_ * width_, -1.,
          padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
    }
  }
}
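
For reference, the gradient this loop implements can be written out as follows (my own derivation from the forward formula, using $s_i = 1 + \frac{\alpha}{n}\sum_{j\in\mathcal{N}(i)} x_j^2$ and $y_i = x_i\, s_i^{-\beta}$):

$$ \frac{\partial E}{\partial x_i} = \frac{\partial E}{\partial y_i}\, s_i^{-\beta} \;-\; \frac{2\alpha\beta}{n}\, x_i \sum_{j:\, i\in\mathcal{N}(j)} \frac{\partial E}{\partial y_j}\,\frac{y_j}{s_j} $$

The first term is computed by the caffe_powx/caffe_mul pair before the loop; the second term comes from the accumulated padded_ratio sums, scaled by cache_ratio_value = 2 * alpha_ * beta_ / size_.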

template <typename Dtype>
void LRNLayer<Dtype>::WithinChannelBackward(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    vector<bool> product_propagate_down(2, true);
    product_layer_->Backward(top, product_propagate_down, product_bottom_vec_);
    power_layer_->Backward(power_top_vec_, propagate_down, pool_top_vec_);
    pool_layer_->Backward(pool_top_vec_, propagate_down, square_top_vec_);
    square_layer_->Backward(square_top_vec_, propagate_down,
                            square_bottom_vec_);
    split_layer_->Backward(split_top_vec_, propagate_down, bottom);
  }
}

#ifdef CPU_ONLY
STUB_GPU(LRNLayer);
STUB_GPU_FORWARD(LRNLayer, CrossChannelForward);
STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward);
#endif

INSTANTIATE_CLASS(LRNLayer);

}  // namespace caffe

 
