STDN, a CVPR 2018 detection paper I took notes on earlier, uses an upsampling method that is essentially the same as DUC. I am still working in Caffe and need this layer, so I implemented it myself.
I will hold off on discussing the source until the backward pass is finished and pushed to my git. Since I had not written many layers before, I am recording here the mistakes that are easy to make.
1. Variables that LayerSetUp, Reshape, Forward, etc. all need must be declared in the .hpp; if a variable is only defined inside LayerSetUp, the other functions have no access to it.
2. For a member declared in the .hpp, e.g. int group_; : if you assign it in LayerSetUp and the other functions still need the same value, write the assignment as plain group_ = ... with nothing in front of it. If you prefix it with int, you declare a new local variable that belongs to LayerSetUp alone and shadows the member, so the other functions never see the value (see the sketch after this list).
3. LayerSetUp and Reshape do not pass values to each other the way you might hope. For example, if you read num = bottom[0]->num() in LayerSetUp to get the batch size, you must repeat that statement in Reshape, otherwise you will not get the correct batch size: LayerSetUp runs only once, while Reshape is re-run whenever the input shape changes. Forward and the other functions do not need to repeat it, since Reshape always runs before Forward.
4. Do not forget #endif at the end of the .hpp; at the end of the .cpp do not forget INSTANTIATE_CLASS(EnlargeLayer); and REGISTER_LAYER_CLASS(Enlarge);.
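A minimal standalone sketch of pitfall 2 (the struct and values here are made up purely for illustration):

#include <iostream>

struct ExampleLayer
{
  int group_;  // the member declared in the .hpp

  void LayerSetUp()
  {
    group_ = 4;        // correct: assigns the member, so other methods see 4
    // int group_ = 4; // wrong: declares a local that shadows the member and
    //                 // is destroyed when LayerSetUp returns
  }
  void Forward() { std::cout << group_ << std::endl; }
};

int main()
{
  ExampleLayer layer;
  layer.LayerSetUp();
  layer.Forward();  // prints 4; with the shadowing line instead, group_ stays uninitialized
  return 0;
}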
Here is the source. The GitHub address is https://github.com/dlyldxwl/Similar-DUC-Caffe-implement
One place where my version may differ from DUC: when the bottom channel count is not divisible by scale^2, the top positions that would be read from the last map of the final group are instead filled by something like a "mean pool" over that map and the leftover maps, and the backward pass splits the gradient like mean pooling.
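A concrete case: with 10 bottom channels and scale = 3, scale^2 = 9, so group_ = 1 and one map is left over. Top pixels that the regular mapping would read from bottom map 8 (the last of the group) are instead the mean of maps 8 and 9 at that location, and in backward each of those two maps receives half of the corresponding top gradient.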
#ifndef CAFFE_ENLARGE_LAYER_HPP_
#define CAFFE_ENLARGE_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
template <typename Dtype>
class EnlargeLayer : public Layer<Dtype> {
 public:
  explicit EnlargeLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "Enlarge"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // GPU versions not implemented yet; the base Layer falls back to the CPU code.
  // virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
  // virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  int group_;     // number of top channels: ch_ori_ / (scale_ * scale_)
  int img_size_;  // target spatial size, read from enlarge_param
  int batch_;     // batch size
  int ch_ori_;    // bottom channels
  int h_ori_;     // bottom height
  int w_ori_;     // bottom width
  int scale_;     // upsampling factor: img_size_ / h_ori_
};
}  // namespace caffe
#endif  // CAFFE_ENLARGE_LAYER_HPP_
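One more prerequisite before the .cpp compiles: caffe.proto has to define enlarge_param. A minimal sketch of the proto change; the field ID 8866 below is a placeholder and must not collide with an ID already used in your caffe.proto:

// added inside message LayerParameter:
optional EnlargeParameter enlarge_param = 8866;  // placeholder ID, pick an unused one

// new message at the top level of caffe.proto:
message EnlargeParameter {
  optional uint32 size = 1;  // target spatial size (height = width) of the top map
}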
The cpp file:
#include <algorithm>
#include <vector>
#include "caffe/layers/enlarge_layer.hpp"
namespace caffe {
template <typename Dtype>
void EnlargeLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top)
{
  img_size_ = this->layer_param_.enlarge_param().size();
  CHECK_GT(img_size_, 0) << "feature map size must be greater than 0";
  ch_ori_ = bottom[0]->channels();
  h_ori_ = bottom[0]->height();
  w_ori_ = bottom[0]->width();
  CHECK_EQ(h_ori_, w_ori_) << "the width and height of the feature map to be upsampled must be equal";
  CHECK_GT(img_size_, h_ori_) << "size param must be greater than the input feature map size";
  CHECK_EQ(img_size_ % h_ori_, 0) << "size param must be a multiple of the input feature map size, "
      << "otherwise the index mapping in Forward reads out of bounds";
  scale_ = img_size_ / h_ori_;           // upsampling factor
  group_ = ch_ori_ / (scale_ * scale_);  // number of top channels after the enlarge
}
template <typename Dtype>
void EnlargeLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top)
{
  // Recompute all shape-dependent members here: Reshape is re-run whenever the
  // input shape (e.g. the batch size) changes, while LayerSetUp runs only once.
  img_size_ = this->layer_param_.enlarge_param().size();
  batch_ = bottom[0]->num();
  ch_ori_ = bottom[0]->channels();
  h_ori_ = bottom[0]->height();
  w_ori_ = bottom[0]->width();
  scale_ = img_size_ / h_ori_;
  group_ = ch_ori_ / (scale_ * scale_);
  top[0]->Reshape(batch_, group_, img_size_, img_size_);
}
template <typename Dtype>
void EnlargeLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top)
{
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int sp_os = bottom[0]->count(2);  // spatial size of one bottom (old) map
  const int sp_ns = top[0]->count(2);     // spatial size of one top (new) map
  const int extra_maps = ch_ori_ % (scale_ * scale_);  // leftover bottom maps
  for (int m = 0; m < batch_; ++m)
  {
    for (int n = 0; n < group_; ++n)
    {
      if ((n != group_ - 1) || (extra_maps == 0))
      {
        // Regular group: top pixel (h, w) is copied from bottom map
        // (h % scale_) * scale_ + (w % scale_) at position (h / scale_, w / scale_).
        for (int h = 0; h < img_size_; ++h)
        {
          for (int w = 0; w < img_size_; ++w)
          {
            int index_n = h * img_size_ + w;  // index into the top (new) feature map
            int index_o = (h % scale_ * scale_ + w % scale_) * sp_os + (h / scale_ * w_ori_ + w / scale_);  // index into the bottom (old) feature maps
            top_data[index_n] = bottom_data[index_o];
          }
        }
        bottom_data += scale_ * scale_ * sp_os;
        top_data += sp_ns;
      }
      else
      {
        // Last group with leftover maps: positions that would be read from the
        // group's last map are averaged over that map and the leftover ones.
        for (int h = 0; h < img_size_; ++h)
        {
          for (int w = 0; w < img_size_; ++w)
          {
            int index_n = h * img_size_ + w;
            int map_ind_o = h % scale_ * scale_ + w % scale_;
            if (map_ind_o != scale_ * scale_ - 1)
            {
              int index_o = map_ind_o * sp_os + (h / scale_ * w_ori_ + w / scale_);
              top_data[index_n] = bottom_data[index_o];
            }
            else
            {
              Dtype sum = 0;
              for (int i = 0; i <= extra_maps; ++i)
              {
                int index_extra = (map_ind_o + i) * sp_os + (h / scale_ * w_ori_ + w / scale_);
                sum += bottom_data[index_extra];
              }
              top_data[index_n] = sum / (extra_maps + 1);
            }
          }
        }
        // Advance past the scale_^2 regular maps plus the leftover ones; without
        // this the offsets are wrong for every image after the first in the batch.
        bottom_data += (scale_ * scale_ + extra_maps) * sp_os;
        top_data += sp_ns;
      }
    }
  }
}
template <typename Dtype>
void EnlargeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
{
  if (!propagate_down[0]) { return; }
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  const int sp_ns = top[0]->count(2);
  const int sp_os = bottom[0]->count(2);
  const int extra_maps = ch_ori_ % (scale_ * scale_);
  for (int m = 0; m < batch_; ++m)
  {
    for (int n = 0; n < group_; ++n)
    {
      if ((n != group_ - 1) || (extra_maps == 0))
      {
        // Regular group: the forward copy is one-to-one, so the gradient is
        // routed back to exactly one bottom element.
        for (int h = 0; h < img_size_; ++h)
        {
          for (int w = 0; w < img_size_; ++w)
          {
            int index_n = h * img_size_ + w;  // index into the top feature map
            int index_o = (h % scale_ * scale_ + w % scale_) * sp_os + (h / scale_ * w_ori_ + w / scale_);  // index into the bottom feature maps
            bottom_diff[index_o] = top_diff[index_n];
          }
        }
        bottom_diff += scale_ * scale_ * sp_os;
        top_diff += sp_ns;
      }
      else
      {
        // Last group with leftover maps: split the gradient evenly over the
        // averaged maps, exactly like mean pooling.
        for (int h = 0; h < img_size_; ++h)
        {
          for (int w = 0; w < img_size_; ++w)
          {
            int index_n = h * img_size_ + w;
            int map_ind_o = h % scale_ * scale_ + w % scale_;
            if (map_ind_o != scale_ * scale_ - 1)
            {
              int index_o = map_ind_o * sp_os + (h / scale_ * w_ori_ + w / scale_);
              bottom_diff[index_o] = top_diff[index_n];
            }
            else
            {
              Dtype ave_diff = top_diff[index_n] / (extra_maps + 1);
              for (int i = 0; i <= extra_maps; ++i)
              {
                int index_extra = (map_ind_o + i) * sp_os + (h / scale_ * w_ori_ + w / scale_);
                bottom_diff[index_extra] = ave_diff;
              }
            }
          }
        }
        // Advance here as well, mirroring Forward_cpu.
        bottom_diff += (scale_ * scale_ + extra_maps) * sp_os;
        top_diff += sp_ns;
      }
    }
  }
}
// No STUB_GPU here: Forward_gpu/Backward_gpu are not declared in the header,
// so the stub would not compile under CPU_ONLY; the base Layer's GPU methods
// already fall back to the CPU implementation.
INSTANTIATE_CLASS(EnlargeLayer);
REGISTER_LAYER_CLASS(Enlarge);
}  // namespace caffe
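Finally, a hypothetical prototxt usage, assuming the proto change sketched above; the blob names and shapes are made up:

layer {
  name: "enlarge"
  type: "Enlarge"
  bottom: "conv_feat"  # e.g. N x 36 x 8 x 8
  top: "enlarged"      # N x 4 x 24 x 24: scale = 24 / 8 = 3, group = 36 / 9 = 4
  enlarge_param {
    size: 24
  }
}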