Adding an RPN Layer to Caffe: Implementing the Faster R-CNN Detection Algorithm in C++

Abstract: In the Caffe source tree, examples/cpp_classification/classification.cpp implements image classification in C++, but Caffe ships no ready-made C++ detection code. This post therefore adds a new layer to Caffe, the RPN layer, to implement a C++ object-detection pipeline. The detection model used here is one I trained myself to detect handwritten scores on exam papers.

Environment

Windows, Intel i7-3520M CPU @ 2.9 GHz, 3.23 GB RAM, dual-core / four threads

Build tool

Visual Studio 2013

Reference

Blogger 真小假's pure-C++ Faster R-CNN (implemented via a custom Caffe RPN layer)
(Note: that post is already quite complete, but I ran into several pitfalls while compiling with VS2013, which I record here.)

Adding the new RPN layer to Caffe

The new RPN layer is added to Microsoft/caffe (downloaded from GitHub), and building pycaffe is sufficient: no training is done here, only forward prediction through the network.

  1. Add the custom layer header rpn_layer.hpp under /caffe-master/include/caffe/layers/;
#ifndef CAFFE_RPN_LAYER_HPP_
#define CAFFE_RPN_LAYER_HPP_

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"

#define mymax(a,b) (((a)>(b))?(a):(b))
#define mymin(a,b) (((a)>(b))?(b):(a))
namespace caffe {

	/**
	* @brief implement RPN layer for faster rcnn
	*/

	template <typename Dtype>
	class RPNLayer : public Layer<Dtype> {
	public:
		explicit RPNLayer(const LayerParameter& param)
			: Layer<Dtype>(param) {
				m_score_.reset(new Blob<Dtype>());
				m_box_.reset(new Blob<Dtype>());
				local_anchors_.reset(new Blob<Dtype>());
			}
		virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
			const vector<Blob<Dtype>*>& top);
		virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
			const vector<Blob<Dtype>*>& top){}
		virtual inline const char* type() const { return "RPN"; }

		struct abox{
			Dtype batch_ind;
			Dtype x1;
			Dtype y1;
			Dtype x2;
			Dtype y2;
			Dtype score;
			bool operator <(const abox&tmp) const{
				return score < tmp.score;
			}
		};

	protected:
		virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
			const vector<Blob<Dtype>*>& top);
		virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
			const vector<Blob<Dtype>*>& top);
		virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
			const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
		virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
			const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
			
		int feat_stride_;
		int base_size_;
		int min_size_;
		int pre_nms_topN_;
		int post_nms_topN_;
		float nms_thresh_;
		vector<int> anchor_scales_;
		vector<float> ratios_;
		
		vector<vector<float> > gen_anchors_;
		int *anchors_;
		int anchors_nums_;
		int src_height_;
		int src_width_;
		float src_scale_;
		int map_width_;
		int map_height_;

		shared_ptr<Blob<Dtype> > m_score_;
		shared_ptr<Blob<Dtype> > m_box_;
		shared_ptr<Blob<Dtype> >local_anchors_;
		void generate_anchors();
		vector<vector<float> > ratio_enum(vector<float>);
		vector<float> whctrs(vector<float>);
		vector<float> mkanchor(float w, float h, float x_ctr, float y_ctr);
		vector<vector<float> > scale_enum(vector<float>);
		
		void proposal_local_anchor();
		//cv::Mat proposal_local_anchor(int width, int height);
		void bbox_tranform_inv();
		cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta);
		void nms(std::vector<abox> &input_boxes, float nms_thresh);
		void filter_boxs(vector<abox>& aboxes);
		//void filter_boxs(cv::Mat& pre_box, cv::Mat& score, vector<RPN::abox>& aboxes);
	};
}  // namespace caffe

#endif  // CAFFE_RPN_LAYER_HPP_

  2. Add rpn_layer.cpp under /caffe-master/src/caffe/layers/;
#include <algorithm>
#include <vector>

#include "caffe/layers/rpn_layer.hpp"
#include "caffe/util/math_functions.hpp"
#include <opencv2/opencv.hpp>

// Debug leftovers: neither variable is used by the layer below. The tmp
// table holds the 9 standard anchors (base_size 16, ratios {0.5, 1, 2},
// scales {8, 16, 32}) in MATLAB's 1-indexed convention.
int debug = 0;
int tmp[9][4] = {
	{ -83, -39, 100, 56 },
	{ -175, -87, 192, 104 },
	{ -359, -183, 376, 200 },
	{ -55, -55, 72, 72 },
	{ -119, -119, 136, 136 },
	{ -247, -247, 264, 264 },
	{ -35, -79, 52, 96 },
	{ -79, -167, 96, 184 },
	{ -167, -343, 184, 360 }
};
namespace caffe {

	template <typename Dtype>
	void RPNLayer<Dtype>::LayerSetUp(
		const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
		anchor_scales_.clear();
		ratios_.clear();
		feat_stride_ = this->layer_param_.rpn_param().feat_stride();
		base_size_ = this->layer_param_.rpn_param().basesize();
		min_size_ = this->layer_param_.rpn_param().boxminsize();
		pre_nms_topN_ = this->layer_param_.rpn_param().per_nms_topn();
		post_nms_topN_ = this->layer_param_.rpn_param().post_nms_topn();
		nms_thresh_ = this->layer_param_.rpn_param().nms_thresh();
		int scales_num = this->layer_param_.rpn_param().scale_size();
		for (int i = 0; i < scales_num; ++i)
		{
			anchor_scales_.push_back(this->layer_param_.rpn_param().scale(i));
		}
		int ratios_num = this->layer_param_.rpn_param().ratio_size();
		for (int i = 0; i < ratios_num; ++i)
		{
			ratios_.push_back(this->layer_param_.rpn_param().ratio(i));
		}
		
		generate_anchors();
		
		anchors_nums_ = gen_anchors_.size();
		anchors_ = new int[anchors_nums_ * 4];
		for (int i = 0; i<gen_anchors_.size(); ++i)
		{
			for (int j = 0; j<gen_anchors_[i].size(); ++j)
			{
				anchors_[i * 4 + j] = gen_anchors_[i][j];
			}
		}
		top[0]->Reshape(1, 5, 1, 1);
		if (top.size() > 1)
		{
			top[1]->Reshape(1, 1, 1, 1);
		}
	}

	template <typename Dtype>
	void RPNLayer<Dtype>::generate_anchors(){
		//generate base anchor
		vector<float> base_anchor;
		base_anchor.push_back(0);
		base_anchor.push_back(0);
		base_anchor.push_back(base_size_ - 1);
		base_anchor.push_back(base_size_ - 1);
		//enum ratio anchors
		vector<vector<float> >ratio_anchors = ratio_enum(base_anchor);
		for (int i = 0; i < ratio_anchors.size(); ++i)
		{
			vector<vector<float> > tmp = scale_enum(ratio_anchors[i]);
			gen_anchors_.insert(gen_anchors_.end(), tmp.begin(), tmp.end());
		}
	}

	template <typename Dtype>
	vector<vector<float> > RPNLayer<Dtype>::scale_enum(vector<float> anchor){
		vector<vector<float> > result;
		vector<float> reform_anchor = whctrs(anchor);
		float x_ctr = reform_anchor[2];
		float y_ctr = reform_anchor[3];
		float w = reform_anchor[0];
		float h = reform_anchor[1];
		for (int i = 0; i < anchor_scales_.size(); ++i)
		{
			float ws = w * anchor_scales_[i];
			float hs = h *  anchor_scales_[i];
			vector<float> tmp = mkanchor(ws, hs, x_ctr, y_ctr);
			result.push_back(tmp);
		}
		return result;
	}

	template <typename Dtype>
	vector<vector<float> > RPNLayer<Dtype>::ratio_enum(vector<float> anchor){
		vector<vector<float> > result;
		vector<float> reform_anchor = whctrs(anchor);
		float x_ctr = reform_anchor[2];
		float y_ctr = reform_anchor[3];
		float size = reform_anchor[0] * reform_anchor[1];
		for (int i = 0; i < ratios_.size(); ++i)
		{
			float size_ratios = size / ratios_[i];
			float ws = round(sqrt(size_ratios));
			float hs = round(ws*ratios_[i]);
			vector<float> tmp = mkanchor(ws, hs, x_ctr, y_ctr);
			result.push_back(tmp);
		}
		return result;
	}

	template <typename Dtype>
	vector<float> RPNLayer<Dtype>::mkanchor(float w, float h, float x_ctr, float y_ctr){
		vector<float> tmp;
		tmp.push_back(x_ctr - 0.5*(w - 1));
		tmp.push_back(y_ctr - 0.5*(h - 1));
		tmp.push_back(x_ctr + 0.5*(w - 1));
		tmp.push_back(y_ctr + 0.5*(h - 1));
		return tmp;
	}
	
	template <typename Dtype>
	vector<float> RPNLayer<Dtype>::whctrs(vector<float> anchor){
		vector<float> result;
		result.push_back(anchor[2] - anchor[0] + 1); //w
		result.push_back(anchor[3] - anchor[1] + 1); //h
		result.push_back((anchor[2] + anchor[0]) / 2); //ctrx
		result.push_back((anchor[3] + anchor[1]) / 2); //ctry
		return result;
	}

	template <typename Dtype>
	void RPNLayer<Dtype>::proposal_local_anchor(){
		int length = mymax(map_width_, map_height_);
		int step = map_width_*map_height_;
		int *map_m = new int[length];
		for (int i = 0; i < length; ++i)
		{
			map_m[i] = i*feat_stride_;
		}
		Dtype *shift_x = new Dtype[step];
		Dtype *shift_y = new Dtype[step];
		for (int i = 0; i < map_height_; ++i)
		{
			for (int j = 0; j < map_width_; ++j)
			{
				shift_x[i*map_width_ + j] = map_m[j];
				shift_y[i*map_width_ + j] = map_m[i];
			}
		}
		local_anchors_->Reshape(1, anchors_nums_ * 4, map_height_, map_width_);
		Dtype *a = local_anchors_->mutable_cpu_data();
		for (int i = 0; i < anchors_nums_; ++i)
		{
			caffe_set(step, Dtype(anchors_[i * 4 + 0]), a + (i * 4 + 0) *step);
			caffe_set(step, Dtype(anchors_[i * 4 + 1]), a + (i * 4 + 1) *step);
			caffe_set(step, Dtype(anchors_[i * 4 + 2]), a + (i * 4 + 2) *step);
			caffe_set(step, Dtype(anchors_[i * 4 + 3]), a + (i * 4 + 3) *step);
			caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 0)*step);
			caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 2)*step);
			caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 1)*step);
			caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 3)*step);
		}
	}

	template<typename Dtype>
	void RPNLayer<Dtype>::filter_boxs(vector<abox>& aboxes)
	{
		float localMinSize = min_size_*src_scale_;
		aboxes.clear();
		int map_width = m_box_->width();
		int map_height = m_box_->height();
		int map_channel = m_box_->channels();
		const Dtype *box = m_box_->cpu_data();
		const Dtype *score = m_score_->cpu_data();

		int step = 4 * map_height*map_width;
		int one_step = map_height*map_width;
		int offset_w, offset_h, offset_x, offset_y, offset_s;

		for (int h = 0; h < map_height; ++h)
		{
			for (int w = 0; w < map_width; ++w)
			{
				offset_x = h*map_width + w;
				offset_y = offset_x + one_step;
				offset_w = offset_y + one_step;
				offset_h = offset_w + one_step;
				offset_s = one_step*anchors_nums_ + h*map_width + w;
				for (int c = 0; c < map_channel / 4; ++c)
				{
					Dtype width = box[offset_w], height = box[offset_h];
					if (width >= localMinSize && height >= localMinSize)
					{
						abox tmp;
						tmp.batch_ind = 0;
						tmp.x1 = box[offset_x] - 0.5*width;
						tmp.y1 = box[offset_y] - 0.5*height;
						tmp.x2 = box[offset_x] + 0.5*width;
						tmp.y2 = box[offset_y] + 0.5*height;
						tmp.x1 = mymin(mymax(tmp.x1, 0), src_width_);
						tmp.y1 = mymin(mymax(tmp.y1, 0), src_height_);
						tmp.x2 = mymin(mymax(tmp.x2, 0), src_width_);
						tmp.y2 = mymin(mymax(tmp.y2, 0), src_height_);
						tmp.score = score[offset_s];
						aboxes.push_back(tmp);
					}
					offset_x += step;
					offset_y += step;
					offset_w += step;
					offset_h += step;
					offset_s += one_step;
				}
			}
		}
	}

	template<typename Dtype>
	void RPNLayer<Dtype>::bbox_tranform_inv(){
		int channel = m_box_->channels();
		int height = m_box_->height();
		int width = m_box_->width();
		int step = height*width;
		Dtype * a = m_box_->mutable_cpu_data();
		Dtype * b = local_anchors_->mutable_cpu_data();
		for (int i = 0; i < channel / 4; ++i)
		{
			caffe_axpy(2 * step, Dtype(-1), b + (i * 4 + 0)*step, b + (i * 4 + 2)*step);
			caffe_add_scalar(2 * step, Dtype(1), b + (i * 4 + 2)*step);
			caffe_axpy(2 * step, Dtype(0.5), b + (i * 4 + 2)*step, b + (i * 4 + 0)*step);

			caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);
			caffe_add(2 * step, b + (i * 4 + 0)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);

			caffe_exp(2 * step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
			caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
		}
	}
	
	template<typename Dtype>
	void RPNLayer<Dtype>::nms(std::vector<abox> &input_boxes, float nms_thresh){
		std::vector<float>vArea(input_boxes.size());
		for (int i = 0; i < input_boxes.size(); ++i)
		{
			vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
				* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
		}
		for (int i = 0; i < input_boxes.size(); ++i)
		{
			for (int j = i + 1; j < input_boxes.size();)
			{
				float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
				float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
				float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
				float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
				float w = std::max(float(0), xx2 - xx1 + 1);
				float	h = std::max(float(0), yy2 - yy1 + 1);
				float	inter = w * h;
				float ovr = inter / (vArea[i] + vArea[j] - inter);
				if (ovr >= nms_thresh)
				{
					input_boxes.erase(input_boxes.begin() + j);
					vArea.erase(vArea.begin() + j);
				}
				else
				{
					j++;
				}
			}
		}
	}

	template <typename Dtype>
	void RPNLayer<Dtype>::Forward_cpu(
		const vector<Blob<Dtype>*>& bottom,
		const vector<Blob<Dtype>*>& top) {
		
		map_width_ = bottom[1]->width();
		map_height_ = bottom[1]->height();
		//int channels = bottom[1]->channels();

		//get boxs_delta (box regression deltas)
		m_box_->CopyFrom(*(bottom[1]), false, true);
		
		//get scores: the first anchors_nums_ channels are bg scores, the
		//next anchors_nums_ are fg scores; we only need the latter.
		m_score_->CopyFrom(*(bottom[0]), false, true);

		//get im_info
		src_height_ = bottom[2]->data_at(0, 0, 0, 0);
		src_width_ = bottom[2]->data_at(0, 1, 0, 0);
		src_scale_ = bottom[2]->data_at(0, 2, 0, 0);

		//generate anchors at every feature-map position
		proposal_local_anchor();
		
		//Convert anchors into proposals via bbox transformations
		bbox_tranform_inv();

		vector<abox>aboxes;
		filter_boxs(aboxes);
		
		std::sort(aboxes.rbegin(), aboxes.rend()); //sort descending by score
		if (pre_nms_topN_ > 0)
		{
			int tmp = mymin(pre_nms_topN_, aboxes.size());
			aboxes.erase(aboxes.begin() + tmp, aboxes.end());
		}

		nms(aboxes, nms_thresh_);
		if (post_nms_topN_ > 0)
		{
			int tmp = mymin(post_nms_topN_, aboxes.size());
			aboxes.erase(aboxes.begin() + tmp, aboxes.end());
		}
		top[0]->Reshape(aboxes.size(), 5, 1, 1);
		Dtype *top0 = top[0]->mutable_cpu_data();
		for (int i = 0; i < aboxes.size(); ++i)
		{
			top0[0] = aboxes[i].batch_ind;
			top0[1] = aboxes[i].x1;
			top0[2] = aboxes[i].y1;
			top0[3] = aboxes[i].x2;
			top0[4] = aboxes[i].y2;
			top0 += top[0]->offset(1);
		}
		if (top.size()>1)
		{
			top[1]->Reshape(aboxes.size(), 1, 1, 1);
			Dtype *top1 = top[1]->mutable_cpu_data();
			for (int i = 0; i < aboxes.size(); ++i)
			{
				top1[0] = aboxes[i].score;
				top1 += top[1]->offset(1);
			}
		}
	}

	template <typename Dtype>
	void RPNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){}

	/*template <typename Dtype>
	void Backward_gpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){}*/

#ifdef CPU_ONLY
STUB_GPU(RPNLayer);
#endif

INSTANTIATE_CLASS(RPNLayer);
REGISTER_LAYER_CLASS(RPN);

}  // namespace caffe
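As a sanity check on the anchor logic, the standalone sketch below replays ratio_enum/scale_enum by hand for base_size = 16, ratios = {0.5, 1, 2}, scales = {8, 16, 32} (this is my own test program, not part of the layer). It prints the familiar nine Faster R-CNN anchors, starting with { -84, -40, 99, 55 }; these are exactly the unused tmp table at the top of rpn_layer.cpp shifted by one, since that table uses MATLAB's 1-indexed convention.

// anchor_check.cpp -- standalone, does not depend on caffe
#include <cmath>
#include <cstdio>

int main() {
	const float base_size = 16.0f;
	const float ratios[3] = { 0.5f, 1.0f, 2.0f };
	const float scales[3] = { 8.0f, 16.0f, 32.0f };
	// base anchor [0, 0, 15, 15]: w = h = 16, center = (7.5, 7.5)
	const float ctr = (base_size - 1) / 2.0f;
	for (int r = 0; r < 3; ++r) {
		// ratio_enum: keep the area fixed, change the aspect ratio
		float ws = std::round(std::sqrt(base_size * base_size / ratios[r]));
		float hs = std::round(ws * ratios[r]);
		for (int s = 0; s < 3; ++s) {
			// scale_enum: multiply width and height by the anchor scale
			float w = ws * scales[s], h = hs * scales[s];
			std::printf("{ %.0f, %.0f, %.0f, %.0f }\n",
				ctr - 0.5f * (w - 1), ctr - 0.5f * (h - 1),
				ctr + 0.5f * (w - 1), ctr + 0.5f * (h - 1));
		}
	}
	return 0;
}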

  3. Declare the layer's parameter message in caffe/src/caffe/proto/caffe.proto;
//The field ID here must be unique and must not collide with any other
//field of message LayerParameter, inside which this line belongs.
optional RPNParameter rpn_param = 150;
message RPNParameter {
  optional uint32 feat_stride = 1;
  optional uint32 basesize = 2;
  repeated uint32 scale = 3;
  repeated float ratio = 4;
  optional uint32 boxminsize =5;
  optional uint32 per_nms_topn = 9;
  optional uint32 post_nms_topn = 11;
  optional float nms_thresh = 8;
}
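For reference, this is how the layer might then be declared in the test prototxt. It is only a sketch: the bottom/top names and the parameter values follow the standard Faster R-CNN proposal settings and are my assumptions, not values copied from the score-detection model used in this post.

layer {
  name: "proposal"
  type: "RPN"
  bottom: "rpn_cls_prob_reshape"   # bottom[0]: bg/fg scores
  bottom: "rpn_bbox_pred"          # bottom[1]: box regression deltas
  bottom: "im_info"                # bottom[2]: [height, width, scale]
  top: "rois"
  rpn_param {
    feat_stride: 16
    basesize: 16
    scale: 8
    scale: 16
    scale: 32
    ratio: 0.5
    ratio: 1
    ratio: 2
    boxminsize: 16
    per_nms_topn: 6000
    post_nms_topn: 300
    nms_thresh: 0.7
  }
}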
  4. Compile caffe.proto with the build script to regenerate caffe.pb.h and caffe.pb.cc;
    the compilation process is described in detail in my other blog post.
  5. Declare and define the functions used in the RPN namespace in common.hpp and common.cpp;
    to leave no doubt about exactly where the code goes in common.hpp and common.cpp, both files are pasted in full below.
// include/caffe/common.hpp
#ifndef CAFFE_COMMON_HPP_
#define CAFFE_COMMON_HPP_

#include <boost/shared_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>

#include <climits>
#include <cmath>
#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>  // pair
#include <vector>

#include "caffe/util/device_alternate.hpp"

// Convert macro to string
#define STRINGIFY(m) #m
#define AS_STRING(m) STRINGIFY(m)

// gflags 2.1 issue: namespace google was changed to gflags without warning.
// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
// 2.1. If yes, we will add a temporary solution to redirect the namespace.
// TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's
// remove the following hack.
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif  // GFLAGS_GFLAGS_H_

// Disable the copy and assignment operator for a class.
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:\
  classname(const classname&);\
  classname& operator=(const classname&)

// Instantiate a class with float and double specifications.
#define INSTANTIATE_CLASS(classname) \
  char gInstantiationGuard##classname; \
  template class classname<float>; \
  template class classname<double>

#define INSTANTIATE_LAYER_GPU_FORWARD(classname) \
  template void classname<float>::Forward_gpu( \
      const std::vector<Blob<float>*>& bottom, \
      const std::vector<Blob<float>*>& top); \
  template void classname<double>::Forward_gpu( \
      const std::vector<Blob<double>*>& bottom, \
      const std::vector<Blob<double>*>& top);

#define INSTANTIATE_LAYER_GPU_BACKWARD(classname) \
  template void classname<float>::Backward_gpu( \
      const std::vector<Blob<float>*>& top, \
      const std::vector<bool>& propagate_down, \
      const std::vector<Blob<float>*>& bottom); \
  template void classname<double>::Backward_gpu( \
      const std::vector<Blob<double>*>& top, \
      const std::vector<bool>& propagate_down, \
      const std::vector<Blob<double>*>& bottom)

#define INSTANTIATE_LAYER_GPU_FUNCS(classname) \
  INSTANTIATE_LAYER_GPU_FORWARD(classname); \
  INSTANTIATE_LAYER_GPU_BACKWARD(classname)

// A simple macro to mark codes that are not implemented, so that when the code
// is executed we will see a fatal log.
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"

// See PR #1236
namespace cv { class Mat; }

namespace caffe {

// We will use the boost shared_ptr instead of the new C++11 one mainly
// because cuda does not work (at least now) well with C++11 features.
using boost::shared_ptr;

// Common functions and classes from std that caffe often uses.
using std::fstream;
using std::ios;
using std::isnan;
using std::isinf;
using std::iterator;
using std::make_pair;
using std::map;
using std::ostringstream;
using std::pair;
using std::set;
using std::string;
using std::stringstream;
using std::vector;

// A global initialization function that you should call in your main function.
// Currently it initializes google flags and google logging.
void GlobalInit(int* pargc, char*** pargv);

// A singleton class to hold common caffe stuff, such as the handler that
// caffe is going to use for cublas, curand, etc.
class Caffe {
 public:
  ~Caffe();

  // Thread local context for Caffe. Moved to common.cpp instead of
  // including boost/thread.hpp to avoid a boost/NVCC issues (#1009, #1010)
  // on OSX. Also fails on Linux with CUDA 7.0.18.
  static Caffe& Get();

  enum Brew { CPU, GPU };

  // This random number generator facade hides boost and CUDA rng
  // implementation from one another (for cross-platform compatibility).
  class RNG {
   public:
    RNG();
    explicit RNG(unsigned int seed);
    explicit RNG(const RNG&);
    RNG& operator=(const RNG&);
    void* generator();
   private:
    class Generator;
    shared_ptr<Generator> generator_;
  };

  // Getters for boost rng, curand, and cublas handles
  inline static RNG& rng_stream() {
    if (!Get().random_generator_) {
      Get().random_generator_.reset(new RNG());
    }
    return *(Get().random_generator_);
  }
#ifndef CPU_ONLY
  inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; }
  inline static curandGenerator_t curand_generator() {
    return Get().curand_generator_;
  }
#endif

  // Returns the mode: running on CPU or GPU.
  inline static Brew mode() { return Get().mode_; }
  // The setters for the variables
  // Sets the mode. It is recommended that you don't change the mode halfway
  // into the program since that may cause allocation of pinned memory being
  // freed in a non-pinned way, which may cause problems - I haven't verified
  // it personally but better to note it here in the header file.
  inline static void set_mode(Brew mode) { Get().mode_ = mode; }
  // Sets the random seed of both boost and curand
  static void set_random_seed(const unsigned int seed);
  // Sets the device. Since we have cublas and curand stuff, set device also
  // requires us to reset those values.
  static void SetDevice(const int device_id);
  // Prints the current GPU status.
  static void DeviceQuery();
  // Check if specified device is available
  static bool CheckDevice(const int device_id);
  // Search from start_id to the highest possible device ordinal,
  // return the ordinal of the first available device.
  static int FindDevice(const int start_id = 0);
  // Parallel training info
  inline static int solver_count() { return Get().solver_count_; }
  inline static void set_solver_count(int val) { Get().solver_count_ = val; }
  inline static bool root_solver() { return Get().root_solver_; }
  inline static void set_root_solver(bool val) { Get().root_solver_ = val; }

 protected:
#ifndef CPU_ONLY
  cublasHandle_t cublas_handle_;
  curandGenerator_t curand_generator_;
#endif
  shared_ptr<RNG> random_generator_;

  Brew mode_;
  int solver_count_;
  bool root_solver_;

 private:
  // The private constructor to avoid duplicate instantiation.
  Caffe();

  DISABLE_COPY_AND_ASSIGN(Caffe);
};

}  // namespace caffe

namespace RPN{
	struct abox
	{
		float x1;
		float y1;
		float x2;
		float y2;
		float score;
		bool operator <(const abox&tmp) const{
			return score < tmp.score;
		}
	};
	void nms(std::vector<abox>& input_boxes, float nms_thresh);
	cv::Mat bbox_tranform_inv(cv::Mat, cv::Mat);
} // namespace RPN

#endif  // CAFFE_COMMON_HPP_


// src/caffe/common.cpp
#if defined(_MSC_VER)
#include <process.h>
#define getpid() _getpid()
#endif

#include <boost/thread.hpp>
#include <glog/logging.h>
#include <cmath>
#include <cstdio>
#include <ctime>
#include <opencv2/opencv.hpp>

using namespace cv;

#include "caffe/util/rng.hpp"

namespace caffe {

// Make sure each thread can have different values.
static boost::thread_specific_ptr<Caffe> thread_instance_;

Caffe& Caffe::Get() {
  if (!thread_instance_.get()) {
    thread_instance_.reset(new Caffe());
  }
  return *(thread_instance_.get());
}

// random seeding
int64_t cluster_seedgen(void) {
  int64_t s, seed, pid;
  FILE* f = fopen("/dev/urandom", "rb");
  if (f && fread(&seed, 1, sizeof(seed), f) == sizeof(seed)) {
    fclose(f);
    return seed;
  }

  LOG(INFO) << "System entropy source not available, "
              "using fallback algorithm to generate seed instead.";
  if (f)
    fclose(f);

  pid = getpid();
  s = time(NULL);
  seed = std::abs(((s * 181) * ((pid - 83) * 359)) % 104729);
  return seed;
}


void GlobalInit(int* pargc, char*** pargv) {
  // Google flags.
  ::gflags::ParseCommandLineFlags(pargc, pargv, true);
  // Google logging.
  ::google::InitGoogleLogging(*(pargv)[0]);
  // Provide a backtrace on segfault.

  // Windows port of glogs doesn't have this function built
#if !defined(_MSC_VER)
  ::google::InstallFailureSignalHandler();
#endif
}

#ifdef CPU_ONLY  // CPU-only Caffe.

Caffe::Caffe()
    : random_generator_(), mode_(Caffe::CPU),
      solver_count_(1), root_solver_(true) { }

Caffe::~Caffe() { }

void Caffe::set_random_seed(const unsigned int seed) {
  // RNG seed
  Get().random_generator_.reset(new RNG(seed));
}

void Caffe::SetDevice(const int device_id) {
  NO_GPU;
}

void Caffe::DeviceQuery() {
  NO_GPU;
}

bool Caffe::CheckDevice(const int device_id) {
  NO_GPU;
  return false;
}

int Caffe::FindDevice(const int start_id) {
  NO_GPU;
  return -1;
}

class Caffe::RNG::Generator {
 public:
  Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
  explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
  caffe::rng_t* rng() { return rng_.get(); }
 private:
  shared_ptr<caffe::rng_t> rng_;
};

Caffe::RNG::RNG() : generator_(new Generator()) { }

Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }

Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_ = other.generator_;
  return *this;
}

void* Caffe::RNG::generator() {
  return static_cast<void*>(generator_->rng());
}

#else  // Normal GPU + CPU Caffe.

Caffe::Caffe()
    : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
    mode_(Caffe::CPU), solver_count_(1), root_solver_(true) {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // Try to create a curand handler.
  if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
      != CURAND_STATUS_SUCCESS ||
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
      != CURAND_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}

Caffe::~Caffe() {
  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}

void Caffe::set_random_seed(const unsigned int seed) {
  // Curand seed
  static bool g_curand_availability_logged = false;
  if (Get().curand_generator_) {
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
        seed));
    CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0));
  } else {
    if (!g_curand_availability_logged) {
        LOG(ERROR) <<
            "Curand not available. Skipping setting the curand seed.";
        g_curand_availability_logged = true;
    }
  }
  // RNG seed
  Get().random_generator_.reset(new RNG(seed));
}

void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}

void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  LOG(INFO) << "Device id:                     " << device;
  LOG(INFO) << "Major revision number:         " << prop.major;
  LOG(INFO) << "Minor revision number:         " << prop.minor;
  LOG(INFO) << "Name:                          " << prop.name;
  LOG(INFO) << "Total global memory:           " << prop.totalGlobalMem;
  LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
  LOG(INFO) << "Total registers per block:     " << prop.regsPerBlock;
  LOG(INFO) << "Warp size:                     " << prop.warpSize;
  LOG(INFO) << "Maximum memory pitch:          " << prop.memPitch;
  LOG(INFO) << "Maximum threads per block:     " << prop.maxThreadsPerBlock;
  LOG(INFO) << "Maximum dimension of block:    "
      << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
      << prop.maxThreadsDim[2];
  LOG(INFO) << "Maximum dimension of grid:     "
      << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
      << prop.maxGridSize[2];
  LOG(INFO) << "Clock rate:                    " << prop.clockRate;
  LOG(INFO) << "Total constant memory:         " << prop.totalConstMem;
  LOG(INFO) << "Texture alignment:             " << prop.textureAlignment;
  LOG(INFO) << "Concurrent copy and execution: "
      << (prop.deviceOverlap ? "Yes" : "No");
  LOG(INFO) << "Number of multiprocessors:     " << prop.multiProcessorCount;
  LOG(INFO) << "Kernel execution timeout:      "
      << (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
  return;
}

bool Caffe::CheckDevice(const int device_id) {
  // This function checks the availability of GPU #device_id.
  // It attempts to create a context on the device by calling cudaFree(0).
  // cudaSetDevice() alone is not sufficient to check the availability.
  // It lazily records device_id, however, does not initialize a
  // context. So it does not know if the host thread has the permission to use
  // the device or not.
  //
  // In a shared environment where the devices are set to EXCLUSIVE_PROCESS
  // or EXCLUSIVE_THREAD mode, cudaSetDevice() returns cudaSuccess
  // even if the device is exclusively occupied by another process or thread.
  // Cuda operations that initialize the context are needed to check
  // the permission. cudaFree(0) is one of those with no side effect,
  // except the context initialization.
  bool r = ((cudaSuccess == cudaSetDevice(device_id)) &&
            (cudaSuccess == cudaFree(0)));
  // reset any error that may have occurred.
  cudaGetLastError();
  return r;
}

int Caffe::FindDevice(const int start_id) {
  // This function finds the first available device by checking devices with
  // ordinal from start_id to the highest available value. In the
  // EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD mode, if it succeeds, it also
  // claims the device due to the initialization of the context.
  int count = 0;
  CUDA_CHECK(cudaGetDeviceCount(&count));
  for (int i = start_id; i < count; i++) {
    if (CheckDevice(i)) return i;
  }
  return -1;
}

class Caffe::RNG::Generator {
 public:
  Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
  explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
  caffe::rng_t* rng() { return rng_.get(); }
 private:
  shared_ptr<caffe::rng_t> rng_;
};

Caffe::RNG::RNG() : generator_(new Generator()) { }

Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }

Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_ = other.generator_;  // share; reset(other.generator_.get()) would make two owners and double-free
  return *this;
}

void* Caffe::RNG::generator() {
  return static_cast<void*>(generator_->rng());
}

const char* cublasGetErrorString(cublasStatus_t error) {
  switch (error) {
  case CUBLAS_STATUS_SUCCESS:
    return "CUBLAS_STATUS_SUCCESS";
  case CUBLAS_STATUS_NOT_INITIALIZED:
    return "CUBLAS_STATUS_NOT_INITIALIZED";
  case CUBLAS_STATUS_ALLOC_FAILED:
    return "CUBLAS_STATUS_ALLOC_FAILED";
  case CUBLAS_STATUS_INVALID_VALUE:
    return "CUBLAS_STATUS_INVALID_VALUE";
  case CUBLAS_STATUS_ARCH_MISMATCH:
    return "CUBLAS_STATUS_ARCH_MISMATCH";
  case CUBLAS_STATUS_MAPPING_ERROR:
    return "CUBLAS_STATUS_MAPPING_ERROR";
  case CUBLAS_STATUS_EXECUTION_FAILED:
    return "CUBLAS_STATUS_EXECUTION_FAILED";
  case CUBLAS_STATUS_INTERNAL_ERROR:
    return "CUBLAS_STATUS_INTERNAL_ERROR";
#if CUDA_VERSION >= 6000
  case CUBLAS_STATUS_NOT_SUPPORTED:
    return "CUBLAS_STATUS_NOT_SUPPORTED";
#endif
#if CUDA_VERSION >= 6050
  case CUBLAS_STATUS_LICENSE_ERROR:
    return "CUBLAS_STATUS_LICENSE_ERROR";
#endif
  }
  return "Unknown cublas status";
}

const char* curandGetErrorString(curandStatus_t error) {
  switch (error) {
  case CURAND_STATUS_SUCCESS:
    return "CURAND_STATUS_SUCCESS";
  case CURAND_STATUS_VERSION_MISMATCH:
    return "CURAND_STATUS_VERSION_MISMATCH";
  case CURAND_STATUS_NOT_INITIALIZED:
    return "CURAND_STATUS_NOT_INITIALIZED";
  case CURAND_STATUS_ALLOCATION_FAILED:
    return "CURAND_STATUS_ALLOCATION_FAILED";
  case CURAND_STATUS_TYPE_ERROR:
    return "CURAND_STATUS_TYPE_ERROR";
  case CURAND_STATUS_OUT_OF_RANGE:
    return "CURAND_STATUS_OUT_OF_RANGE";
  case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
    return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
  case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
    return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
  case CURAND_STATUS_LAUNCH_FAILURE:
    return "CURAND_STATUS_LAUNCH_FAILURE";
  case CURAND_STATUS_PREEXISTING_FAILURE:
    return "CURAND_STATUS_PREEXISTING_FAILURE";
  case CURAND_STATUS_INITIALIZATION_FAILED:
    return "CURAND_STATUS_INITIALIZATION_FAILED";
  case CURAND_STATUS_ARCH_MISMATCH:
    return "CURAND_STATUS_ARCH_MISMATCH";
  case CURAND_STATUS_INTERNAL_ERROR:
    return "CURAND_STATUS_INTERNAL_ERROR";
  }
  return "Unknown curand status";
}

#endif  // CPU_ONLY

}  // namespace caffe

namespace RPN{
	cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta){
		cv::Mat pre_box(local_anchors.rows, local_anchors.cols, CV_32FC1);
		for (int i = 0; i < local_anchors.rows; i++)
		{
			double pred_ctr_x, pred_ctr_y, src_ctr_x, src_ctr_y;
			double dst_ctr_x, dst_ctr_y, dst_scl_x, dst_scl_y;
			double src_w, src_h, pred_w, pred_h;
			src_w = local_anchors.at<float>(i, 2) - local_anchors.at<float>(i, 0) + 1;
			src_h = local_anchors.at<float>(i, 3) - local_anchors.at<float>(i, 1) + 1;
			src_ctr_x = local_anchors.at<float>(i, 0) + 0.5 * src_w;
			src_ctr_y = local_anchors.at<float>(i, 1) + 0.5 * src_h;

			dst_ctr_x = boxs_delta.at<float>(i, 0);
			dst_ctr_y = boxs_delta.at<float>(i, 1);
			dst_scl_x = boxs_delta.at<float>(i, 2);
			dst_scl_y = boxs_delta.at<float>(i, 3);
			pred_ctr_x = dst_ctr_x*src_w + src_ctr_x;
			pred_ctr_y = dst_ctr_y*src_h + src_ctr_y;
			pred_w = exp(dst_scl_x) * src_w;
			pred_h = exp(dst_scl_y) * src_h;

			pre_box.at<float>(i, 0) = pred_ctr_x - 0.5*pred_w;
			pre_box.at<float>(i, 1) = pred_ctr_y - 0.5*pred_h;
			pre_box.at<float>(i, 2) = pred_ctr_x + 0.5*pred_w;
			pre_box.at<float>(i, 3) = pred_ctr_y + 0.5*pred_h;
		}
		return pre_box;
	}
	void nms(std::vector<abox> &input_boxes, float nms_thresh){
		std::vector<float>vArea(input_boxes.size());
		for (int i = 0; i < input_boxes.size(); ++i)
		{
			vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
				* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
		}
		for (int i = 0; i < input_boxes.size(); ++i)
		{
			for (int j = i + 1; j < input_boxes.size();)
			{
				float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
				float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
				float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
				float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
				float w = std::max(float(0), xx2 - xx1 + 1);
				float   h = std::max(float(0), yy2 - yy1 + 1);
				float   inter = w * h;
				float ovr = inter / (vArea[i] + vArea[j] - inter);
				if (ovr >= nms_thresh)
				{
					input_boxes.erase(input_boxes.begin() + j);
					vArea.erase(vArea.begin() + j);
				}
				else
				{
					j++;
				}
			}
		}
	}
}
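A quick way to verify that these helpers link and behave as expected is a tiny driver like the one below (my own sketch; the box coordinates are made up). As in the layer, boxes must be sorted by descending score before calling nms:

// nms_check.cpp -- minimal usage sketch for the RPN helpers in common.hpp
#include <algorithm>
#include <cstdio>
#include <vector>
#include "caffe/common.hpp"

int main() {
	std::vector<RPN::abox> boxes(3);
	boxes[0].x1 = 0;   boxes[0].y1 = 0;   boxes[0].x2 = 99;  boxes[0].y2 = 99;  boxes[0].score = 0.9f;
	boxes[1].x1 = 5;   boxes[1].y1 = 5;   boxes[1].x2 = 104; boxes[1].y2 = 104; boxes[1].score = 0.8f;  // IoU with boxes[0] is 9025/10975, about 0.82
	boxes[2].x1 = 200; boxes[2].y1 = 200; boxes[2].x2 = 250; boxes[2].y2 = 250; boxes[2].score = 0.7f;
	std::sort(boxes.rbegin(), boxes.rend());  // descending by score, as the layer does
	RPN::nms(boxes, 0.7f);                    // suppresses boxes[1]
	std::printf("%d boxes kept\n", (int)boxes.size());  // prints: 2 boxes kept
	return 0;
}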

  6. Rebuild pycaffe; the new layer is now in place.

Implementing the detection project (Faster R-CNN)

Create a new detection project in the same solution, with the hierarchy shown below (screenshot omitted):
caffe_32_bit is the project in which pycaffe was built; detect is the new detection project.
head.h
Note in particular: when using some Caffe layers you may get an "unknown layer XX" error. In that case, add the layers you use to head.h; any layer that is not yet registered must be registered in the code below.

#include "caffe/common.hpp"
#include "caffe/layers/input_layer.hpp"
#include "caffe/layers/reshape_layer.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/layers/dropout_layer.hpp"
#include "caffe/layers/conv_layer.hpp"
#include "caffe/layers/relu_layer.hpp"
#include "caffe/layers/rpn_layer.hpp"
#include "caffe/layers/roi_pooling_layer.hpp"

#include "caffe/layers/pooling_layer.hpp"
#include "caffe/layers/lrn_layer.hpp"
#include "caffe/layers/softmax_layer.hpp"


namespace caffe
{

	extern INSTANTIATE_CLASS(InputLayer);
	extern INSTANTIATE_CLASS(InnerProductLayer);
	extern INSTANTIATE_CLASS(DropoutLayer);
	extern INSTANTIATE_CLASS(ConvolutionLayer);
	REGISTER_LAYER_CLASS(Convolution);
	extern INSTANTIATE_CLASS(ReLULayer);
	REGISTER_LAYER_CLASS(ReLU);
	extern INSTANTIATE_CLASS(ReshapeLayer);
	extern INSTANTIATE_CLASS(PoolingLayer);
	REGISTER_LAYER_CLASS(Pooling);
	extern INSTANTIATE_CLASS(LRNLayer);
	REGISTER_LAYER_CLASS(LRN);
	extern INSTANTIATE_CLASS(ROIPoolingLayer);
	extern INSTANTIATE_CLASS(RPNLayer);
	extern INSTANTIATE_CLASS(SoftmaxLayer);
	REGISTER_LAYER_CLASS(Softmax);
}  // namespace caffe

ObjectDetector.hpp

#ifndef OBJECTDETECTOR_H
#define OBJECTDETECTOR_H

#define INPUT_SIZE_NARROW  600
#define INPUT_SIZE_LONG  1000

#include <string>
#include "D://caffe_build_32_bit//caffe_build_32_bit//caffe-master//include//caffe//net.hpp"
#include "D://caffe_build_32_bit//caffe_build_32_bit//caffe-master//include//caffe//common.hpp"
#include <opencv2/core/core.hpp>
#include <iostream>
#include <memory>
#include <map>

using namespace std;

class ObjectDetector
{
public:

	ObjectDetector(const std::string &model_file, const std::string &weights_file);  //constructor
	//Detect on one image; results go into a map from class label to that class's boxes. If score information is needed, confidences are computed too.
	map<int, vector<cv::Rect> > detect(const cv::Mat& image, map<int, vector<float> >* score = NULL);
	//map<int, vector<cv::Rect> > detect(const cv::Mat& image, map<int, vector<cv::Rect> > &label_objs, map<int, vector<float> >* score = NULL);
	

private:
	boost::shared_ptr< caffe::Net<float> > net_;
	int class_num_;     //number of classes + 1; the official demo has 20 + 1 classes
};

#endif

ObjectDetector.cpp

#include "stdafx.h"
#include "ObjectDetector.hpp"
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
#include <fstream>
#include<windows.h>

using std::string;
using std::vector;
using  std::max;
using std::min;
using namespace caffe;

ObjectDetector::ObjectDetector(const std::string &model_file, const std::string &weights_file){
#ifdef CPU_ONLY
	Caffe::set_mode(Caffe::CPU);
#else
	Caffe::set_mode(Caffe::GPU);
#endif
	net_.reset(new Net<float>(model_file, TEST));
	net_->CopyTrainedLayersFrom(weights_file);
	this->class_num_ = net_->blob_by_name("cls_prob")->channels();  //number of classes + 1
}

//Detect on one image; results go into a map from class label to that class's boxes. If score information is needed, confidences are computed too.
map<int, vector<cv::Rect> > ObjectDetector::detect(const cv::Mat& image, map<int, vector<float> >* objectScore){
//map<int, vector<cv::Rect> > ObjectDetector::detect(const cv::Mat& image, map<int, vector<cv::Rect> > &label_objs, map<int, vector<float> >* objectScore){
	//
	int t1 = GetTickCount();

	if (objectScore != NULL)   //if confidences are requested
		objectScore->clear();

	float CONF_THRESH = 0.8;  //confidence threshold
	float NMS_THRESH = 0.3;   //non-maximum suppression threshold
	int max_side = max(image.rows, image.cols);   //longer side of the image
	int min_side = min(image.rows, image.cols);   //shorter side of the image
	float max_side_scale = float(max_side) / float(INPUT_SIZE_LONG);    //scale factor for each side
	float min_side_scale = float(min_side) / float(INPUT_SIZE_NARROW);
	float max_scale = max(max_side_scale, min_side_scale);

	float img_scale = float(1) / max_scale;
	int height = int(image.rows * img_scale);
	int width = int(image.cols * img_scale);

	//
	int t2 = GetTickCount();
	cout << "detect, t2-t1=" << t2 - t1 << "ms" << endl;

	int num_out;
	cv::Mat cv_resized;
	image.convertTo(cv_resized, CV_32FC3);
	cv::resize(cv_resized, cv_resized, cv::Size(width, height));
	cv::Mat mean(height, width, cv_resized.type(), cv::Scalar(102.9801, 115.9465, 122.7717));
	cv::Mat normalized;
	subtract(cv_resized, mean, normalized);

	//
	int t3 = GetTickCount();
	cout << "detect, t3-t2=" << t3 - t2 << "ms" << endl;

	float im_info[3];
	im_info[0] = height;
	im_info[1] = width;
	im_info[2] = img_scale;
	caffe::shared_ptr<Blob<float> > input_layer = net_->blob_by_name("data");
	input_layer->Reshape(1, normalized.channels(), height, width);
	net_->Reshape();
	float* input_data = input_layer->mutable_cpu_data();
	vector<cv::Mat> input_channels;
	for (int i = 0; i < input_layer->channels(); ++i) {
		cv::Mat channel(height, width, CV_32FC1, input_data);
		input_channels.push_back(channel);
		input_data += height * width;
	}

	//
	int t3_1 = GetTickCount();
	cout << "detect, t3_1-t3=" << t3_1 - t3 << "ms" << endl;

	cv::split(normalized, input_channels);
	net_->blob_by_name("im_info")->set_cpu_data(im_info);

	//
	int t3_1_1 = GetTickCount();
	cout << "detect, t3_1_1-t3_1=" << t3_1_1 - t3_1 << "ms" << endl;

	net_->Forward();                                       //forward pass through the network

	//
	int t4 = GetTickCount();
	cout << "detect, t4-t3_1_1=" << t4 - t3_1_1 << "ms" << endl;

	int num = net_->blob_by_name("rois")->num();    //产生的 ROI 个数,比如为 13949个ROI
	const float *rois_data = net_->blob_by_name("rois")->cpu_data();    //维度比如为:13949*5*1*1
	//int num1 = net_->blob_by_name("bbox_pred")->num();   //预测的矩形框 维度为 13949*84
	cv::Mat rois_box(num, 4, CV_32FC1);
	for (int i = 0; i < num; ++i)
	{
		rois_box.at<float>(i, 0) = rois_data[i * 5 + 1] / img_scale;
		rois_box.at<float>(i, 1) = rois_data[i * 5 + 2] / img_scale;
		rois_box.at<float>(i, 2) = rois_data[i * 5 + 3] / img_scale;
		rois_box.at<float>(i, 3) = rois_data[i * 5 + 4] / img_scale;
	}

	//
	int t5 = GetTickCount();
	cout << "detect, t5-t4=" << t5 - t4 << "ms" << endl;

	caffe::shared_ptr<Blob<float> > bbox_delt_data = net_->blob_by_name("bbox_pred");   // 13949*84
	caffe::shared_ptr<Blob<float> > score = net_->blob_by_name("cls_prob");             // 13949*21

	map<int, vector<cv::Rect> > label_objs;    //detected boxes for each class
	for (int i = 1; i < class_num_; ++i){     //iterate over classes (class 0 is background)
		cv::Mat bbox_delt(num, 4, CV_32FC1);
		for (int j = 0; j < num; ++j){
			bbox_delt.at<float>(j, 0) = bbox_delt_data->data_at(j, i * 4 + 0, 0, 0);
			bbox_delt.at<float>(j, 1) = bbox_delt_data->data_at(j, i * 4 + 1, 0, 0);
			bbox_delt.at<float>(j, 2) = bbox_delt_data->data_at(j, i * 4 + 2, 0, 0);
			bbox_delt.at<float>(j, 3) = bbox_delt_data->data_at(j, i * 4 + 3, 0, 0);
		}
		//cout << "rois_box:" << rois_box << endl;
		//cout << "bbox_delt:" << bbox_delt << endl;
		cv::Mat box_class = RPN::bbox_tranform_inv(rois_box, bbox_delt);
		//cout << "box_class:" << box_class << endl;

		vector<RPN::abox> aboxes;   //boxes detected for class i
		for (int j = 0; j < box_class.rows; ++j){
			if (box_class.at<float>(j, 0) < 0)  box_class.at<float>(j, 0) = 0;
			if (box_class.at<float>(j, 0) > (image.cols - 1))   box_class.at<float>(j, 0) = image.cols - 1;
			if (box_class.at<float>(j, 2) < 0)  box_class.at<float>(j, 2) = 0;
			if (box_class.at<float>(j, 2) > (image.cols - 1))   box_class.at<float>(j, 2) = image.cols - 1;

			if (box_class.at<float>(j, 1) < 0)  box_class.at<float>(j, 1) = 0;
			if (box_class.at<float>(j, 1) > (image.rows - 1))   box_class.at<float>(j, 1) = image.rows - 1;
			if (box_class.at<float>(j, 3) < 0)  box_class.at<float>(j, 3) = 0;
			if (box_class.at<float>(j, 3) > (image.rows - 1))   box_class.at<float>(j, 3) = image.rows - 1;
			RPN::abox tmp;
			tmp.x1 = box_class.at<float>(j, 0);
			tmp.y1 = box_class.at<float>(j, 1);
			tmp.x2 = box_class.at<float>(j, 2);
			tmp.y2 = box_class.at<float>(j, 3);
			tmp.score = score->data_at(j, i, 0, 0);
			aboxes.push_back(tmp);
		}
		std::sort(aboxes.rbegin(), aboxes.rend());  //sort descending by score
		//RPN::nms(aboxes, NMS_THRESH);  //NMS to suppress overlapping boxes (disabled here)
		for (int k = 0; k < aboxes.size();){
			if (aboxes[k].score < CONF_THRESH)
				aboxes.erase(aboxes.begin() + k);
			else
				k++;
		}
		//save all detection boxes of class i
		vector<cv::Rect> rect(aboxes.size());    //rectangles detected for class i
		for (int ii = 0; ii<aboxes.size(); ++ii)
			rect[ii] = cv::Rect(cv::Point(aboxes[ii].x1, aboxes[ii].y1), cv::Point(aboxes[ii].x2, aboxes[ii].y2));
		label_objs[i] = rect;
		//save the scores of all detection boxes of class i
		if (objectScore != NULL){
			vector<float> tmp(aboxes.size());       //scores of the boxes detected for class i
			for (int ii = 0; ii<aboxes.size(); ++ii)
				tmp[ii] = aboxes[ii].score;
			objectScore->insert(pair<int, vector<float> >(i, tmp));
		}
	}

	//
	int t6 = GetTickCount();
	cout << "detect, t6-t5=" << t6 - t5 << "ms" << endl;

	return label_objs;
}
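To make the scaling concrete: for a 1200x800 input with INPUT_SIZE_LONG = 1000 and INPUT_SIZE_NARROW = 600, max_side_scale = 1200/1000 = 1.2 and min_side_scale = 800/600 ≈ 1.33, so max_scale ≈ 1.33 and img_scale ≈ 0.75; the network therefore sees a 900x600 image, and the ROI coordinates are divided by img_scale afterwards to map the detections back to the original resolution.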

main.cpp

#include "stdafx.h"
#include "ObjectDetector.hpp"
#include "head.h"
#include<opencv2/opencv.hpp>
#include<iostream>
#include<sstream>
#include<windows.h>

using namespace cv;
using namespace std;

string num2str(float i){
	stringstream ss;
	ss << i;
	return ss.str();
}
vector<string> files;


int main(int argc, char **argv){
	::google::InitGoogleLogging(argv[0]);
#ifdef CPU_ONLY
	cout << "Use CPU\n";
#else
	cout << "Use GPU\n";
#endif
	
	string file_path = "C://Users//w//Desktop//test//*.png";
	glob(file_path, files, false);
	size_t count = files.size();
	int t3 = GetTickCount();
	ObjectDetector detect("faster_rcnn_test.pt", "faster_rcnn_final.caffemodel");
	int t4 = GetTickCount();
	cout << "加载模型:" << t4 - t3 << "ms" << endl;

	int t1 = GetTickCount();
	for (int i = 0; i < count; i++){
		Mat img = imread(files[i]);
		map<int, vector<float> > score;
		map<int, vector<Rect> > label_objs = detect.detect(img, &score);  //run detection; also collect each box's confidence

		cout << "label_objes.size()=" << label_objs.size() << endl;
		for (map<int, vector<Rect> >::iterator it = label_objs.begin(); it != label_objs.end(); it++){
			int label = it->first;  //class label
			vector<Rect> rects = it->second;  //boxes for this class
			//for (int j = 0; j<rects.size(); j++){
			//	rectangle(img, rects[j], Scalar(0, 0, 255), 2);   //draw the box
			//	string txt = num2str(label) + " : " + num2str(score[label][j]);
			//	putText(img, txt, Point(rects[j].x, rects[j].y), CV_FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); //annotate label : confidence
			//}
		}
	}

	int t2 =  GetTickCount();
	cout << "运行时间:" << (t2-t1)/count << "ms" << endl;
	/*imshow("", img);
	waitKey();
	return 0;*/
}

All of the code is pasted above in full; the key to building the detect project successfully is configuring its property pages correctly.
1. First, under "VC++ Directories" -> "Include Directories", add the include directories of every dependency you use. (screenshot of my settings omitted)
2. Next, set "VC++ Directories" -> "Library Directories". (screenshot omitted)
3. Set "Linker" -> "Input" -> "Additional Dependencies", adding every required library. (screenshot of the full list omitted)
4. Two more settings need changing. Being new to Visual Studio 2013 I hit errors here; my project turned out to be a Win32 project, so I changed
"C/C++" -> "Preprocessor" -> "Preprocessor Definitions" (screenshot omitted)
and "Linker" -> "System" -> "SubSystem" (screenshot omitted).

With all of the above configured, the project builds.
(This is my first blog post; if anything is unclear or incorrect, corrections and discussion are welcome!)
