MTCNN caffe 与 ncnn 实现代码

最新推荐文章于 2024-05-11 16:33:55 发布

NineDays66

最新推荐文章于 2024-05-11 16:33:55 发布

阅读量1.1k

点赞数

分类专栏：人脸处理、目标检测

本文链接：https://blog.csdn.net/u011808673/article/details/89356292

版权

人脸处理同时被 2 个专栏收录

63 篇文章 9 订阅

订阅专栏

目标检测

14 篇文章 3 订阅

订阅专栏

Caffe version

mtcnn.h

#ifndef _MTCNN_H_
#define _MTCNN_H_

#include <caffe/caffe.hpp>
#include <opencv2/opencv.hpp>
#include <vector>

#ifdef _OPENMP
#include <omp.h>
#endif

using std::string;
using std::vector;

// Axis-aligned face rectangle plus its detection score.
// Widths and heights elsewhere in this file are computed as (max - min + 1).
struct FaceBox {
	float xmin;   // left edge
	float ymin;   // top edge
	float xmax;   // right edge
	float ymax;   // bottom edge
	float score;  // face probability; CompareBBox orders candidates by it
};
// One detection candidate: the box and the raw network outputs attached to it.
struct FaceInfo {
	float bbox_reg[4];       // box offsets; BBoxRegression applies them scaled by box width/height
	float landmark_reg[10];  // not written by any code visible in this file
	float landmark[10];      // five (x, y) pairs, filled by the stage-3 branch of NextStage
	FaceBox bbox;            // rectangle and score
};

// MTCNN face detector backed by three Caffe networks (PNet, RNet, ONet).
class MTCNN {
public:
	// Loads det1/det2/det3 (.prototxt + .caffemodel) from proto_model_dir and
	// switches Caffe to GPU mode.
	MTCNN(const string& proto_model_dir);
	// Same model loading, plus the parameters used by Detect(img, stage).
	// device < 0 selects CPU mode; otherwise the id is handed to
	// caffe::Caffe::SetDevice and GPU mode is used.
	MTCNN(const string& proto_model_dir, int min_face, float threhold_p=0.7f, float threhold_r=0.6f, float threhold_o = 0.6f, int device = 0, float iou_threhold = 0.7f, float factor = 0.709f);
	// Runs the cascade up to `stage` (1 = PNet, 2 = +RNet, 3 = +ONet) with explicit
	// parameters. threshold[0], [1] and [2] are read for stages 1, 2 and 3.
	vector<FaceInfo> Detect(const cv::Mat& img, const int min_size, const float* threshold, const float factor, const int stage);
	// Runs the cascade up to `stage` with the parameters stored on this object.
	vector<FaceInfo> Detect(const cv::Mat& img, const int stage);
protected:
	// Stage 1: PNet over an image pyramid; returns merged, regressed, square-padded boxes.
	vector<FaceInfo> ProposalNet(const cv::Mat& img, int min_size, float threshold, float factor);
	// Stage 2 (stage_num == 2, RNet) or stage 3 (stage_num == 3, ONet): re-scores the
	// candidates in pre_stage_res on crops resized to input_w x input_h.
	vector<FaceInfo> NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold);
	// Shifts each box edge by bbox_reg[k] times the box width (x) or height (y).
	void BBoxRegression(vector<FaceInfo>& bboxes);
	// Expands each box to a square on its longer side, rounds, and clamps to the image.
	void BBoxPadSquare(vector<FaceInfo>& bboxes, int width, int height);
	// Rounds each box and clamps it to [0, width-1] x [0, height-1].
	void BBoxPad(vector<FaceInfo>& bboxes, int width, int height);
	// Turns one PNet output map into candidate_boxes_ (cells scoring >= thresh).
	void GenerateBBox(caffe::Blob<float>* confidence, caffe::Blob<float>* reg_box, float scale, float thresh);
	// Non-maximum suppression. Sorts bboxes in place by descending score.
	// methodType 'u' = intersection/union, 'm' = intersection/min-area.
	std::vector<FaceInfo> NMS(std::vector<FaceInfo>& bboxes, float thresh, char methodType);
	// Overlap ratio of two boxes; is_iom switches from IoU to intersection/min-area.
	float IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom = false);

private:
	boost::shared_ptr<caffe::Net<float>> PNet_;
	boost::shared_ptr<caffe::Net<float>> RNet_;
	boost::shared_ptr<caffe::Net<float>> ONet_;

	// Scratch buffers: candidate_boxes_ is refilled by GenerateBBox for each
	// pyramid level, total_boxes_ accumulates across levels in ProposalNet.
	std::vector<FaceInfo> candidate_boxes_;
	std::vector<FaceInfo> total_boxes_;

	// Thread count passed to the `omp parallel for num_threads(...)` pragmas.
	int threads_num = 4;

	// PNet geometry used by GenerateBBox: step between output cells and the
	// side of the window each cell covers, both in resized-image pixels.
	const float pnet_stride = 2;
	const float pnet_cell_size = 12;
	// Detect keeps at most this many PNet proposals before running RNet.
	const int pnet_max_detect_num = 5000;
	// Input normalisation: network input = (pixel - mean_val) * std_val.
	const float mean_val = 127.5f;
	const float std_val = 0.0078125f;
	// Detect feeds candidates to NextStage in chunks of this many boxes.
	const int step_size = 128;

	// Parameters used by Detect(img, stage).
	int min_face=24;
	float threhold_p=0.7f;
	float threhold_r=0.6f;
	float threhold_o=0.6f;
	float iou_threhold =0.7f;
	float factor= 0.709f;
};








#endif // _MTCNN_H_

mtcnn.cpp


//#include <fstream>
#include <boost/shared_ptr.hpp>
#include <iostream>
#include "mtcnn.h"

// Sort predicate: places the candidate with the higher score first.
bool CompareBBox(const FaceInfo & a, const FaceInfo & b) {
	const float score_a = a.bbox.score;
	const float score_b = b.bbox.score;
	return score_b < score_a;
}

// Overlap ratio between box (xmin, ymin, xmax, ymax) and box (xmin_, ymin_, xmax_, ymax_).
// Coordinates are treated as inclusive, hence the "+ 1" on every extent.
// Returns 0 when the boxes do not overlap. With is_iom the intersection is
// divided by the smaller box area, otherwise by the union area.
float MTCNN::IoU(float xmin, float ymin, float xmax, float ymax,
	float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom) {
	const float overlap_w = std::min(xmax, xmax_) - std::max(xmin, xmin_) + 1;
	const float overlap_h = std::min(ymax, ymax_) - std::max(ymin, ymin_) + 1;
	if (overlap_w <= 0 || overlap_h <= 0)
		return 0;

	const float overlap = overlap_w*overlap_h;
	const float area_a = (xmax - xmin + 1)*(ymax - ymin + 1);
	const float area_b = (xmax_ - xmin_ + 1)*(ymax_ - ymin_ + 1);
	if (is_iom)
		return overlap / std::min(area_a, area_b);
	return overlap / (area_a + area_b - overlap);
}
std::vector<FaceInfo> MTCNN::NMS(std::vector<FaceInfo>& bboxes,
	float thresh, char methodType) {
	std::vector<FaceInfo> bboxes_nms;
	if (bboxes.size() == 0) {
		return bboxes_nms;
	}
	std::sort(bboxes.begin(), bboxes.end(), CompareBBox);

	int32_t select_idx = 0;
	int32_t num_bbox = static_cast<int32_t>(bboxes.size());
	std::vector<int32_t> mask_merged(num_bbox, 0);
	bool all_merged = false;

	while (!all_merged) {
		while (select_idx < num_bbox && mask_merged[select_idx] == 1)
			select_idx++;
		if (select_idx == num_bbox) {
			all_merged = true;
			continue;
		}

		bboxes_nms.push_back(bboxes[select_idx]);
		mask_merged[select_idx] = 1;

		FaceBox select_bbox = bboxes[select_idx].bbox;
		float area1 = static_cast<float>((select_bbox.xmax - select_bbox.xmin + 1) * (select_bbox.ymax - select_bbox.ymin + 1));
		float x1 = static_cast<float>(select_bbox.xmin);
		float y1 = static_cast<float>(select_bbox.ymin);
		float x2 = static_cast<float>(select_bbox.xmax);
		float y2 = static_cast<float>(select_bbox.ymax);

		select_idx++;
#pragma omp parallel for num_threads(threads_num)
		for (int32_t i = select_idx; i < num_bbox; i++) {
			if (mask_merged[i] == 1)
				continue;

			FaceBox & bbox_i = bboxes[i].bbox;
			float x = std::max<float>(x1, static_cast<float>(bbox_i.xmin));
			float y = std::max<float>(y1, static_cast<float>(bbox_i.ymin));
			float w = std::min<float>(x2, static_cast<float>(bbox_i.xmax)) - x + 1;
			float h = std::min<float>(y2, static_cast<float>(bbox_i.ymax)) - y + 1;
			if (w <= 0 || h <= 0)
				continue;

			float area2 = static_cast<float>((bbox_i.xmax - bbox_i.xmin + 1) * (bbox_i.ymax - bbox_i.ymin + 1));
			float area_intersect = w * h;

			switch (methodType) {
			case 'u':
				if (static_cast<float>(area_intersect) / (area1 + area2 - area_intersect) > thresh)
					mask_merged[i] = 1;
				break;
			case 'm':
				if (static_cast<float>(area_intersect) / std::min(area1, area2) > thresh)
					mask_merged[i] = 1;
				break;
			default:
				break;
			}
		}
	}
	return bboxes_nms;
}
// Applies the regression offsets to every box in place: each edge moves by
// bbox_reg[k] times the box width (x edges) or height (y edges).
void MTCNN::BBoxRegression(vector<FaceInfo>& bboxes) {
	const int count = static_cast<int>(bboxes.size());
#pragma omp parallel for num_threads(threads_num)
	for (int idx = 0; idx < count; ++idx) {
		FaceInfo &face = bboxes[idx];
		// Width and height are taken before any edge is modified.
		const float box_w = face.bbox.xmax - face.bbox.xmin + 1;
		const float box_h = face.bbox.ymax - face.bbox.ymin + 1;
		face.bbox.xmin += face.bbox_reg[0] * box_w;
		face.bbox.ymin += face.bbox_reg[1] * box_h;
		face.bbox.xmax += face.bbox_reg[2] * box_w;
		face.bbox.ymax += face.bbox_reg[3] * box_h;
	}
}
// Clamps every box to the image (0 .. width-1, 0 .. height-1) and rounds the
// coordinates to whole pixels, in place.
void MTCNN::BBoxPad(vector<FaceInfo>& bboxes, int width, int height) {
	const int count = static_cast<int>(bboxes.size());
	const float last_col = width - 1.f;
	const float last_row = height - 1.f;
#pragma omp parallel for num_threads(threads_num)
	for (int idx = 0; idx < count; ++idx) {
		FaceBox &rect = bboxes[idx].bbox;
		rect.xmin = round(std::max(rect.xmin, 0.f));
		rect.ymin = round(std::max(rect.ymin, 0.f));
		rect.xmax = round(std::min(rect.xmax, last_col));
		rect.ymax = round(std::min(rect.ymax, last_row));
	}
}
// Grows every box to a square on its longer side (centred on the original box),
// rounds to whole pixels and clamps to the image, in place.
void MTCNN::BBoxPadSquare(vector<FaceInfo>& bboxes, int width, int height) {
	const int count = static_cast<int>(bboxes.size());
	const float last_col = width - 1.f;
	const float last_row = height - 1.f;
#pragma omp parallel for num_threads(threads_num)
	for (int idx = 0; idx < count; ++idx) {
		FaceBox &rect = bboxes[idx].bbox;
		const float box_w = rect.xmax - rect.xmin + 1;
		const float box_h = rect.ymax - rect.ymin + 1;
		const float side = std::max(box_w, box_h);

		rect.xmin = round(std::max(rect.xmin + (box_w - side)*0.5f, 0.f));
		rect.ymin = round(std::max(rect.ymin + (box_h - side)*0.5f, 0.f));
		// The far edges are derived from the already clamped and rounded near
		// edges, so this order must be kept.
		rect.xmax = round(std::min(rect.xmin + side - 1, last_col));
		rect.ymax = round(std::min(rect.ymin + side - 1, last_row));
	}
}
// Converts one PNet output into candidate boxes stored in candidate_boxes_
// (which is cleared first).
//
// confidence  score blob; the second channel is read as the face probability.
// reg_box     blob whose first four channels are the box regression offsets.
// scale       pyramid scale of this pass; boxes are divided by it to map back
//             to source-image coordinates.
// thresh      minimum probability for a cell to produce a candidate.
void MTCNN::GenerateBBox(caffe::Blob<float>* confidence, caffe::Blob<float>* reg_box,
	float scale, float thresh) {
	const int cols = confidence->width();
	const int rows = confidence->height();
	const int plane = cols*rows;
	// Skip one plane to reach the second channel.
	const float* face_prob = confidence->cpu_data() + plane;
	const float* offsets = reg_box->cpu_data();

	candidate_boxes_.clear();
	int cell = 0;
	for (int row = 0; row < rows; ++row) {
		for (int col = 0; col < cols; ++col, ++cell) {
			const float prob = face_prob[cell];
			if (!(prob >= thresh))
				continue;

			FaceInfo candidate;
			FaceBox &rect = candidate.bbox;
			const float left = col * pnet_stride;
			const float top = row * pnet_stride;
			rect.xmin = left / scale;
			rect.ymin = top / scale;
			rect.xmax = (left + pnet_cell_size - 1.f) / scale;
			rect.ymax = (top + pnet_cell_size - 1.f) / scale;
			rect.score = prob;

			for (int c = 0; c < 4; ++c)
				candidate.bbox_reg[c] = offsets[cell + c * plane];

			candidate_boxes_.push_back(candidate);
		}
	}
}
// Builds the three networks from proto_model_dir (det1 = PNet, det2 = RNet,
// det3 = ONet) in Caffe GPU mode. All other members keep their in-class defaults.
MTCNN::MTCNN(const string& proto_model_dir) {
	typedef caffe::Net<float> NetF;

	caffe::Caffe::set_mode(caffe::Caffe::GPU);

	PNet_ = boost::shared_ptr<NetF>(new NetF(proto_model_dir + "/det1.prototxt", caffe::TEST));
	PNet_->CopyTrainedLayersFrom(proto_model_dir + "/det1.caffemodel");

	RNet_ = boost::shared_ptr<NetF>(new NetF(proto_model_dir + "/det2.prototxt", caffe::TEST));
	RNet_->CopyTrainedLayersFrom(proto_model_dir + "/det2.caffemodel");

	// A "det3-half" prototxt/caffemodel pair was used here as an alternative ONet.
	ONet_ = boost::shared_ptr<NetF>(new NetF(proto_model_dir + "/det3.prototxt", caffe::TEST));
	ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3.caffemodel");

	// The input fill loops write exactly three planes per image.
	const int num_channels_ = PNet_->input_blobs()[0]->channels();
	CHECK(num_channels_ == 3) << "Input layer should have 3 channels.";
}

// Builds the three networks from proto_model_dir and stores the detection
// parameters used by Detect(img, stage).
//
// min_face      smallest face side, in pixels, the pyramid is built for.
// threhold_p/r/o  score thresholds for PNet / RNet / ONet.
// device        < 0 selects Caffe CPU mode; otherwise the GPU with this id.
// iou_threhold  overlap threshold for the RNet/ONet NMS passes.
// factor        pyramid scale factor.
MTCNN::MTCNN(const string& proto_model_dir, int min_face, float threhold_p, float threhold_r, float threhold_o, 
	int device, float iou_threhold , float factor):
	min_face(min_face),
	threhold_p(threhold_p),
	threhold_r(threhold_r),
	threhold_o(threhold_o),
	iou_threhold(iou_threhold),
	factor(factor){

#ifdef _OPENMP
	// omp_get_num_threads() reports the size of the *current* team, which is 1
	// outside a parallel region and would serialise every num_threads(threads_num)
	// loop. omp_get_max_threads() is the size a new parallel region would get.
	threads_num = omp_get_max_threads();
#endif

	if (device < 0) {
		caffe::Caffe::set_mode(caffe::Caffe::CPU);
	}
	else {
		caffe::Caffe::SetDevice(device);
		caffe::Caffe::set_mode(caffe::Caffe::GPU);
	}

	PNet_.reset(new caffe::Net<float>((proto_model_dir + "/det1.prototxt"), caffe::TEST));
	PNet_->CopyTrainedLayersFrom(proto_model_dir + "/det1.caffemodel");
	RNet_.reset(new caffe::Net<float>((proto_model_dir + "/det2.prototxt"), caffe::TEST));
	RNet_->CopyTrainedLayersFrom(proto_model_dir + "/det2.caffemodel");
	ONet_.reset(new caffe::Net<float>((proto_model_dir + "/det3.prototxt"), caffe::TEST));
	ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3.caffemodel");
	//ONet_.reset(new Net<float>((proto_model_dir + "/det3-half.prototxt"), TEST));
	//ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3-half.caffemodel");

	// Same sanity check as the single-argument constructor: the input fill
	// loops write exactly three planes per image.
	const int num_channels_ = PNet_->input_blobs()[0]->channels();
	CHECK(num_channels_ == 3) << "Input layer should have 3 channels.";
}


// Stage 1: runs PNet over an image pyramid and returns the merged proposals.
//
// img        source image; must be 8-bit, 3-channel (the pixel loop reads it as cv::Vec3b).
// minSize    smallest face side, in pixels, to look for; must be > 0.
// threshold  minimum PNet face probability for a cell to become a candidate.
// factor     multiplier between successive pyramid scales. A factor that does
//            not shrink the image (>= 1, or NaN) yields a single scale.
//
// Returns boxes after per-scale NMS (0.5), cross-scale NMS (0.7), regression
// and square padding. Returns an empty vector for an empty or non-8UC3 image,
// or when minSize <= 0.
vector<FaceInfo> MTCNN::ProposalNet(const cv::Mat& img, int minSize, float threshold, float factor) {
	vector<FaceInfo> res_boxes;
	total_boxes_.clear();

	// minSize == 0 makes the scale infinite, so the pyramid loop below would
	// never end; a non-8UC3 image would be read out of bounds by the Vec3b cast.
	if (img.empty() || img.type() != CV_8UC3 || minSize <= 0)
		return res_boxes;

	const int width = img.cols;
	const int height = img.rows;
	float scale = pnet_cell_size / minSize;
	float minWH = std::min(height, width) *scale;
	std::vector<float> scales;
	while (minWH >= pnet_cell_size) {
		scales.push_back(scale);
		// Without shrinking, minWH never drops below the cell size: stop
		// after one level instead of growing `scales` forever.
		if (!(factor < 1.f))
			break;
		minWH *= factor;
		scale *= factor;
	}

	cv::Mat resized;
	caffe::Blob<float>* input_layer = PNet_->input_blobs()[0];
	for (size_t i = 0; i < scales.size(); i++) {
		const int ws = (int)std::ceil(width*scales[i]);
		const int hs = (int)std::ceil(height*scales[i]);
		cv::resize(img, resized, cv::Size(ws, hs), 0, 0, cv::INTER_LINEAR);
		input_layer->Reshape(1, 3, hs, ws);
		PNet_->Reshape();

		// Interleaved 8-bit pixels -> three normalised float planes.
		float * input_data = input_layer->mutable_cpu_data();
		const cv::Vec3b * img_data = (const cv::Vec3b *)resized.data;
		const int spatial_size = ws* hs;
		for (int k = 0; k < spatial_size; ++k) {
			input_data[k] = float((img_data[k][0] - mean_val)* std_val);
			input_data[k + spatial_size] = float((img_data[k][1] - mean_val) * std_val);
			input_data[k + 2 * spatial_size] = float((img_data[k][2] - mean_val) * std_val);
		}
		PNet_->Forward();

		caffe::Blob<float>* confidence = PNet_->blob_by_name("prob1").get();
		caffe::Blob<float>* reg = PNet_->blob_by_name("conv4-2").get();
		GenerateBBox(confidence, reg, scales[i], threshold);
		std::vector<FaceInfo> bboxes_nms = NMS(candidate_boxes_, 0.5f, 'u');
		total_boxes_.insert(total_boxes_.end(), bboxes_nms.begin(), bboxes_nms.end());
	}

	if (!total_boxes_.empty()) {
		res_boxes = NMS(total_boxes_, 0.7f, 'u');
		BBoxRegression(res_boxes);
		BBoxPadSquare(res_boxes, width, height);
	}
	return res_boxes;
}
// Stage 2 (RNet) / stage 3 (ONet): re-scores the candidates of the previous stage.
//
// image          source image the candidate boxes refer to.
// pre_stage_res  candidates from the previous stage; only their bbox is read.
// input_w/h      size each crop is resized to before entering the network.
// stage_num      2 selects RNet, 3 selects ONet; any other value returns an empty vector.
// threshold      minimum face probability for a candidate to be kept.
//
// Returns the kept candidates with their incoming box, the new score and
// regression offsets and, for stage 3, landmarks mapped into image coordinates.
// Candidates whose box has no area inside the image are dropped.
vector<FaceInfo> MTCNN::NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold) {
	vector<FaceInfo> res;
	const int batch_size = (int)pre_stage_res.size();
	if (batch_size == 0)
		return res;

	caffe::Net<float>* net = nullptr;
	const char* reg_blob_name = nullptr;
	switch (stage_num) {
	case 2:
		net = RNet_.get();
		reg_blob_name = "conv5-2";
		break;
	case 3:
		net = ONet_.get();
		reg_blob_name = "conv6-2";
		break;
	default:
		return res;
	}

	caffe::Blob<float>* input_layer = net->input_blobs()[0];
	input_layer->Reshape(batch_size, 3, input_h, input_w);
	net->Reshape();
	float * input_data = input_layer->mutable_cpu_data();
	const int spatial_size = input_h*input_w;

	const cv::Rect image_bounds(0, 0, image.cols, image.rows);
	// usable[n] == 0 marks a candidate with nothing to crop. Each iteration
	// writes only its own slot.
	std::vector<char> usable(batch_size, 1);

#pragma omp parallel for num_threads(threads_num)
	for (int n = 0; n < batch_size; ++n) {
		const FaceBox &box = pre_stage_res[n].bbox;
		float *input_data_n = input_data + input_layer->offset(n);

		// Clamp the crop to the image: a rectangle that leaves the image or has
		// no area makes OpenCV throw, and an exception cannot leave an OpenMP region.
		const cv::Rect crop = cv::Rect(cv::Point((int)box.xmin, (int)box.ymin), cv::Point((int)box.xmax, (int)box.ymax)) & image_bounds;
		if (crop.width <= 0 || crop.height <= 0) {
			usable[n] = 0;
			for (int k = 0; k < 3 * spatial_size; ++k)
				input_data_n[k] = 0.f;
			continue;
		}

		cv::Mat roi;
		cv::resize(image(crop), roi, cv::Size(input_w, input_h));
		CHECK_EQ(roi.isContinuous(), true);
		const cv::Vec3b *roi_data = (const cv::Vec3b *)roi.data;
		// Interleaved 8-bit pixels -> three normalised float planes.
		for (int k = 0; k < spatial_size; ++k) {
			input_data_n[k] = float((roi_data[k][0] - mean_val)*std_val);
			input_data_n[k + spatial_size] = float((roi_data[k][1] - mean_val)*std_val);
			input_data_n[k + 2 * spatial_size] = float((roi_data[k][2] - mean_val)*std_val);
		}
	}

	net->Forward();
	const float* confidence_data = net->blob_by_name("prob1")->cpu_data();
	const float* reg_data = net->blob_by_name(reg_blob_name)->cpu_data();
	const float* landmark_data = nullptr;
	if (stage_num == 3) {
		landmark_data = net->blob_by_name("conv6-3")->cpu_data();
	}

	for (int k = 0; k < batch_size; ++k) {
		if (!usable[k])
			continue;
		// Two scores per candidate; the second is used as the face probability.
		const float score = confidence_data[2 * k + 1];
		if (!(score >= threshold))
			continue;

		FaceInfo info = FaceInfo();  // zero the fields this stage does not fill
		info.bbox = pre_stage_res[k].bbox;
		info.bbox.score = score;
		for (int i = 0; i < 4; ++i) {
			info.bbox_reg[i] = reg_data[4 * k + i];
		}
		if (landmark_data) {
			// Landmark outputs are relative to the box; map them to image coordinates.
			const float w = info.bbox.xmax - info.bbox.xmin + 1.f;
			const float h = info.bbox.ymax - info.bbox.ymin + 1.f;
			for (int i = 0; i < 5; ++i) {
				info.landmark[2 * i] = landmark_data[10 * k + 2 * i] * w + info.bbox.xmin;
				info.landmark[2 * i + 1] = landmark_data[10 * k + 2 * i + 1] * h + info.bbox.ymin;
			}
		}
		res.push_back(info);
	}
	return res;
}



// Runs the cascade with explicit parameters.
//
// image      source image.
// minSize    smallest face side passed to ProposalNet.
// threshold  score thresholds; [0], [1], [2] are read for stages 1, 2, 3.
// factor     pyramid scale factor passed to ProposalNet.
// stage      last stage to run: 1 returns the PNet proposals, 2 the RNet
//            results, anything else the ONet results (empty when stage < 1).
vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int minSize, const float* threshold, const float factor, const int stage) {
	vector<FaceInfo> proposals;
	vector<FaceInfo> refined;
	vector<FaceInfo> finals;

	if (stage >= 1)
		proposals = ProposalNet(image, minSize, threshold[0], factor);

	if (stage >= 2 && !proposals.empty()) {
		if ((int)proposals.size() > pnet_max_detect_num)
			proposals.resize(pnet_max_detect_num);

		// Feed RNet in chunks of step_size candidates.
		const int total = (int)proposals.size();
		for (int first = 0; first < total; first += step_size) {
			const int last = std::min(first + step_size, total);
			vector<FaceInfo> chunk(proposals.begin() + first, proposals.begin() + last);
			vector<FaceInfo> kept = NextStage(image, chunk, 24, 24, 2, threshold[1]);
			refined.insert(refined.end(), kept.begin(), kept.end());
		}
		refined = NMS(refined, 0.7f, 'u');
		BBoxRegression(refined);
		BBoxPadSquare(refined, image.cols, image.rows);
	}

	if (stage >= 3 && !refined.empty()) {
		// Feed ONet in chunks of step_size candidates.
		const int total = (int)refined.size();
		for (int first = 0; first < total; first += step_size) {
			const int last = std::min(first + step_size, total);
			vector<FaceInfo> chunk(refined.begin() + first, refined.begin() + last);
			vector<FaceInfo> kept = NextStage(image, chunk, 48, 48, 3, threshold[2]);
			finals.insert(finals.end(), kept.begin(), kept.end());
		}
		// The final stage regresses first, then suppresses by intersection/min-area.
		BBoxRegression(finals);
		finals = NMS(finals, 0.7f, 'm');
		BBoxPad(finals, image.cols, image.rows);
	}

	if (stage == 1)
		return proposals;
	if (stage == 2)
		return refined;
	return finals;
}

// Runs the cascade with the parameters stored on this object (min_face,
// threhold_p/r/o, iou_threhold, factor).
//
// stage  last stage to run: 1 returns the PNet proposals, 2 the RNet results,
//        anything else the ONet results (empty when stage < 1).
vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int stage) {
	vector<FaceInfo> proposals;
	vector<FaceInfo> refined;
	vector<FaceInfo> finals;

	if (stage >= 1)
		proposals = ProposalNet(image, min_face, threhold_p, factor);

	if (stage >= 2 && !proposals.empty()) {
		if ((int)proposals.size() > pnet_max_detect_num)
			proposals.resize(pnet_max_detect_num);

		// Feed RNet in chunks of step_size candidates.
		const int total = (int)proposals.size();
		for (int first = 0; first < total; first += step_size) {
			const int last = std::min(first + step_size, total);
			vector<FaceInfo> chunk(proposals.begin() + first, proposals.begin() + last);
			vector<FaceInfo> kept = NextStage(image, chunk, 24, 24, 2, threhold_r);
			refined.insert(refined.end(), kept.begin(), kept.end());
		}
		refined = NMS(refined, iou_threhold, 'u');
		BBoxRegression(refined);
		BBoxPadSquare(refined, image.cols, image.rows);
	}

	if (stage >= 3 && !refined.empty()) {
		// Feed ONet in chunks of step_size candidates.
		const int total = (int)refined.size();
		for (int first = 0; first < total; first += step_size) {
			const int last = std::min(first + step_size, total);
			vector<FaceInfo> chunk(refined.begin() + first, refined.begin() + last);
			vector<FaceInfo> kept = NextStage(image, chunk, 48, 48, 3, threhold_o);
			finals.insert(finals.end(), kept.begin(), kept.end());
		}
		// The final stage regresses first, then suppresses by intersection/min-area.
		BBoxRegression(finals);
		finals = NMS(finals, iou_threhold, 'm');
		BBoxPad(finals, image.cols, image.rows);
	}

	if (stage == 1)
		return proposals;
	if (stage == 2)
		return refined;
	return finals;
}

NCNN version

ncnn 版本在 Linux 系统或嵌入式系统上运行更快。

mtcnn.h

#ifndef _MTCNN_H_
#define _MTCNN_H_

#include <opencv2/opencv.hpp>
#include <memory>
#include <vector>
#include <net.h>

#ifdef _OPENMP
#include <omp.h>
#endif

using std::string;
using std::vector;

// Axis-aligned face rectangle plus its detection score.
// Widths and heights elsewhere in this file are computed as (max - min + 1).
struct FaceBox {
	float xmin;   // left edge
	float ymin;   // top edge
	float xmax;   // right edge
	float ymax;   // bottom edge
	float score;  // face probability; CompareBBox orders candidates by it
};
// One detection candidate: the box and the raw network outputs attached to it.
struct FaceInfo {
	float bbox_reg[4];       // box offsets; BBoxRegression applies them scaled by box width/height
	float landmark_reg[10];  // not written by any code visible in this file
	float landmark[10];      // five (x, y) pairs, filled by the stage-3 branch of NextStage
	FaceBox bbox;            // rectangle and score
};

// MTCNN face detector backed by three ncnn networks (PNet, RNet, ONet).
class MTCNN {
public:
	// Creates the three networks and stores the detection parameters.
	// NOTE(review): the visible constructor body loads fixed "models/..." paths and
	// reads neither proto_model_dir nor device — confirm whether that is intended.
	MTCNN(const string& proto_model_dir, int min_face, float threhold_p=0.7f, float threhold_r=0.6f, float threhold_o = 0.6f, int device = 0, float iou_threhold = 0.7f, float factor = 0.709f);
	// (disabled) overload taking explicit min_size / thresholds / factor:
	//vector<FaceInfo> Detect(const cv::Mat& img, const int min_size, const float* threshold, const float factor, const int stage);
	// Runs the cascade on img with the parameters stored on this object.
	vector<FaceInfo> Detect(const cv::Mat& img, const int stage);
	// Calls clear() on the three networks.
	~MTCNN();
protected:
	// Stage 1: PNet over an image pyramid; returns merged, regressed, square-padded boxes.
	vector<FaceInfo> ProposalNet(const cv::Mat& img, int min_size, float threshold, float factor);
	// Stage 2 (stage_num == 2, RNet) or stage 3 (stage_num == 3, ONet): re-scores the
	// candidates in pre_stage_res one crop at a time, resized to input_w x input_h.
	vector<FaceInfo> NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold);
	// Shifts each box edge by bbox_reg[k] times the box width (x) or height (y).
	void BBoxRegression(vector<FaceInfo>& bboxes);
	// Expands each box to a square on its longer side, rounds, and clamps to the image.
	void BBoxPadSquare(vector<FaceInfo>& bboxes, int width, int height);
	// Rounds each box and clamps it to [0, width-1] x [0, height-1].
	void BBoxPad(vector<FaceInfo>& bboxes, int width, int height);
	// Turns one PNet output map into candidate_boxes_ (cells scoring >= thresh).
	void GenerateBBox(ncnn::Mat & confidence, ncnn::Mat& reg_box, float scale, float thresh);
	// Non-maximum suppression. Sorts bboxes in place by descending score.
	// methodType 'u' = intersection/union, 'm' = intersection/min-area.
	std::vector<FaceInfo> NMS(std::vector<FaceInfo>& bboxes, float thresh, char methodType);
	// Overlap ratio of two boxes; is_iom switches from IoU to intersection/min-area.
	float IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom = false);
private:
	std::shared_ptr<ncnn::Net> PNet_;
	std::shared_ptr<ncnn::Net> RNet_;
	std::shared_ptr<ncnn::Net> ONet_;

	// Scratch buffers: candidate_boxes_ is refilled by GenerateBBox for each
	// pyramid level, total_boxes_ accumulates across levels in ProposalNet.
	std::vector<FaceInfo> candidate_boxes_;
	std::vector<FaceInfo> total_boxes_;

	// Thread count passed to the `omp parallel for num_threads(...)` pragmas.
	int threads_num = 4;

	// PNet geometry used by GenerateBBox: step between output cells and the
	// side of the window each cell covers, both in resized-image pixels.
	const float pnet_stride = 2;
	const float pnet_cell_size = 12;
	// Presumably the cap on PNet proposals forwarded to RNet (that is how the
	// commented-out Detect uses it) — the live Detect body is not fully visible here.
	const int pnet_max_detect_num = 5000;
	// Input normalisation: network input = (pixel - mean_val) * std_val.
	const float mean_val = 127.5f;
	const float std_val = 0.0078125f;
	// Presumably the chunk size for NextStage calls (as in the commented-out Detect).
	const int step_size = 128;

	// Parameters used by Detect(img, stage).
	int min_face=24;
	float threhold_p=0.7f;
	float threhold_r=0.6f;
	float threhold_o=0.6f;
	float iou_threhold =0.7f;
	float factor= 0.709f;
};








#endif // _MTCNN_H_

mtcnn.cpp


#include <iostream>
#include <net.h>
#include "mtcnn.h"

// Sort predicate: places the candidate with the higher score first.
bool CompareBBox(const FaceInfo & a, const FaceInfo & b) {
	const float score_a = a.bbox.score;
	const float score_b = b.bbox.score;
	return score_b < score_a;
}

// Overlap ratio between box (xmin, ymin, xmax, ymax) and box (xmin_, ymin_, xmax_, ymax_).
// Coordinates are treated as inclusive, hence the "+ 1" on every extent.
// Returns 0 when the boxes do not overlap. With is_iom the intersection is
// divided by the smaller box area, otherwise by the union area.
float MTCNN::IoU(float xmin, float ymin, float xmax, float ymax,
	float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom) {
	const float overlap_w = std::min(xmax, xmax_) - std::max(xmin, xmin_) + 1;
	const float overlap_h = std::min(ymax, ymax_) - std::max(ymin, ymin_) + 1;
	if (overlap_w <= 0 || overlap_h <= 0)
		return 0;

	const float overlap = overlap_w*overlap_h;
	const float area_a = (xmax - xmin + 1)*(ymax - ymin + 1);
	const float area_b = (xmax_ - xmin_ + 1)*(ymax_ - ymin_ + 1);
	if (is_iom)
		return overlap / std::min(area_a, area_b);
	return overlap / (area_a + area_b - overlap);
}
std::vector<FaceInfo> MTCNN::NMS(std::vector<FaceInfo>& bboxes,
	float thresh, char methodType) {
	std::vector<FaceInfo> bboxes_nms;
	if (bboxes.size() == 0) {
		return bboxes_nms;
	}
	std::sort(bboxes.begin(), bboxes.end(), CompareBBox);

	int32_t select_idx = 0;
	int32_t num_bbox = static_cast<int32_t>(bboxes.size());
	std::vector<int32_t> mask_merged(num_bbox, 0);
	bool all_merged = false;

	while (!all_merged) {
		while (select_idx < num_bbox && mask_merged[select_idx] == 1)
			select_idx++;
		if (select_idx == num_bbox) {
			all_merged = true;
			continue;
		}
		bboxes_nms.push_back(bboxes[select_idx]);
		mask_merged[select_idx] = 1;

		FaceBox select_bbox = bboxes[select_idx].bbox;
		float area1 = static_cast<float>((select_bbox.xmax - select_bbox.xmin + 1) * (select_bbox.ymax - select_bbox.ymin + 1));
		float x1 = static_cast<float>(select_bbox.xmin);
		float y1 = static_cast<float>(select_bbox.ymin);
		float x2 = static_cast<float>(select_bbox.xmax);
		float y2 = static_cast<float>(select_bbox.ymax);

		select_idx++;
#pragma omp parallel for num_threads(threads_num)
		for (int32_t i = select_idx; i < num_bbox; i++) {
			if (mask_merged[i] == 1)
				continue;

			FaceBox & bbox_i = bboxes[i].bbox;
			float x = std::max<float>(x1, static_cast<float>(bbox_i.xmin));
			float y = std::max<float>(y1, static_cast<float>(bbox_i.ymin));
			float w = std::min<float>(x2, static_cast<float>(bbox_i.xmax)) - x + 1;
			float h = std::min<float>(y2, static_cast<float>(bbox_i.ymax)) - y + 1;
			if (w <= 0 || h <= 0)
				continue;

			float area2 = static_cast<float>((bbox_i.xmax - bbox_i.xmin + 1) * (bbox_i.ymax - bbox_i.ymin + 1));
			float area_intersect = w * h;

			switch (methodType) {
			case 'u':
				if (static_cast<float>(area_intersect) / (area1 + area2 - area_intersect) > thresh)
					mask_merged[i] = 1;
				break;
			case 'm':
				if (static_cast<float>(area_intersect) / std::min(area1, area2) > thresh)
					mask_merged[i] = 1;
				break;
			default:
				break;
			}
		}
	}
	return bboxes_nms;
}
// Applies the regression offsets to every box in place: each edge moves by
// bbox_reg[k] times the box width (x edges) or height (y edges).
void MTCNN::BBoxRegression(vector<FaceInfo>& bboxes) {
	const int count = static_cast<int>(bboxes.size());
#pragma omp parallel for num_threads(threads_num)
	for (int idx = 0; idx < count; ++idx) {
		FaceInfo &face = bboxes[idx];
		// Width and height are taken before any edge is modified.
		const float box_w = face.bbox.xmax - face.bbox.xmin + 1;
		const float box_h = face.bbox.ymax - face.bbox.ymin + 1;
		face.bbox.xmin += face.bbox_reg[0] * box_w;
		face.bbox.ymin += face.bbox_reg[1] * box_h;
		face.bbox.xmax += face.bbox_reg[2] * box_w;
		face.bbox.ymax += face.bbox_reg[3] * box_h;
	}
}
// Clamps every box to the image (0 .. width-1, 0 .. height-1) and rounds the
// coordinates to whole pixels, in place.
void MTCNN::BBoxPad(vector<FaceInfo>& bboxes, int width, int height) {
	const int count = static_cast<int>(bboxes.size());
	const float last_col = width - 1.f;
	const float last_row = height - 1.f;
#pragma omp parallel for num_threads(threads_num)
	for (int idx = 0; idx < count; ++idx) {
		FaceBox &rect = bboxes[idx].bbox;
		rect.xmin = round(std::max(rect.xmin, 0.f));
		rect.ymin = round(std::max(rect.ymin, 0.f));
		rect.xmax = round(std::min(rect.xmax, last_col));
		rect.ymax = round(std::min(rect.ymax, last_row));
	}
}
// Grows every box to a square on its longer side (centred on the original box),
// rounds to whole pixels and clamps to the image, in place.
void MTCNN::BBoxPadSquare(vector<FaceInfo>& bboxes, int width, int height) {
	const int count = static_cast<int>(bboxes.size());
	const float last_col = width - 1.f;
	const float last_row = height - 1.f;
#pragma omp parallel for num_threads(threads_num)
	for (int idx = 0; idx < count; ++idx) {
		FaceBox &rect = bboxes[idx].bbox;
		const float box_w = rect.xmax - rect.xmin + 1;
		const float box_h = rect.ymax - rect.ymin + 1;
		const float side = std::max(box_w, box_h);

		rect.xmin = round(std::max(rect.xmin + (box_w - side)*0.5f, 0.f));
		rect.ymin = round(std::max(rect.ymin + (box_h - side)*0.5f, 0.f));
		// The far edges are derived from the already clamped and rounded near
		// edges, so this order must be kept.
		rect.xmax = round(std::min(rect.xmin + side - 1, last_col));
		rect.ymax = round(std::min(rect.ymin + side - 1, last_row));
	}
}
// Converts one PNet output into candidate boxes stored in candidate_boxes_
// (which is cleared first).
//
// confidence  score map; channel 1 is read as the face probability.
// reg_box     map whose channels 0..3 are the box regression offsets.
// scale       pyramid scale of this pass; boxes are divided by it to map back
//             to source-image coordinates.
// thresh      minimum probability for a cell to produce a candidate.
//
// Leaves candidate_boxes_ empty when either map has fewer channels than needed.
void MTCNN::GenerateBBox(ncnn::Mat & confidence, ncnn::Mat& reg_box,
	float scale, float thresh) {
	const int feature_map_w_ = confidence.w;
	const int feature_map_h_ = confidence.h;
	const int spatical_size = feature_map_w_*feature_map_h_;
	candidate_boxes_.clear();
	if (spatical_size <= 0 || confidence.c < 2 || reg_box.c < 4)
		return;

	// ncnn pads every channel to `cstep` elements, which can exceed w*h, so
	// channels must be addressed through channel(k) and not as base + k*w*h.
	const float* confidence_data = (const float*)confidence.channel(1).data;
	const float* reg_channels[4];
	for (int c = 0; c < 4; ++c)
		reg_channels[c] = (const float*)reg_box.channel(c).data;

	for (int i = 0; i<spatical_size; i++) {
		if (confidence_data[i] >= thresh) {
			const int y = i / feature_map_w_;
			const int x = i - feature_map_w_ * y;
			FaceInfo faceInfo;
			FaceBox &faceBox = faceInfo.bbox;

			faceBox.xmin = (float)(x * pnet_stride) / scale;
			faceBox.ymin = (float)(y * pnet_stride) / scale;
			faceBox.xmax = (float)(x * pnet_stride + pnet_cell_size - 1.f) / scale;
			faceBox.ymax = (float)(y * pnet_stride + pnet_cell_size - 1.f) / scale;

			for (int c = 0; c < 4; ++c)
				faceInfo.bbox_reg[c] = reg_channels[c][i];

			faceBox.score = confidence_data[i];
			candidate_boxes_.push_back(faceInfo);
		}
	}
}

// Creates the three ncnn networks and stores the detection parameters used by
// Detect(img, stage).
//
// proto_model_dir  directory holding det1/det2/det3-half (.param + .bin). If a
//                  model cannot be loaded from there (or the string is empty)
//                  the previous hard-coded "models" directory is tried.
// min_face         smallest face side, in pixels, the pyramid is built for.
// threhold_p/r/o   score thresholds for PNet / RNet / ONet.
// device           not used by this backend.
// iou_threhold     overlap threshold for NMS.
// factor           pyramid scale factor.
//
// A model that cannot be loaded from either location is reported on std::cerr
// and left as an empty network.
MTCNN::MTCNN(const string& proto_model_dir, int min_face, float threhold_p, float threhold_r, float threhold_o, 
	int device, float iou_threhold , float factor):
	min_face(min_face),
	threhold_p(threhold_p),
	threhold_r(threhold_r),
	threhold_o(threhold_o),
	iou_threhold(iou_threhold),
	factor(factor){

	(void)device;

#ifdef _OPENMP
	// omp_get_num_threads() reports the size of the *current* team, which is 1
	// outside a parallel region and would serialise every num_threads(threads_num)
	// loop. omp_get_max_threads() is the size a new parallel region would get.
	threads_num = omp_get_max_threads();
#endif

	// Loads <dir>/<stem>.param and <dir>/<stem>.bin, trying the caller's
	// directory first. A fresh Net is used per attempt so that a half-loaded
	// network is never kept.
	auto load_net = [&proto_model_dir](const char* stem) -> std::shared_ptr<ncnn::Net> {
		const string search_dirs[2] = { proto_model_dir, string("models") };
		for (int d = 0; d < 2; ++d) {
			const string& dir = search_dirs[d];
			if (dir.empty())
				continue;
			std::shared_ptr<ncnn::Net> net = std::make_shared<ncnn::Net>();
			const string param_path = dir + "/" + stem + ".param";
			const string model_path = dir + "/" + stem + ".bin";
			// load_param / load_model return 0 on success.
			if (net->load_param(param_path.c_str()) == 0 &&
				net->load_model(model_path.c_str()) == 0)
				return net;
		}
		std::cerr << "MTCNN: failed to load model '" << stem << "'" << std::endl;
		return std::make_shared<ncnn::Net>();
	};

	PNet_ = load_net("det1");
	RNet_ = load_net("det2");
	ONet_ = load_net("det3-half");
}

// Releases the network contents.
// The nets are held by shared_ptr and the class is copyable, so a copy of this
// object may still be using them: a net is cleared only when this object is its
// last owner. A null pointer is skipped.
MTCNN::~MTCNN() {
	std::shared_ptr<ncnn::Net>* nets[3] = { &PNet_, &RNet_, &ONet_ };
	for (int i = 0; i < 3; ++i) {
		std::shared_ptr<ncnn::Net>& net = *nets[i];
		if (net && net.use_count() == 1)
			net->clear();
	}
}

// Stage 1: runs PNet over an image pyramid and returns the merged proposals.
//
// img        source image; must be 8-bit, 3-channel (it is handed to ncnn as PIXEL_BGR).
// minSize    smallest face side, in pixels, to look for; must be > 0.
// threshold  minimum PNet face probability for a cell to become a candidate.
// factor     multiplier between successive pyramid scales. A factor that does
//            not shrink the image (>= 1, or NaN) yields a single scale.
//
// Returns boxes after per-scale NMS (0.5), cross-scale NMS (0.7), regression
// and square padding. Returns an empty vector for an empty or non-8UC3 image,
// or when minSize <= 0.
vector<FaceInfo> MTCNN::ProposalNet(const cv::Mat& img, int minSize, float threshold, float factor) {
	vector<FaceInfo> res_boxes;
	total_boxes_.clear();

	// minSize == 0 makes the scale infinite, so the pyramid loop below would
	// never end; a non-8UC3 image does not match the PIXEL_BGR layout.
	if (img.empty() || img.type() != CV_8UC3 || minSize <= 0)
		return res_boxes;

	const int width = img.cols;
	const int height = img.rows;
	float scale = pnet_cell_size / minSize;
	float minWH = std::min(height, width) *scale;
	std::vector<float> scales;
	while (minWH >= pnet_cell_size) {
		scales.push_back(scale);
		// Without shrinking, minWH never drops below the cell size: stop
		// after one level instead of growing `scales` forever.
		if (!(factor < 1.f))
			break;
		minWH *= factor;
		scale *= factor;
	}

	const float mean_vals[3] = { mean_val, mean_val, mean_val };
	const float norm_vals[3] = { std_val, std_val, std_val };
	cv::Mat resized;
	for (size_t i = 0; i < scales.size(); i++) {
		const int ws = (int)std::ceil(width*scales[i]);
		const int hs = (int)std::ceil(height*scales[i]);
		cv::resize(img, resized, cv::Size(ws, hs));

		ncnn::Extractor ex = PNet_->create_extractor();
		ex.set_light_mode(true);
		ncnn::Mat in = ncnn::Mat::from_pixels(resized.data, ncnn::Mat::PIXEL_BGR, resized.cols, resized.rows);
		in.substract_mean_normalize(mean_vals, norm_vals);
		ex.input("data", in);

		ncnn::Mat confidence;
		ncnn::Mat reg;
		// extract() returns 0 on success; a failed extraction leaves nothing to read.
		if (ex.extract("prob1", confidence) != 0 || ex.extract("conv4-2", reg) != 0)
			continue;

		GenerateBBox(confidence, reg, scales[i], threshold);
		std::vector<FaceInfo> bboxes_nms = NMS(candidate_boxes_, 0.5f, 'u');
		total_boxes_.insert(total_boxes_.end(), bboxes_nms.begin(), bboxes_nms.end());
	}

	if (!total_boxes_.empty()) {
		res_boxes = NMS(total_boxes_, 0.7f, 'u');
		BBoxRegression(res_boxes);
		BBoxPadSquare(res_boxes, width, height);
	}

	return res_boxes;
}
// Stage 2 (RNet) / stage 3 (ONet): re-scores the candidates of the previous
// stage, one network run per candidate.
//
// image          source image the candidate boxes refer to.
// pre_stage_res  candidates from the previous stage; only their bbox is read.
// input_w/h      size each crop is resized to before entering the network.
// stage_num      2 selects RNet, 3 selects ONet; any other value returns an empty vector.
// threshold      minimum face probability for a candidate to be kept.
//
// Returns the kept candidates with their incoming box, the new score and
// regression offsets and, for stage 3, landmarks mapped into image coordinates.
// Candidates whose box has no area inside the image, or whose network outputs
// cannot be extracted, are dropped.
vector<FaceInfo> MTCNN::NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold) {
	vector<FaceInfo> res;

	ncnn::Net* net = nullptr;
	const char* reg_blob_name = nullptr;
	switch (stage_num) {
	case 2:
		net = RNet_.get();
		reg_blob_name = "conv5-2";
		break;
	case 3:
		net = ONet_.get();
		reg_blob_name = "conv6-2";
		break;
	default:
		return res;
	}
	const bool with_landmarks = (stage_num == 3);

	const cv::Rect image_bounds(0, 0, image.cols, image.rows);
	const float mean_vals[3] = { mean_val, mean_val, mean_val };
	const float norm_vals[3] = { std_val, std_val, std_val };

	for (size_t n = 0; n < pre_stage_res.size(); ++n) {
		const FaceBox &box = pre_stage_res[n].bbox;

		// Clamp the crop to the image: a rectangle that leaves the image or
		// has no area makes OpenCV throw.
		const cv::Rect crop = cv::Rect(cv::Point((int)box.xmin, (int)box.ymin), cv::Point((int)box.xmax, (int)box.ymax)) & image_bounds;
		if (crop.width <= 0 || crop.height <= 0)
			continue;

		cv::Mat roi;
		cv::resize(image(crop), roi, cv::Size(input_w, input_h));

		ncnn::Extractor ex = net->create_extractor();
		ex.set_light_mode(true);
		ncnn::Mat in = ncnn::Mat::from_pixels(roi.data, ncnn::Mat::PIXEL_BGR, roi.cols, roi.rows);
		in.substract_mean_normalize(mean_vals, norm_vals);
		ex.input("data", in);

		ncnn::Mat confidence;
		ncnn::Mat reg_box;
		ncnn::Mat reg_landmark;
		// extract() returns 0 on success; a failed extraction leaves nothing to read.
		if (ex.extract("prob1", confidence) != 0 || ex.extract(reg_blob_name, reg_box) != 0)
			continue;
		if (with_landmarks && ex.extract("conv6-3", reg_landmark) != 0)
			continue;

		// Element 1 of the score output is used as the face probability.
		const float conf = ((const float*)confidence.data)[1];
		if (!(conf >= threshold))
			continue;

		FaceInfo info = FaceInfo();  // zero the fields this stage does not fill
		info.bbox = box;
		info.bbox.score = conf;
		const float* reg_data = (const float*)reg_box.data;
		for (int i = 0; i < 4; ++i) {
			info.bbox_reg[i] = reg_data[i];
		}
		if (with_landmarks) {
			// Landmark outputs are relative to the box; map them to image coordinates.
			const float* landmark_data = (const float*)reg_landmark.data;
			const float w = info.bbox.xmax - info.bbox.xmin + 1.f;
			const float h = info.bbox.ymax - info.bbox.ymin + 1.f;
			for (int i = 0; i < 5; ++i) {
				info.landmark[2 * i] = landmark_data[2 * i] * w + info.bbox.xmin;
				info.landmark[2 * i + 1] = landmark_data[2 * i + 1] * h + info.bbox.ymin;
			}
		}
		res.push_back(info);
	}
	return res;
}

//vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int minSize, const float* threshold, const float factor, const int stage) {
//	vector<FaceInfo> pnet_res;
//	vector<FaceInfo> rnet_res;
//	vector<FaceInfo> onet_res;
//	if (stage >= 1) {
//		pnet_res = ProposalNet(image, minSize, threshold[0], factor);
//	}
//	//double start = cv::getTickCount();
//	if (stage >= 2 && pnet_res.size()>0) {
//		if (pnet_max_detect_num < (int)pnet_res.size()) {
//			pnet_res.resize(pnet_max_detect_num);
//		}
//		int num = (int)pnet_res.size();
//		int size = (int)ceil((float)num / step_size);
//		for (int iter = 0; iter < size; ++iter) {
//			int start = iter*step_size;
//			int end = std::min(start + step_size, num);
//			vector<FaceInfo> input(pnet_res.begin() + start, pnet_res.begin() + end);
//			vector<FaceInfo> res = NextStage(image, input, 24, 24, 2, threshold[1]);
//			rnet_res.insert(rnet_res.end(), res.begin(), res.end());
//		}
//		rnet_res = NMS(rnet_res, 0.7f, 'u');
//		BBoxRegression(rnet_res);
//		BBoxPadSquare(rnet_res, image.cols, image.rows);
//
//	}
//	//double end = cv::getTickCount();
//	//std::cout << "r net :" << (end - start) / (cv::getTickFrequency() / 1000) << " ms" << std::endl;
//
//	//double start1 = cv::getTickCount();
//
//	if (stage >= 3 && rnet_res.size()>0) {
//		int num = (int)rnet_res.size();
//		int size = (int)ceil((float)num / step_size);
//		for (int iter = 0; iter < size; ++iter) {
//			int start = iter*step_size;
//			int end = std::min(start + step_size, num);
//			vector<FaceInfo> input(rnet_res.begin() + start, rnet_res.begin() + end);
//			vector<FaceInfo> res = NextStage(image, input, 48, 48, 3, threshold[2]);
//			onet_res.insert(onet_res.end(), res.begin(), res.end());
//		}
//		BBoxRegression(onet_res);
//		onet_res = NMS(onet_res, 0.7f, 'm');
//		BBoxPad(onet_res, image.cols, image.rows);
//	}
//	//double end1 = cv::getTickCount();
//	//std::cout << "o net :" << (end1 - start1) / (cv::getTickFrequency() / 1000) << " ms" << std::endl;
//	if (stage == 1) {
//		return pnet_res;
//	}
//	else if (stage == 2) {
//		return rnet_res;
//	}
//	else if (stage == 3) {
//		return onet_res;
//	}
//	else {
//		return onet_res;
//	}
//}

/**
 * Runs the detection cascade on `image` and returns the result of the requested stage.
 *
 * The minimum face size, per-stage score thresholds, pyramid factor and NMS
 * threshold are read from the members of this detector.
 *
 * @param image  Input image; forwarded unchanged to ProposalNet and NextStage.
 * @param stage  Last stage to run: 1 stops after ProposalNet, 2 additionally runs
 *               NextStage with a 24x24 input, 3 additionally runs NextStage with a
 *               48x48 input. Any other value yields the stage-3 container, which is
 *               empty when stage < 3.
 * @return The boxes produced by the selected stage (empty when nothing was found).
 */
vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int stage) {
	vector<FaceInfo> pnet_res;
	vector<FaceInfo> rnet_res;
	vector<FaceInfo> onet_res;

	// Feeds `candidates` to NextStage in chunks of at most step_size boxes and
	// concatenates the per-chunk results in their original order.
	auto run_in_batches = [&](const vector<FaceInfo>& candidates, int input_size,
	                          int stage_num, float threshold) -> vector<FaceInfo> {
		vector<FaceInfo> merged;
		const int num = (int)candidates.size();
		const int batches = (int)ceil((float)num / step_size);
		for (int iter = 0; iter < batches; ++iter) {
			const int first = iter*step_size;
			const int last = std::min(first + step_size, num);
			// NextStage takes a mutable reference, so each chunk is copied out.
			vector<FaceInfo> input(candidates.begin() + first, candidates.begin() + last);
			const vector<FaceInfo> res = NextStage(image, input, input_size, input_size, stage_num, threshold);
			merged.insert(merged.end(), res.begin(), res.end());
		}
		return merged;
	};

	if (stage >= 1) {
		pnet_res = ProposalNet(image, min_face, threhold_p, factor);
	}

	if (stage >= 2 && !pnet_res.empty()) {
		// Bound the work of the second stage by keeping only the first
		// pnet_max_detect_num proposals.
		if (pnet_max_detect_num < (int)pnet_res.size()) {
			pnet_res.resize(pnet_max_detect_num);
		}
		rnet_res = run_in_batches(pnet_res, 24, 2, threhold_r);
		rnet_res = NMS(rnet_res, iou_threhold, 'u');
		BBoxRegression(rnet_res);
		BBoxPadSquare(rnet_res, image.cols, image.rows);
	}

	if (stage >= 3 && !rnet_res.empty()) {
		onet_res = run_in_batches(rnet_res, 48, 3, threhold_o);
		// Unlike stage 2, regression is applied before NMS here.
		BBoxRegression(onet_res);
		onet_res = NMS(onet_res, iou_threhold, 'm');
		BBoxPad(onet_res, image.cols, image.rows);
	}

	if (stage == 1) {
		return pnet_res;
	}
	if (stage == 2) {
		return rnet_res;
	}
	return onet_res;
}

NCNN version2

#include <stdio.h>
#include <algorithm>
#include <vector>
#include <math.h>
#include <iostream>
#include <time.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
 
#include "net.h"
#include"cpu.h"
using namespace std;
using namespace cv;
 
// One candidate face box as it travels through the detection stages.
// Every member is value-initialised so that a freshly declared Bbox can be
// copied safely before all of its fields have been filled in.
struct Bbox
{
    float score = 0.f;           // face probability reported by the network
    int x1 = 0;                  // left edge, image pixels
    int y1 = 0;                  // top edge, image pixels
    int x2 = 0;                  // right edge, image pixels
    int y2 = 0;                  // bottom edge, image pixels
    float area = 0.f;            // (x2 - x1) * (y2 - y1), cached for NMS
    bool exist = false;          // false once the box has been rejected or suppressed
    float ppoint[10] = {};       // landmarks: x of the 5 points in [0..4], y in [5..9]
    float regreCoord[4] = {};    // regression offsets for x1, y1, x2, y2 (relative to box size)
};
 
// Sort key used by NMS: a score together with the index of the box it belongs to.
// Members are value-initialised so a default-constructed entry is well defined.
struct orderScore
{
    float score = 0.f;   // score of the referenced box
    int oriOrder = 0;    // index of the box in its vector; NMS writes -1 to mark it suppressed
};
 
/**
 * Bilinearly resamples the planar float image `srcImage` into the already
 * allocated `dstImage`; the target size is taken from dstImage.w / dstImage.h.
 *
 * Channel planes are addressed through each Mat's own `cstep`, rows inside a
 * plane are assumed to be tightly packed (`w` floats per row), as in the
 * original implementation. Only the channels present in both images are written.
 * Nothing is written when either image has a non-positive dimension.
 *
 * @param srcImage  Source image (float data).
 * @param dstImage  Destination image; its contents are overwritten.
 */
void resize_image(ncnn::Mat& srcImage, ncnn::Mat& dstImage)
{
	const int src_width = srcImage.w;
	const int src_height = srcImage.h;
	const int dst_width = dstImage.w;
	const int dst_height = dstImage.h;
	const int channels = (srcImage.c < dstImage.c) ? srcImage.c : dstImage.c;

	// Nothing to sample from / write to.
	if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0 || channels <= 0)
		return;

	const float* src_data = srcImage.data;
	float* dest_data = dstImage.data;
	const size_t src_area = srcImage.cstep;
	const size_t dst_area = dstImage.cstep;

	if (src_width == dst_width && src_height == dst_height)
	{
		// Same geometry: plain copy, done plane by plane because consecutive
		// planes are cstep (not w*h) elements apart.
		const size_t plane_bytes = static_cast<size_t>(src_width) * src_height * sizeof(float);
		for (int c = 0; c < channels; c++)
			memcpy(dest_data + c * dst_area, src_data + c * src_area, plane_bytes);
		return;
	}

	const float lf_x_scl = static_cast<float>(src_width) / dst_width;
	const float lf_y_scl = static_cast<float>(src_height) / dst_height;

	for (int y = 0; y < dst_height; y++) {
		const float lf_y_s = lf_y_scl * y;

		// Upper row of the 2x2 neighbourhood, kept inside [0, src_height - 2]
		// (or 0 for a single-row source, where both rows coincide).
		int y0 = static_cast<int>(lf_y_s);
		if (y0 > src_height - 2) y0 = src_height - 2;
		if (y0 < 0) y0 = 0;
		const int y1 = (y0 + 1 < src_height) ? (y0 + 1) : y0;
		// NOTE: when upsampling, this weight can exceed 1 on the last rows, which
		// extrapolates slightly past the border; kept as in the original code.
		const float lf_weight_y = lf_y_s - y0;

		for (int x = 0; x < dst_width; x++) {
			const float lf_x_s = lf_x_scl * x;

			int x0 = static_cast<int>(lf_x_s);
			if (x0 > src_width - 2) x0 = src_width - 2;
			if (x0 < 0) x0 = 0;
			const int x1 = (x0 + 1 < src_width) ? (x0 + 1) : x0;
			const float lf_weight_x = lf_x_s - x0;

			const size_t top_left = static_cast<size_t>(y0) * src_width + x0;
			const size_t top_right = static_cast<size_t>(y0) * src_width + x1;
			const size_t bottom_left = static_cast<size_t>(y1) * src_width + x0;
			const size_t bottom_right = static_cast<size_t>(y1) * src_width + x1;
			const size_t dst_index = static_cast<size_t>(y) * dst_width + x;

			for (int c = 0; c < channels; c++) {
				const float* src_plane = src_data + c * src_area;
				dest_data[c * dst_area + dst_index] =
					(1 - lf_weight_y) * ((1 - lf_weight_x) * src_plane[top_left] +
					lf_weight_x * src_plane[top_right]) +
					lf_weight_y * ((1 - lf_weight_x) * src_plane[bottom_left] +
					lf_weight_x * src_plane[bottom_right]);
			}
		}
	}
}
 
// Ordering predicate for sort(): lsh goes before rsh when its score is strictly
// lower, which yields an ascending sequence (best score last).
bool cmpScore(orderScore lsh, orderScore rsh){
    return lsh.score < rsh.score;
}
 
// Three-stage face detector built on the ncnn networks Pnet, Rnet and Onet.
class mtcnn{
public:
    // Fills in the thresholds / normalisation constants and loads the three models.
    mtcnn();
    // Runs the cascade on img_ and writes the resulting boxes to finalBbox.
    void detect(ncnn::Mat& img_, std::vector<Bbox>& finalBbox);
	// Not referenced by the active code in this file; only commented-out debug drawing uses it.
	cv::Mat cp_img;
private:
    // Turns the Pnet score / location maps of one pyramid level (at `scale`) into boxes and their sort keys.
    void generateBbox(ncnn::Mat score, ncnn::Mat location, vector<Bbox>& boundingBox_, vector<orderScore>& bboxScore_, float scale);
    // Non-maximum suppression; modelname is compared against "Union" and "Min" to pick the overlap measure.
    void nms(vector<Bbox> &boundingBox_, std::vector<orderScore> &bboxScore_, const float overlap_threshold, string modelname="Union");
    // Applies the regression offsets, makes each box square and clips it against the image size.
    void refineAndSquareBbox(vector<Bbox> &vecBbox, const int &height, const int &width);
 
    ncnn::Net Pnet, Rnet, Onet;
    // Working image of the current detect() call.
    ncnn::Mat img;
 
	float nms_threshold[3];// NMS overlap thresholds, one per stage; the constructor sets {0.5, 0.7, 0.7}
	float threshold[3];// score thresholds, one per stage; the constructor sets {0.7, 0.7, 0.7}
	float mean_vals[3];// per-channel mean passed to substract_mean_normalize; the constructor sets 127.5
	float norm_vals[3];// per-channel scale passed to substract_mean_normalize; the constructor sets 0.0078125
    // Boxes (and their NMS sort keys below) collected by stage 1, 2 and 3 of detect().
    std::vector<Bbox> firstBbox_, secondBbox_,thirdBbox_;
    std::vector<orderScore> firstOrderScore_, secondBboxScore_, thirdBboxScore_;
    // Width and height of `img`.
    int img_w, img_h;
	
};
 
/**
 * Initialises the per-stage thresholds and the normalisation constants, then
 * loads the parameter and weight files of Pnet, Rnet and Onet from hard-coded
 * paths. A file that fails to load is reported on stderr; construction still
 * completes, as before.
 */
mtcnn::mtcnn(){
	for (int i = 0; i < 3; i++)
	{
		nms_threshold[i] = 0.7f;
		threshold[i] = 0.7f;
		mean_vals[i] = 127.5f;
		norm_vals[i] = 0.0078125f;
	}
	// The first stage uses a stricter overlap threshold than the later two.
	nms_threshold[0] = 0.5f;

	struct ModelFiles
	{
		ncnn::Net* net;
		const char* param;
		const char* bin;
	};
	const ModelFiles models[3] = {
		{ &Pnet, "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det1.param",
		         "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det1.bin" },
		{ &Rnet, "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det2.param",
		         "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det2.bin" },
		{ &Onet, "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det3.param",
		         "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det3.bin" },
	};

	for (int i = 0; i < 3; i++)
	{
		// Both loaders return 0 on success; do not swallow a failure silently,
		// otherwise detect() later runs on an empty network.
		if (models[i].net->load_param(models[i].param) != 0)
			fprintf(stderr, "mtcnn: failed to load param file %s\n", models[i].param);
		if (models[i].net->load_model(models[i].bin) != 0)
			fprintf(stderr, "mtcnn: failed to load model file %s\n", models[i].bin);
	}
}
 
/******************generateBbox******************************/
// Scans the Pnet face-probability map and, for every cell whose probability
// exceeds threshold[0], appends the corresponding 12x12 sliding window (mapped
// back to original-image coordinates through `scale`) to boundingBox_, together
// with its score, its regression offsets and a sort key in bboxScore_.
//
// score     Pnet "prob1" output; channel 1 holds the face probability.
// location  Pnet "conv4-2" output; channels 0..3 hold the box regression offsets.
// scale     Pyramid scale at which the maps were computed.
void mtcnn::generateBbox(ncnn::Mat score, ncnn::Mat location, std::vector<Bbox>& boundingBox_, std::vector<orderScore>& bboxScore_, float scale){
    const int stride = 2;    // Pnet contains one 2x2 max-pooling, so neighbouring cells are 2 input pixels apart
    const int cellsize = 12; // side length of the Pnet sliding window

    // The offsets are looked up at the same cell as the score, so both maps
    // must have the same spatial size.
    if(location.w!=score.w || location.h!=score.h){
        fprintf(stderr, "generateBbox: score map %dx%d and location map %dx%d differ\n",
                score.w, score.h, location.w, location.h);
        return;
    }

    float *p = score.channel(1); // probability of "face"
    Bbox bbox;
    orderScore order;
    for(int row=0;row<score.h;row++){
        for(int col=0;col<score.w;col++){
            const int index = row*score.w + col;
            const float prob = p[index];
            if(prob>threshold[0]){
                bbox.score = prob;
                order.score = prob;
                // The sort key refers to the slot the box is about to occupy.
                order.oriOrder = (int)boundingBox_.size();
                // Window corners converted back to original-image coordinates.
                bbox.x1 = round((stride*col+1)/scale);
                bbox.y1 = round((stride*row+1)/scale);
                bbox.x2 = round((stride*col+1+cellsize)/scale);
                bbox.y2 = round((stride*row+1+cellsize)/scale);
                bbox.exist = true;
                bbox.area = (bbox.x2 - bbox.x1)*(bbox.y2 - bbox.y1);
                // Regression offsets of THIS cell (previously element [0] of each
                // channel was used for every box).
                for(int channel=0;channel<4;channel++)
                    bbox.regreCoord[channel]=location.channel(channel)[index];
                boundingBox_.push_back(bbox);
                bboxScore_.push_back(order);
            }
        }
    }
}
 
/**********************nms非极大值抑制****************************/
void mtcnn::nms(std::vector<Bbox> &boundingBox_, std::vector<orderScore> &bboxScore_, const float overlap_threshold, string modelname){
    if(boundingBox_.empty()){
        return;
    }
    std::vector<int> heros;
    //sort the score
    sort(bboxScore_.begin(), bboxScore_.end(), cmpScore);//cmpScore指定升序排列
 
    int order = 0;
    float IOU = 0;
    float maxX = 0;
    float maxY = 0;
    float minX = 0;
    float minY = 0;
	//规则，站上擂台的擂台主，永远都是胜利者。
    while(bboxScore_.size()>0){
        order = bboxScore_.back().oriOrder;//取得分最高勇士的编号ID。
        bboxScore_.pop_back();//勇士出列
        if(order<0)continue;//死的？下一个！（order在(*it).oriOrder = -1;改变）
        heros.push_back(order);//记录擂台主ID
        boundingBox_.at(order).exist = false;//当前这个Bbox为擂台主，签订生死簿。
 
        for(int num=0;num<boundingBox_.size();num++){
            if(boundingBox_.at(num).exist){//活着的勇士
                //the iou
                maxX = (boundingBox_.at(num).x1>boundingBox_.at(order).x1)?boundingBox_.at(num).x1:boundingBox_.at(order).x1;
                maxY = (boundingBox_.at(num).y1>boundingBox_.at(order).y1)?boundingBox_.at(num).y1:boundingBox_.at(order).y1;
                minX = (boundingBox_.at(num).x2<boundingBox_.at(order).x2)?boundingBox_.at(num).x2:boundingBox_.at(order).x2;
                minY = (boundingBox_.at(num).y2<boundingBox_.at(order).y2)?boundingBox_.at(num).y2:boundingBox_.at(order).y2;
                //maxX1 and maxY1 reuse 
                maxX = ((minX-maxX+1)>0)?(minX-maxX+1):0;
                maxY = ((minY-maxY+1)>0)?(minY-maxY+1):0;
                //IOU reuse for the area of two bbox
                IOU = maxX * maxY;
                if(!modelname.compare("Union"))
                    IOU = IOU/(boundingBox_.at(num).area + boundingBox_.at(order).area - IOU);
                else if(!modelname.compare("Min")){
                    IOU = IOU/((boundingBox_.at(num).area<boundingBox_.at(order).area)?boundingBox_.at(num).area:boundingBox_.at(order).area);
                }
                if(IOU>overlap_threshold){
                    boundingBox_.at(num).exist=false;//如果该对比框与擂台主的IOU够大，挑战者勇士战死
                    for(vector<orderScore>::iterator it=bboxScore_.begin(); it!=bboxScore_.end();it++){
                        if((*it).oriOrder == num) {
                            (*it).oriOrder = -1;//勇士战死标志
                            break;
                        }
                    }
                }//else 那些距离擂台主比较远迎战者幸免于难，将有机会作为擂台主出现
            }
        }
    }
    for(int i=0;i<heros.size();i++)
        boundingBox_.at(heros.at(i)).exist = true;//从生死簿上剔除，擂台主活下来了
}
// Applies the stored regression offsets to every existing box, grows the result
// to a square around its centre and clips it to the image.
//
// vecBbox  Boxes to refine in place; boxes with exist == false are skipped.
// height   Image height in pixels.
// width    Image width in pixels.
//
// After clipping, all four coordinates lie inside [0, width-1] x [0, height-1].
// A box that has no extent left (it was outside the image or collapsed to a
// line) is flagged as non-existent so later stages do not try to crop it.
void mtcnn::refineAndSquareBbox(vector<Bbox> &vecBbox, const int &height, const int &width){
    if(vecBbox.empty()){
        cout<<"Bbox is empty!!"<<endl;
        return;
    }
    for(vector<Bbox>::iterator it=vecBbox.begin(); it!=vecBbox.end(); ++it){
        if(!it->exist) continue;

        // Size of the current box; the offsets are relative to it.
        const float bbw = it->x2 - it->x1 + 1;
        const float bbh = it->y2 - it->y1 + 1;
        float x1 = it->x1 + it->regreCoord[0]*bbw;
        float y1 = it->y1 + it->regreCoord[1]*bbh;
        const float x2 = it->x2 + it->regreCoord[2]*bbw;
        const float y2 = it->y2 + it->regreCoord[3]*bbh;

        // Size of the regressed box.
        const float w = x2 - x1 + 1;
        const float h = y2 - y1 + 1;

        // Square with the longer side, centred on the regressed box.
        const float maxSide = (h>w)?h:w;
        x1 = x1 + w*0.5 - maxSide*0.5;
        y1 = y1 + h*0.5 - maxSide*0.5;
        it->x2 = round(x1 + maxSide - 1);
        it->y2 = round(y1 + maxSide - 1);
        it->x1 = round(x1);
        it->y1 = round(y1);

        // Clip to the last valid pixel index (width-1 / height-1).
        if(it->x1<0) it->x1 = 0;
        if(it->y1<0) it->y1 = 0;
        if(it->x1>width-1) it->x1 = width - 1;
        if(it->y1>height-1) it->y1 = height - 1;
        if(it->x2<0) it->x2 = 0;
        if(it->y2<0) it->y2 = 0;
        if(it->x2>width-1) it->x2 = width - 1;
        if(it->y2>height-1) it->y2 = height - 1;

        it->area = (it->x2 - it->x1)*(it->y2 - it->y1);

        if(it->x2<=it->x1 || it->y2<=it->y1)
            it->exist = false;
    }
}
// Runs the three-stage cascade (Pnet -> Rnet -> Onet) on img_.
//
// img_        Input image as produced by ncnn::Mat::from_pixels. It is left
//             untouched; normalisation is done on a private copy.
// finalBbox_  Receives the stage-3 boxes (cleared first). Entries whose `exist`
//             flag is false were suppressed by the final NMS and should be
//             skipped by the caller. Left empty when any stage finds nothing.
//
// NOTE(review): the Rnet/Onet outputs are read as channel(k)[0], i.e. one value
// per channel - confirm that this matches the blob layout of the ncnn version in use.
void mtcnn::detect(ncnn::Mat& img_, std::vector<Bbox>& finalBbox_){
    // Start from a clean state so that an early return below can neither leave
    // results of a previous call in the output nor in the scratch vectors.
    finalBbox_.clear();
    firstBbox_.clear();
    firstOrderScore_.clear();
    secondBbox_.clear();
    secondBboxScore_.clear();
    thirdBbox_.clear();
    thirdBboxScore_.clear();

    // Assigning an ncnn::Mat shares the pixel buffer, so work on a deep copy;
    // otherwise the caller's image would be normalised in place (and normalised
    // twice if it were passed in again).
    img = img_.clone();
    img_w = img.w;
    img_h = img.h;
    img.substract_mean_normalize(mean_vals, norm_vals); // (pixel - mean) * norm

    // ---- image pyramid ----------------------------------------------------
    const int MIN_DET_SIZE = 12; // Pnet window size
    const int minsize = 40;      // smallest face searched for; smaller values add pyramid levels and cost
    // NOTE(review): 0.409 shrinks much faster per level than the 0.709 default of
    // the other implementation in this file - confirm that this is intended.
    const float factor = 0.409f;

    float m = (float)MIN_DET_SIZE/minsize;
    float minl = (img_w<img_h?img_w:img_h) * m;
    vector<float> scales_;
    while(minl>MIN_DET_SIZE){
        scales_.push_back(m);
        m = m*factor;
        minl *= factor;
    }

    orderScore order;
    int count = 0;

    // ---- stage 1: Pnet on every pyramid level ------------------------------
    for (size_t i = 0; i < scales_.size(); i++) {
        const int hs = (int)ceil(img_h*scales_[i]);
        const int ws = (int)ceil(img_w*scales_[i]);
        ncnn::Mat in(ws, hs, 3);
        resize_image(img, in);
        ncnn::Extractor ex = Pnet.create_extractor();
        ex.set_light_mode(true);
        printf("Pnet input width:%d, height:%d, channel:%d\n",in.w,in.h,in.c);
        ex.input("data", in); // Pnet is fully convolutional, so any input size is accepted
        ncnn::Mat score_, location_;
        ex.extract("prob1", score_);
        printf("prob1 w:%d, h:%d, ch:%d, first data:%f\n", score_.w, score_.h, score_.c, score_.data[0]);
        ex.extract("conv4-2", location_);

        std::vector<Bbox> boundingBox_;
        std::vector<orderScore> bboxScore_;
        generateBbox(score_, location_, boundingBox_, bboxScore_, scales_[i]);
        nms(boundingBox_, bboxScore_, nms_threshold[0]); // NMS within this level

        // Survivors of this level join the global stage-1 list.
        for(vector<Bbox>::iterator it=boundingBox_.begin(); it!=boundingBox_.end(); ++it){
            if(!it->exist) continue;
            firstBbox_.push_back(*it);
            order.score = it->score;
            order.oriOrder = count;
            firstOrderScore_.push_back(order);
            count++;
        }
    }
    if(count<1)return;
    nms(firstBbox_, firstOrderScore_, nms_threshold[0]); // NMS across all levels
    refineAndSquareBbox(firstBbox_, img_h, img_w);
    printf("firstBbox_.size()=%d\n", (int)firstBbox_.size());

    // ---- stage 2: Rnet on 24x24 crops ---------------------------------------
    count = 0;
    for(vector<Bbox>::iterator it=firstBbox_.begin(); it!=firstBbox_.end(); ++it){
        if(!it->exist) continue;

        ncnn::Mat tempIm;
        copy_cut_border(img, tempIm, it->y1, img_h-it->y2, it->x1, img_w-it->x2);
        ncnn::Mat in(24, 24, 3);
        resize_image(tempIm, in);
        ncnn::Extractor ex = Rnet.create_extractor();
        ex.set_light_mode(true);
        ex.input("data", in);
        ncnn::Mat score, bbox;
        ex.extract("prob1", score);
        ex.extract("conv5-2", bbox);

        const float face_prob = score.channel(1)[0];
        if(face_prob>threshold[1]){
            for(int channel=0;channel<4;channel++)
                it->regreCoord[channel]=bbox.channel(channel)[0];
            it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
            it->score = face_prob;
            secondBbox_.push_back(*it);
            order.score = it->score;
            order.oriOrder = count++;
            secondBboxScore_.push_back(order);
        }
        else{
            it->exist=false;
        }
    }
    printf("secondBbox_.size()=%d\n", (int)secondBbox_.size());
    if(count<1)return;
    nms(secondBbox_, secondBboxScore_, nms_threshold[1]);
    refineAndSquareBbox(secondBbox_, img_h, img_w);

    // ---- stage 3: Onet on 48x48 crops, also yields the landmarks -----------
    count = 0;
    for(vector<Bbox>::iterator it=secondBbox_.begin(); it!=secondBbox_.end(); ++it){
        if(!it->exist) continue;

        ncnn::Mat tempIm;
        copy_cut_border(img, tempIm, it->y1, img_h-it->y2, it->x1, img_w-it->x2);
        ncnn::Mat in(48, 48, 3);
        resize_image(tempIm, in);
        ncnn::Extractor ex = Onet.create_extractor();
        ex.set_light_mode(true);
        ex.input("data", in);
        ncnn::Mat score, bbox, keyPoint;
        ex.extract("prob1", score);
        ex.extract("conv6-2", bbox);
        ex.extract("conv6-3", keyPoint);

        const float face_prob = score.channel(1)[0];
        if(face_prob>threshold[2]){
            for(int channel=0;channel<4;channel++)
                it->regreCoord[channel]=bbox.channel(channel)[0];
            it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
            it->score = face_prob;
            // Landmarks are relative to the box: x values go to ppoint[0..4],
            // y values to ppoint[5..9].
            for(int num=0;num<5;num++){
                (it->ppoint)[num] = it->x1 + (it->x2 - it->x1)*keyPoint.channel(num)[0];
                (it->ppoint)[num+5] = it->y1 + (it->y2 - it->y1)*keyPoint.channel(num+5)[0];
            }
            thirdBbox_.push_back(*it);
            order.score = it->score;
            order.oriOrder = count++;
            thirdBboxScore_.push_back(order);
        }
        else{
            it->exist=false;
        }
    }
    printf("thirdBbox_.size()=%d\n", (int)thirdBbox_.size());
    if(count<1)return;
    // Unlike the first two stages, boxes are refined before NMS here.
    refineAndSquareBbox(thirdBbox_, img_h, img_w);
    nms(thirdBbox_, thirdBboxScore_, nms_threshold[2], "Min");
    finalBbox_ = thirdBbox_;

    firstBbox_.clear();
    firstOrderScore_.clear();
    secondBbox_.clear();
    secondBboxScore_.clear();
    thirdBbox_.clear();
    thirdBboxScore_.clear();
}
 
// Demo driver: loads an image (path from argv[1], otherwise a built-in default),
// runs the detector on it, prints and draws every detected box with its five
// landmarks, and reports the mean processing time per run.
int main(int argc, char** argv)
{
	/****** load image (start) *******/
	const char* imagepath = (argc == 2) ? argv[1] : "E:/Algrithm/ncnn/ncnn/x64/Release/test2.jpg";
	cout << imagepath << endl;
	cv::Mat cv_img = cv::imread(imagepath);
	if (cv_img.empty())
	{
		fprintf(stderr, "cv::imread %s failed\n", imagepath);
		system("pause");
		return -1;
	}
	printf("img w: %d  h:%d  ch:%d\n",cv_img.cols,cv_img.rows,cv_img.channels());
	imshow("img",cv_img);
	waitKey(10);
	/*************** load image (end) ********************/

	/*********** MTCNN (start) ************/
	const clock_t start = clock();
	const int times = 1;
	ncnn::set_omp_num_threads(4);
	for (int cnt = 0; cnt < times; cnt++)
	{
		std::vector<Bbox> finalBbox;
		mtcnn Net;
		// OpenCV delivers BGR pixels; the networks expect RGB, otherwise the
		// detection rate drops considerably.
		ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(cv_img.data, ncnn::Mat::PIXEL_BGR2RGB, cv_img.cols, cv_img.rows);
		Net.detect(ncnn_img, finalBbox);
		for (vector<Bbox>::iterator it = finalBbox.begin(); it != finalBbox.end(); ++it){
			if (!it->exist)
				continue;
			// Arguments follow the order announced by the label: (x1,y1), (x2,y2).
			printf("Bbox [x1,y1], [x2,y2]:[%d,%d], [%d,%d] \n", it->x1, it->y1, it->x2, it->y2);
			rectangle(cv_img, Point(it->x1, it->y1), Point(it->x2, it->y2), Scalar(0, 0, 255), 2, 8, 0);
			for (int num = 0; num < 5; num++)
			{
				// ppoint holds the five x values followed by the five y values.
				const int px = (int)it->ppoint[num];
				const int py = (int)it->ppoint[num + 5];
				printf("Landmark [x1,y1]: [%d,%d] \n", px, py);
				circle(cv_img, Point(px, py), 3, Scalar(0, 255, 255), -1);
			}
		}
	}
	/*********** MTCNN (end) ************/
	// clock() counts ticks, not milliseconds: convert through CLOCKS_PER_SEC.
	const double elapsed_ms = 1000.0 * (double)(clock() - start) / CLOCKS_PER_SEC;
	printf("MTCNN mean time comsuming: %f ms\n", elapsed_ms / times);
	imshow("result.jpg",cv_img);
	waitKey(100);
	system("pause");
	return 0;
}
#endif

NineDays66

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
MTCNN caffe 与 ncnn 实现代码

Caffe versionmtcnn.h#ifndef _MTCNN_H_#define _MTCNN_H_#include <caffe/caffe.hpp>#include <opencv2/opencv.hpp>#include <vector>#ifdef _OPENMP#include <omp.h>#endif...
复制链接

扫一扫