环境准备
- Python3
- OpenCV 4.5.0 (4版本以上都可以)
- Mask-RCNN预训练模型
利用OpenCV自带的工具生成预训练模型对应的描述文件
在opencv源码路径下opencv450/opencv-4.5.0/samples/dnn
找到对应的工具
利用工具生成预训练模型对应的描述文件
python ./tf_text_graph_mask_rcnn.py --input /home/mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --output /home/mask_rcnn_inception_v2_coco_2018_01_28/graph.pbtxt --config /home/mask_rcnn_inception_v2_coco_2018_01_28/pipeline.config
现在有了描述文件,和模型文件后就可以使用opencv的dnn模块来调用啦
DNN模块使用
主要涉及的函数很简单,就以下几个:
- readNetFromTensorflow
- blobFromImage
- setInput
- forward
Mat cv::dnn::blobFromImage(
InputArray image,
double scalefactor = 1.0,
const Size & size = Size(),
const Scalar & mean = Scalar(),
bool swapRB = false,
bool crop = false,
int ddepth = CV_32F )
//image:输入图像
//scalefactor:像素值的缩放系数,每个像素都会乘以该系数
//size:指的不是输入图像的尺寸,是指所需要的尺寸,也就是返回的Mat中数据的尺寸。
//mean:从各通道中减去的均值,用于数据归一化
//swapRB:是否交换R和B分量,这在之前的色彩空间转换的文章中介绍过
//crop:输入图像大小与size不符的时候,是否需要裁剪
//ddepth:图像的数据类型,目前仅支持32F和8U
demo
#include <opencv2/opencv.hpp>
// COCO class-name lookup table, indexed directly by the 0-based class id the
// detector emits (detectionMat column 1). The TF Object Detection API COCO
// label map spans ids 0..89 but leaves 10 ids unused (11, 25, 28, 29, 44, 65,
// 67, 68, 70, 82) — those slots are empty strings here, matching OpenCV's
// mscoco_labels.names. A contiguous 80-name list would mislabel every
// detection with class id >= 11.
char classname[90][32] = { "person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light",
"fire hydrant","","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra",
"giraffe","","backpack","umbrella","","","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat",
"baseball glove","skateboard","surfboard","tennis racket","bottle","","wine glass","cup","fork","knife","spoon","bowl",
"banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant",
"bed","","dining table","","","toilet","","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster",
"sink","refrigerator","","book","clock","vase","scissors","teddy bear","hair drier","toothbrush" };
int main()
{
cv::namedWindow("src", cv::WINDOW_NORMAL);
cv::String pb_model = "/home/mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb";
cv::String pb_txt = "/home/mask_rcnn_inception_v2_coco_2018_01_28/graph.pbtxt";
cv::Mat frame = cv::imread("./tmp/test1.jpg");
// 加载模型
cv::dnn::Net net = cv::dnn::readNetFromTensorflow(pb_model, pb_txt);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
//net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
//net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
// 设置数据
cv::Mat blobImage = cv::dnn::blobFromImage(frame, 1.0, cv::Size(1365, 800), cv::Scalar(0, 0, 0), true, false);
printf("blobImage width : %d, height: %d\n", blobImage.size[3], blobImage.size[2]);
net.setInput(blobImage);
// 推理
std::vector<cv::String> out_names;
out_names.push_back("detection_out_final");
out_names.push_back("detection_masks");
std::vector<cv::Mat> outs;
net.forward(outs, out_names);
cv::Mat detection = outs[0];
int id = outs[1].size[0];
int numClasses = outs[1].size[1];
int mh = outs[1].size[2];
int mw = outs[1].size[3];
cv::Mat masks = outs[1]; // Nx90x15x15
printf("id: %d, numClasses:%d, m:%d, s:%d \n", id, numClasses, mh, mw);
// 输出
cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
float confidence_threshold = 0.6;
for (int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidence_threshold)
{
size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
float br_x = detectionMat.at<float>(i, 5) * frame.cols;
float br_y = detectionMat.at<float>(i, 6) * frame.rows;
cv::Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
cv::rectangle(frame, object_box, cv::Scalar(255, 0, 255), 1, 8, 0);
cv::putText(frame, cv::format(" %s %.2f", classname[objIndex], confidence), cv::Point(tl_x - 10, tl_y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(0, 255, 0), 2, 8);
// 解析mask
cv::Mat mask(masks.size[2], masks.size[3], CV_32F, masks.ptr<float>(i, objIndex));
cv::Mat color_mask = cv::Mat::zeros(mask.size(), CV_8UC3);
cv::Mat bin_mask = cv::Mat::zeros(mask.size(), CV_8UC1);
for (int row = 0; row < color_mask.rows; row++)
{
for (int col = 0; col < color_mask.cols; col++)
{
float m = mask.at<float>(row, col);
if (m >= 0.5)
{
color_mask.at<cv::Vec3b>(row, col) = cv::Vec3b(0, 0, 255);
bin_mask.at<uchar>(row, col) = 255;
}
}
}
cv::Mat roi = frame(object_box);
cv::resize(color_mask, color_mask, roi.size());
cv::resize(bin_mask, bin_mask, roi.size());
cv::Mat result;
cv::bitwise_and(roi, roi, result, bin_mask);
cv::addWeighted(roi, 0.5, color_mask, 0.5, 0, roi);
}
}
cv::imshow("src", frame);
cv::waitKey();
}