yolov3-tiny + RealSense D455: obtaining target depth and position information

References (recovering the real-world position from the camera intrinsics):
https://www.freesion.com/article/3585644384/
https://blog.csdn.net/SFM2020/article/details/84591758
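The key step those links describe is the pinhole back-projection: given a pixel (u, v) and its depth Z, the camera intrinsics give the real-world position in the camera frame. A minimal sketch of that math (my own illustration; it ignores lens distortion, which librealsense's rs2_deproject_pixel_to_point() would additionally handle):

#include <librealsense2/rs.hpp>

// Back-project a colour pixel (u, v) with depth z (metres) into the colour camera frame.
static void pixel_to_point(const rs2_intrinsics& intrin, float u, float v, float z, float point[3])
{
    point[0] = (u - intrin.ppx) * z / intrin.fx; // X (metres)
    point[1] = (v - intrin.ppy) * z / intrin.fy; // Y (metres)
    point[2] = z;                                // Z (metres)
}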

yolov3-tiny download: GitHub - smarthomefans/darknet-test (darknet test)

File structure

 .h file

#include <iostream>
#include <string>
#include <vector>
#include <chrono>
#include <mutex>  // C++11 mutual-exclusion classes and helpers
#include <thread> // threading support
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/types_c.h>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/dnn.hpp>
#include <algorithm>
#include <librealsense2/rs.hpp>
#include <librealsense2/rsutil.h>
using namespace std;
using namespace cv;
using namespace dnn;
// Clamp helpers: limit_L keeps (a - b) from going below 0, limit_H keeps (a + b) from exceeding max.
#define limit_L(a,b)      (((a)-(b)>0)?((a)-(b)):0)
#define limit_H(a,b,max)  (((a)+(b)>(max))?(max):((a)+(b)))

#define STREAM          RS2_STREAM_DEPTH  // rs2_stream identifies the type of data provided by the RealSense device //
#define FORMAT          RS2_FORMAT_Z16    // rs2_format identifies how binary data is encoded within a frame         //
#define WIDTH           640               // Defines the number of columns for each frame, or zero for auto-resolve  //
#define HEIGHT          480               // Defines the number of lines for each frame, or zero for auto-resolve    //
#define FPS             30                // Defines the rate of frames per second                                   //
#define STREAM_INDEX    0                 // Defines the stream index, used for multiple streams of the same type    //
#define HEIGHT_RATIO    20                // Defines the height ratio between the original frame and the new frame   //
#define WIDTH_RATIO     10                // Defines the width ratio between the original frame and the new frame    //


#define STREAM1          RS2_STREAM_COLOR  // rs2_stream identifies the type of data provided by the RealSense device //
#define FORMAT1          RS2_FORMAT_RGB8   // rs2_format identifies how binary data is encoded within a frame         //
#define FPS1             30                // Defines the rate of frames per second                                   //
#define STREAM_INDEX1    0                 // Defines the stream index, used for multiple streams of the same type    //

// Remove the bounding boxes with low confidence using non-maxima suppression
void postprocess(Mat& frame, const vector<Mat>& outs);
// Draw the predicted bounding box
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
// Get the names of the output layers
vector<String> getOutputsNames(const Net& net);
// Run YOLO detection on the colour frames shared by the capture thread
void detect_camera(string modelWeights, string modelConfiguration, string classesFile);
// Get the scale that converts depth pixel units to metres (defined in the .cpp file)
float get_depth_scale(rs2::device dev);
// Look up the depth at a colour-image location and draw the result onto the colour image
uint16_t get_color_depth(Mat color_c,rs2_intrinsics color_intri,Mat depth_c,int color_cols,int color_rows,int depth_err);
struct color_point
{
    int x;
    int y;
    double color;
}color_Point;
Mat image_yolov3,yolov3_depth;
rs2_intrinsics color_intri_all;
std::mutex mtx_yolo;



 .cpp file

#include "./include/realsense_yolo.h"
vector<string> classes;
int inpWidth = WIDTH;  // Width of network's input image
int inpHeight= HEIGHT; // Height of network's input image
// Maximum depth of interest in metres. Note: limit_depth is used below but was not defined in
// the original listing; 3 m is assumed here to match the commented-out 3 m clipping code.
const float limit_depth = 3.0f;
string dir="/home/user-e290/Desktop/depth_yolov3/files/";
float confThreshold = 0.25; // Confidence threshold
float nmsThreshold = 0.45;  // Non-maximum suppression threshold
string modelConfiguration = dir+"yolov3-tiny.cfg";
string modelWeights = dir+"yolov3-tiny.weights";
//string modelConfiguration = dir+"yolov3.cfg";
//string modelWeights = dir+"yolov3.weights";
string classesFile = dir+"coco.names";
void postprocess(Mat& frame, const vector<Mat>& outs)
{
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;
	for (size_t pos_i = 0; pos_i < outs.size(); ++pos_i)
	{
		// Scan through all the bounding boxes output from the network and keep only the
		// ones with high confidence scores. Assign the box's class label as the class
		// with the highest score for the box.
		float* data = (float*)outs[pos_i].data;
		for (int pos_j = 0; pos_j < outs[pos_i].rows; ++pos_j, data += outs[pos_i].cols)
		{
			Mat scores = outs[pos_i].row(pos_j).colRange(5, outs[pos_i].cols);
			Point classIdPoint;
			double confidence;

			// Get the value and location of the maximum score
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > confThreshold)
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}
	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		drawPred(classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame);
        get_color_depth(frame,color_intri_all,yolov3_depth,box.x + box.width/2,box.y + box.height/2,20);
	}
}
// Get the names of the output layers
vector<String> getOutputsNames(const Net& net)
{
	static vector<String> names;
	if (names.empty())
	{
		//Get the indices of the output layers, i.e. the layers with unconnected outputs
		vector<int> outLayers = net.getUnconnectedOutLayers();
		//get the names of all the layers in the network
		vector<String> layersNames = net.getLayerNames();
		// Get the names of the output layers in names
		names.resize(outLayers.size());
		for (size_t get_i = 0; get_i < outLayers.size(); ++get_i)
			names[get_i] = layersNames[outLayers[get_i] - 1];
	}
	return names;
}
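// Note (sketch, assuming OpenCV >= 3.4.2): the DNN module can also return the output layer
// names directly, so the index lookup above could be replaced with a single call:
//     net.forward(outs, net.getUnconnectedOutLayersNames());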
// Draw the predicted bounding box
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
	//Draw a rectangle displaying the bounding box
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);
	//Get the label for the class name and its confidence
	string label = format("%.2f", conf);
	if (!classes.empty())
	{
		CV_Assert(classId < (int)classes.size());
		label = classes[classId] + ":" + label;
	}	
	//Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
	top = max(top, labelSize.height);
	rectangle(frame, Point(left, top - round(1.5*labelSize.height)), Point(left + round(1.5*labelSize.width), top + baseLine), Scalar(255, 255, 255), FILLED);
	cv::putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);
}
 // camera detection thread
 void detect_camera(string modelWeights, string modelConfiguration, string classesFile) 
 {
	Net net = readNetFromDarknet(modelConfiguration, modelWeights);
	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);	
	Mat frame, blob;
	// Process frames.
    std::this_thread::sleep_for(std::chrono::seconds(3));
	while (1)
	{
        frame=image_yolov3;
       // std::this_thread::sleep_for(std::chrono::milliseconds(10)); // added
		if (frame.empty()) {
			cout << "Waiting for camera frames..." << endl;
            std::this_thread::sleep_for(std::chrono::seconds(3));
		}
        else{
            // Create a 4D blob from a frame.
            blobFromImage(frame, blob, 1 / 255.0, cv::Size(inpWidth, inpHeight), Scalar(0, 0, 0), true, false);		
            //Sets the input to the network
            net.setInput(blob);
            // Runs the forward pass to get output of the output layers
            vector<Mat> outs;
            net.forward(outs, getOutputsNames(net));
            // Remove the bounding boxes with low confidence
            postprocess(frame, outs);
            // Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
            vector<double> layersTimes;
            double freq = getTickFrequency() / 1000;
            double t = net.getPerfProfile(layersTimes) / freq;
            string label = format("Inference time for a frame : %.2f ms", t);
            putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));	
            // Write the frame with the detection boxes
            Mat detectedFrame;
            frame.convertTo(detectedFrame, CV_8U);
            mtx_yolo.lock();
            line(frame,cv::Point(frame.cols*0.05,frame.rows*0.5),cv::Point(frame.cols*0.95,frame.rows*0.5),cv::Scalar(0, 255, 0),2);  // X axis (bright green; Scalar values are 0-255 on an 8-bit image)
            line(frame,cv::Point(frame.cols*0.5,frame.rows*0.05),cv::Point(frame.cols*0.5,frame.rows*0.95),cv::Scalar(0, 255, 0),2);  // Y axis
            putText(frame,"x",Point(frame.cols*0.95,frame.rows*0.48),FONT_HERSHEY_PLAIN,2,cv::Scalar(0, 255, 0),2);
            putText(frame,"y",Point(frame.cols*0.52,frame.rows*0.05),FONT_HERSHEY_PLAIN,2,cv::Scalar(0, 255, 0),2);
            putText(frame,"I",Point(frame.cols*0.75,frame.rows*0.25),FONT_HERSHEY_PLAIN,2,cv::Scalar(0, 255, 0),2);
            putText(frame,"II",Point(frame.cols*0.25,frame.rows*0.25),FONT_HERSHEY_PLAIN,2,cv::Scalar(0, 255, 0),2);
            putText(frame,"III",Point(frame.cols*0.25,frame.rows*0.75),FONT_HERSHEY_PLAIN,2,cv::Scalar(0, 255, 0),2);
            putText(frame,"IV",Point(frame.cols*0.75,frame.rows*0.75),FONT_HERSHEY_PLAIN,2,cv::Scalar(0, 255, 0),2);
            imshow("deep learning",frame);
            waitKey(10);
            mtx_yolo.unlock();
	    }
    }	
}
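// Thread-safety note (my addition, not in the original post): image_yolov3 is written by the
// capture loop in main() while detect_camera() reads it. A safer hand-off is to copy the shared
// frame under the existing mutex before running the network, for example:
//     { std::lock_guard<std::mutex> lock(mtx_yolo); frame = image_yolov3.clone(); }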


//
///  date:     2021/09/28
///  input:    Mat depth_c   aligned depth image; Mat color_c   RGB image;
///            color_cols / color_rows   target pixel (column / row) in the RGB image;
///            depth_err   pixel search radius around the target
///  function: look up the depth value in the aligned depth image at the given RGB-image location
///  output:   depth value (in depth units, i.e. millimetres)
///  links:    https://www.freesion.com/article/3585644384/ (real-world position from the intrinsics)
///            https://blog.csdn.net/SFM2020/article/details/84591758
//
uint16_t get_color_depth(Mat color_c,rs2_intrinsics color_intri,Mat depth_c,int color_cols,int color_rows,int depth_err)
{
  char str[10];// scratch buffer for formatting values into the overlay text
  uint16_t depth_sim=limit_depth*1000;
  int limit_left=0,limit_right=0,limit_up=0,limit_down=0;
  limit_left=limit_L(color_cols,depth_err);
  limit_right=limit_H(color_cols,depth_err,depth_c.cols);
  limit_up=limit_H(color_rows,depth_err,depth_c.rows);
  limit_down=limit_L(color_rows,depth_err);//(row,col)
  for(int get_i=limit_down;get_i<limit_up;get_i++){
    for(int get_j=limit_left;get_j<limit_right;get_j++){
       if(depth_c.at<uint16_t>(get_i,get_j)!=0)
       {
          if(depth_c.at<uint16_t>(get_i,get_j)<depth_sim)
           depth_sim=depth_c.at<uint16_t>(get_i,get_j);  
       }              
    }
  } 
  Point target_xy_true;
  target_xy_true.x=(color_cols-color_intri.ppx)*depth_sim/color_intri.fx;// column maps to x
  target_xy_true.y=(color_intri.ppy-color_rows)*depth_sim/color_intri.fy;// row maps to y (sign flipped so +y points up)
  string text = "Dp="; 
  sprintf(str, "%d", depth_sim); 
  text += str; 
  text +=" Px=";
  sprintf(str, "%d", target_xy_true.x); 
  text += str; 
  text +=" Py=";
  sprintf(str, "%d", target_xy_true.y); 
  text += str; 
  circle(color_c,cv::Point(color_cols,color_rows),16, Scalar(0,255,0),1,8,0);// mark the target point in green
  putText(color_c, text,cv::Point(color_cols,color_rows),cv::FONT_HERSHEY_COMPLEX, 0.3, cv::Scalar(0, 255, 0), 1, 8, 0); 
  return depth_sim;
}
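// Alternative sketch (my addition, not part of the original post): librealsense ships a
// deprojection helper that also applies the intrinsics' distortion model. Given a colour-image
// pixel and a depth in millimetres it returns the 3-D point, in metres, in the camera frame.
static void pixel_to_point_rs(const rs2_intrinsics& intrin, int col, int row, uint16_t depth_mm,
                              float point[3])
{
    float pixel[2] = { (float)col, (float)row };
    rs2_deproject_pixel_to_point(point, &intrin, pixel, depth_mm * 0.001f); // depth in metres
}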

// Get the scale that converts one depth pixel unit to metres
float get_depth_scale(rs2::device dev)
{
    // Go over the device's sensors
    for (rs2::sensor& sensor : dev.query_sensors())
    {
        // Check if the sensor is a depth sensor
        if (rs2::depth_sensor dpt = sensor.as<rs2::depth_sensor>())
        {
            return dpt.get_depth_scale();
        }
    }
    throw std::runtime_error("Device does not have a depth sensor");
}

// Align the depth image to the colour image
Mat align_Depth2Color(Mat depth,Mat color,rs2::pipeline_profile profile){
    // stream profiles
    auto depth_stream=profile.get_stream(RS2_STREAM_DEPTH).as<rs2::video_stream_profile>();
    auto color_stream=profile.get_stream(RS2_STREAM_COLOR).as<rs2::video_stream_profile>();
    // intrinsics
    const auto intrinDepth=depth_stream.get_intrinsics();
    const auto intrinColor=color_stream.get_intrinsics();
    color_intri_all=intrinColor;
    // Euclidean transform from the depth camera frame to the colour camera frame
    //auto  extrinDepth2Color=depth_stream.get_extrinsics_to(color_stream);
    rs2_extrinsics  extrinDepth2Color;
    rs2_error *error = nullptr;
    rs2_get_extrinsics(depth_stream,color_stream,&extrinDepth2Color,&error);
 
    // pixel coordinates
    float pd_uv[2],pc_uv[2];
    // 3-D points
    float Pdc3[3],Pcc3[3];
 
    // depth-unit to metre scale (1 mm by default on the D4xx cameras)
    float depth_scale = get_depth_scale(profile.get_device());
    int y=0,x=0;
    // result must be 16-bit single channel, since raw depth values are written into it below
    Mat result=Mat::zeros(color.rows,color.cols,CV_16U);
    // iterate over the depth image
    for(int row=0;row<depth.rows;row++){
        for(int col=0;col<depth.cols;col++){
            // current pixel of the depth image
            pd_uv[0]=col;// column of the depth image (origin at top-left); column is x
            pd_uv[1]=row;// row of the depth image; row is y
            // depth value at the current pixel
            uint16_t depth_value=depth.at<uint16_t>(row,col);
            // convert to metres
            float depth_m=depth_value*depth_scale;
            // deproject the depth pixel to a 3-D point in the depth camera frame
            rs2_deproject_pixel_to_point(Pdc3,&intrinDepth,pd_uv,depth_m);
            // transform the 3-D point into the colour camera frame
            rs2_transform_point_to_point(Pcc3,&extrinDepth2Color,Pdc3);
            // project the 3-D point onto the colour image plane
            rs2_project_point_to_pixel(pc_uv,&intrinColor,Pcc3);
            // resulting (u, v) in the colour image
            x=(int)pc_uv[0];
            y=(int)pc_uv[1];
            x=x<0? 0:x;
            x=x>depth.cols-1 ? depth.cols-1:x;
            y=y<0? 0:y;
            y=y>depth.rows-1 ? depth.rows-1:y;
 
            // overlay variant: copy the RGB data of successfully mapped points instead
           /* for(int k=0;k<3;k++){
                // only show objects closer than 3 metres
                if(depth_m<3)
                result.at<cv::Vec3b>(y,x)[k]=
                        color.at<cv::Vec3b>(y,x)[k];                
            }*/
            if(depth_m<limit_depth)
             result.at<uint16_t>(y,x)=depth_value;// aligned depth image, see https://www.cnblogs.com/AsanoLy/p/15072691.html
        }
    }
    return result;
}
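// Alternative sketch (my addition): librealsense can also perform the depth-to-colour alignment
// internally via rs2::align, which replaces the per-pixel loop in align_Depth2Color(). The helper
// below is only an illustration of that API.
static rs2::depth_frame aligned_depth_via_sdk(rs2::frameset& frames)
{
    static rs2::align align_to_color(RS2_STREAM_COLOR); // alignment target is the colour stream
    rs2::frameset aligned = align_to_color.process(frames);
    return aligned.get_depth_frame();
}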
 
int main()
{
    std::thread t1(&detect_camera,modelWeights, modelConfiguration, classesFile);
    t1.detach(); 
    // colorizer for visualising the depth image
    rs2::colorizer c;                          // Helper to colorize depth images
    // create the data pipeline
    rs2::pipeline pipe;
    rs2::config pipe_config;
    pipe_config.enable_stream(RS2_STREAM_DEPTH,640,480,RS2_FORMAT_Z16,30);
    pipe_config.enable_stream(RS2_STREAM_COLOR,640,480,RS2_FORMAT_BGR8,30);
 
    // start() returns the pipeline's profile
    rs2::pipeline_profile profile = pipe.start(pipe_config);
 
    // variable for converting depth to distance (not used further below)
    float depth_clipping_distance = 1.f;
 
    // stream profiles
    auto depth_stream=profile.get_stream(RS2_STREAM_DEPTH).as<rs2::video_stream_profile>();
    auto color_stream=profile.get_stream(RS2_STREAM_COLOR).as<rs2::video_stream_profile>();
 
    // intrinsics
    auto intrinDepth=depth_stream.get_intrinsics();
    auto intrinColor=color_stream.get_intrinsics();
    color_intri_all=intrinColor;// hand the colour intrinsics to the detector thread
    // Euclidean transform from the depth camera frame to the colour camera frame
    auto  extrinDepth2Color=depth_stream.get_extrinsics_to(color_stream);
 
    const char* depth_win="depth_Image";
    namedWindow(depth_win,WINDOW_AUTOSIZE);
    while (1) // Application still alive?
    {
        // block until a new set of frames is captured
        rs2::frameset frameset = pipe.wait_for_frames();
        // grab the colour and depth frames
        rs2::frame color_frame = frameset.get_color_frame();//processed.first(align_to);
        rs2::frame depth_frame = frameset.get_depth_frame();
        rs2::frame depth_frame_4_show = frameset.get_depth_frame().apply_filter(c);
        // query the frame dimensions
        const int depth_w=depth_frame.as<rs2::video_frame>().get_width();
        const int depth_h=depth_frame.as<rs2::video_frame>().get_height();
        const int color_w=color_frame.as<rs2::video_frame>().get_width();
        const int color_h=color_frame.as<rs2::video_frame>().get_height();
 
        // wrap the raw frame data in OpenCV Mats
        Mat depth_image(Size(depth_w,depth_h),
                                CV_16U,(void*)depth_frame.get_data(),Mat::AUTO_STEP);
        Mat depth_image_4_show(Size(depth_w,depth_h),
                                CV_8UC3,(void*)depth_frame_4_show.get_data(),Mat::AUTO_STEP);
        Mat color_image(Size(color_w,color_h),
                                CV_8UC3,(void*)color_frame.get_data(),Mat::AUTO_STEP);
        // align the depth image to the colour image and return the aligned depth map
        Mat result=align_Depth2Color(depth_image,color_image,profile);
        mtx_yolo.lock();
        imshow(depth_win,depth_image_4_show);
        waitKey(10);
        // hand copies to the detector thread; clone so the shared Mats own their data and
        // remain valid after the rs2 frame buffers are recycled on the next iteration
        image_yolov3=color_image.clone();
        yolov3_depth=result.clone();
        mtx_yolo.unlock();
    }
    return 0;
}

 CMakeLists.txt

cmake_minimum_required(VERSION 3.1.0)
project(RealsenseExamples-Depth)
find_package(OpenCV REQUIRED)
find_package(OpenGL)
set(DEPENDENCIES realsense2 ${PCL_LIBRARIES})
list(APPEND DEPENDENCIES ${OPENGL_LIBRARIES})
if(NOT WIN32)
    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")
endif()
if(NOT OPENGL_FOUND)
    message(FATAL_ERROR "\n\n OpenGL package is missing!\n\n")
endif()
if(WIN32)
    list(APPEND DEPENDENCIES glfw3)
else()
    find_package(glfw3 REQUIRED)
    list(APPEND DEPENDENCIES glfw)
endif()

add_executable(realsense_yolo realsense_yolo.cpp)

target_link_libraries(realsense_yolo ${DEPENDENCIES} ${OpenCV_LIBS})
