Intel Realsense D435 通过识别目标的像素坐标和深度值（使用内参intrinsics）获取目标点的真实坐标

最新推荐文章于 2024-09-11 21:45:00 发布

Dontla

最新推荐文章于 2024-09-11 21:45:00 发布

阅读量1.7w

点赞数 21

分类专栏： Intel RealSense

本文链接：https://blog.csdn.net/Dontla/article/details/102659079

版权

Intel RealSense 专栏收录该内容

163 篇文章 140 订阅

订阅专栏

Intel Realsense D435 通过识别目标的像素坐标和深度值（使用内参intrinsics）获取目标点的真实坐标

原理图

在这里插入图片描述

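由相似三角形可得：X = (u − ppx) · Z / fx；对于 fy 同理：Y = (v − ppy) · Z / fy（其中 (u, v) 为像素坐标，Z 为深度值）。
真实坐标的 (0,0) 位置对应 D435 RGB 摄像头的主点 (ppx, ppy)，即 RGB 摄像头光轴所在的位置。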

基本获取内参`intrinsics`代码

import pyrealsense2 as rs

# Minimal demo: start the D435, grab one frame set and read the stream
# intrinsics (and, optionally, the depth-to-color extrinsics).
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
profile = pipeline.start(config)

# try/finally makes sure the device is released even when grabbing a frame
# or querying a profile raises.
try:
    frames = pipeline.wait_for_frames()
    depth = frames.get_depth_frame()
    color = frames.get_color_frame()
    # Fail early with a clear message instead of erroring on an empty frame.
    if not depth or not color:
        raise RuntimeError('No depth/color frame received from the camera')

    # Get the stream profiles (the intrinsics are read from them below).
    depth_profile = depth.get_profile()
    # print(depth_profile)
    # <pyrealsense2.video_stream_profile: 1(0) 640x480 @ 30fps 1>
    # print(type(depth_profile))
    # <class 'pyrealsense2.pyrealsense2.stream_profile'>
    # print(depth_profile.fps())
    # 30
    # print(depth_profile.stream_index())
    # 0
    # print(depth_profile.stream_name())
    # Depth
    # print(depth_profile.stream_type())
    # stream.depth
    # print('', depth_profile.unique_id)   # NOTE: unique_id is a method; without "()" the bound method is printed
    # <bound method PyCapsule.unique_id of <pyrealsense2.video_stream_profile: 1(0) 640x480 @ 30fps 1>>

    color_profile = color.get_profile()
    # print(color_profile)
    # <pyrealsense2.video_stream_profile: 2(0) 640x480 @ 30fps 6>
    # print(type(color_profile))
    # <class 'pyrealsense2.pyrealsense2.stream_profile'>
    # print(color_profile.fps())
    # 30
    # print(color_profile.stream_index())
    # 0

    # get_intrinsics() is called on the video_stream_profile wrappers built
    # here from the generic stream profiles.
    cvsprofile = rs.video_stream_profile(color_profile)
    dvsprofile = rs.video_stream_profile(depth_profile)

    color_intrin = cvsprofile.get_intrinsics()
    # print(color_intrin.fx)
    # 616.5906372070312
    # print(color_intrin)
    # width: 640, height: 480, ppx: 318.482, ppy: 241.167, fx: 616.591, fy: 616.765, model: 2, coeffs: [0, 0, 0, 0, 0]

    # depth_intrin = dvsprofile.get_intrinsics()
    # print(depth_intrin)
    # width: 640, height: 480, ppx: 317.78, ppy: 236.709, fx: 382.544, fy: 382.544, model: 4, coeffs: [0, 0, 0, 0, 0]

    # extrin = depth_profile.get_extrinsics_to(color_profile)
    # print(extrin)
    # rotation: [0.999984, -0.00420567, -0.00380472, 0.00420863, 0.999991, 0.00076919, 0.00380145, -0.00078519, 0.999992]
    # translation: [0.0147755, 0.000203265, 0.00051274]
finally:
    pipeline.stop()

实操代码1（在`tensorflow-yolov3`中获取内参）

# 先获取对齐流，得到color_intrin后，将对齐后的流通过比例关系将目标的像素坐标转换为实际坐标。
# 我们这使用的识别框架为Tensorflow-yolov3

import pyrealsense2 as rs

# Capture loop: align depth to color and extract the color intrinsics that
# draw_bbox() needs for the pixel -> real-world conversion.
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

pipeline.start(config)

# Alignment object (depth is aligned to the color stream).
align = rs.align(rs.stream.color)

try:
    while True:
        frames = pipeline.wait_for_frames()

        # Aligned frame set.
        aligned_frames = align.process(frames)

        # Depth and color frames after alignment.
        aligned_depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()

        # Validate BEFORE touching the frames: the profile lookup below must
        # not run on a missing frame.
        if not aligned_depth_frame or not color_frame:
            continue

        # Intrinsics of the color frame, reduced to [ppx, ppy, fx, fy].
        color_profile = color_frame.get_profile()
        cvsprofile = rs.video_stream_profile(color_profile)
        color_intrin = cvsprofile.get_intrinsics()
        color_intrin_part = [color_intrin.ppx, color_intrin.ppy, color_intrin.fx, color_intrin.fy]
        # print(color_intrin_part)
        # [318.48199462890625, 241.16720581054688, 616.5906372070312, 616.7650146484375]

        # (excerpt) detection and the utils.draw_bbox(...) call follow here.
finally:
    # Release the camera when the loop is left, e.g. on Ctrl+C or an error.
    pipeline.stop()

实操代码2（在`tensorflow-yolov3` `draw_bbox()`函数中实现坐标转换操作）

# In video_demo.py: pass the aligned depth frame and the [ppx, ppy, fx, fy]
# list produced in the capture loop on to draw_bbox().
image = utils.draw_bbox(frame, bboxes, aligned_depth_frame, color_intrin_part)

# utils.py文件中：
def draw_bbox(image, bboxes, aligned_depth_frame, color_intrin_part, classes=read_class_names(cfg.YOLO.CLASSES),
              show_label=True):
    """Print the real-world position of each detected box centre (excerpt).

    Only the coordinate-conversion part of the function is shown; any
    drawing code is not part of this excerpt.

    Args:
        image: The frame the boxes belong to; returned unchanged here.
        bboxes: Iterable of detections. Assumed to use the upstream
            tensorflow-yolov3 layout
            ``[x_min, y_min, x_max, y_max, score, class_id]`` -- TODO confirm
            against the version of ``utils.py`` in use.
        aligned_depth_frame: Depth frame aligned to the color stream; its
            ``get_distance(x, y)`` is queried at the box centre.
        color_intrin_part: ``[ppx, ppy, fx, fy]`` of the color stream.
        classes: Lookup from class index to class name.
        show_label: When false, no coordinates are computed or printed.

    Returns:
        ``image``.
    """
    print('*' * 50)

    # Unpack principal point (ppx, ppy) and focal lengths (fx, fy).
    ppx, ppy, fx, fy = color_intrin_part[:4]

    for bbox in bboxes:
        # Box corners and class index -- see the layout note in the docstring.
        coor = bbox[:4]
        class_ind = int(bbox[5])

        if show_label:
            # Centre pixel of the bounding box.
            target_xy_pixel = [int(round((coor[0] + coor[2]) / 2)), int(round((coor[1] + coor[3]) / 2))]
            # Distance at that pixel; the print below scales it by 1000 and
            # labels it mm. The last sample line below shows a 0 reading,
            # which also zeroes X/Y -- presumably no depth data at the pixel.
            target_depth = aligned_depth_frame.get_distance(target_xy_pixel[0], target_xy_pixel[1])

            # Similar triangles: X = (u - ppx) * Z / fx, Y = (v - ppy) * Z / fy.
            target_xy_true = [(target_xy_pixel[0] - ppx) * target_depth / fx,
                              (target_xy_pixel[1] - ppy) * target_depth / fy]
            # Y is negated because image rows grow downwards, so the printed
            # Y is positive above the principal point.
            print('识别出目标：{} 中心点像素坐标：({}, {}) 实际坐标(mm)：（{:.3f}，{:.3f}） 深度(mm)：{:.3f}'.format(classes[class_ind],
                                                                                            target_xy_pixel[0],
                                                                                            target_xy_pixel[1],
                                                                                            target_xy_true[0] * 1000,
                                                                                            -target_xy_true[1] * 1000,
                                                                                            target_depth * 1000))

    # Sample console output, as given in the article:
    # **************************************************
    # 识别出目标：person 中心点像素坐标：(272, 142) 实际坐标(mm)：（-160，341） 深度(mm)：2122
    # 识别出目标：person 中心点像素坐标：(414, 197) 实际坐标(mm)：（506，234） 深度(mm)：3268
    # 识别出目标：person 中心点像素坐标：(114, 246) 实际坐标(mm)：（-930，-22） 深度(mm)：2804
    # 识别出目标：chair 中心点像素坐标：(82, 340) 实际坐标(mm)：（-934，-390） 深度(mm)：2435
    # 识别出目标：chair 中心点像素坐标：(60, 296) 实际坐标(mm)：（-1021，-216） 深度(mm)：2435
    # 识别出目标：keyboard 中心点像素坐标：(456, 408) 实际坐标(mm)：（199，-241） 深度(mm)：892
    # 识别出目标：laptop 中心点像素坐标：(287, 303) 实际坐标(mm)：（-67，-131） 深度(mm)：1306
    # 识别出目标：laptop 中心点像素坐标：(354, 221) 实际坐标(mm)：（77，44） 深度(mm)：1332
    # 识别出目标：laptop 中心点像素坐标：(428, 257) 实际坐标(mm)：（507，-73） 深度(mm)：2856
    # 识别出目标：laptop 中心点像素坐标：(522, 357) 实际坐标(mm)：（0，-0） 深度(mm)：0

    return image