Robot Perception Algorithms: Object Detection and Localization with YOLOv5 and ROS2 (Python Implementation)

Introduction

Visual perception is an important part of robot development. In grasping, placing, and manipulation tasks, a robot arm must detect and localize the objects of interest so that visual guidance can inform its planning and control. This article provides sample code for researchers and engineers to study and discuss.

Algorithm Overview

1. Capture frames from the camera; an RGB-D camera is typically used to obtain aligned color and depth streams.
2. Run YOLOv5 on the color image to detect the target objects.
3. Map each detected bounding box onto the depth image and convert that region of the depth map into a point cloud (see the back-projection sketch below).
4. Filter the target point cloud and estimate the object's position.
5. Publish the target's xyz information through the ROS2 communication layer.
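
Step 3 relies on the standard pinhole back-projection: a depth pixel (u, v) with depth z maps to the camera-frame point x = (u - cx) * z / fx, y = (v - cy) * z / fy, where fx and fy are the focal lengths and (cx, cy) is the principal point. Open3D performs this internally in the code below; a minimal standalone sketch for reference:

import numpy as np

def backproject(u, v, z, fx, fy, cx, cy):
    # Pinhole model: pixel (u, v) at depth z (meters) back-projects to a
    # 3D point in the camera frame
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    return np.array([x, y, z])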

Dependencies

PyTorch, Open3D, ROS2, and the Intel RealSense camera driver (pyrealsense2)

Code

import torch
import cv2
import numpy as np
import open3d as o3d
import pyrealsense2 as rs
import math

import rclpy
from rclpy.node import Node
from geometry_msgs.msg import Transform
from std_msgs.msg import Header
from vision_msgs.msg import DetectedObject, DetectedObjectList
from datetime import datetime

camera_to_base = np.array([[0.4830191777293635, -0.5454382934019704, 0.6849741177857716, 0.09830437700902608],
                           [-0.3508700489363267, -0.8372981068953109, -0.4193114462412742, 0.5652720778206468],
                           [0.8022360517360645, -0.03780143225164029, -0.5958090038046218, 0.794058946750924],
                           [0.0, 0.0, 0.0, 1.0]])
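
# camera_to_base above is the 4x4 homogeneous transform from the camera
# frame to the robot base frame, typically obtained from hand-eye
# calibration. A quick sanity check (illustrative, not part of the original
# pipeline) is that its rotation block is orthonormal:
#   R = camera_to_base[:3, :3]
#   assert np.allclose(R @ R.T, np.eye(3), atol=1e-3)
#   assert np.isclose(np.linalg.det(R), 1.0, atol=1e-3)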

objectname = {
    0: "push pedal",
    1: "cup",
    2: "doorknob"
}


_model = None

def object_2D_detection(img):
    # Load the local YOLOv5 model once and cache it; reloading it on every
    # frame (as force_reload=True does) is very slow
    global _model
    if _model is None:
        _model = torch.hub.load(r'/home/lz/yolov5', 'custom',
                                path=r'runs/train/best_20230620sep.pt',
                                source='local')

    # Inference on the color image
    results = _model(img, size=1280)

    # Print the detections as a pandas DataFrame for debugging
    print(results.pandas().xyxy[0])

    # Each row: [x1, y1, x2, y2, confidence, class_id]
    boxes = results.xyxy[0].tolist()

    torch.cuda.empty_cache()

    return boxes
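
# If low-confidence detections leak through, a confidence threshold can be
# applied to the returned boxes (the 0.5 value below is an assumption, not
# part of the original pipeline):
#   boxes = [b for b in boxes if b[4] > 0.5]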

def depth_to_pointcloud(depth_image, intrinsic):
    # Wrap the depth map in an Open3D Image
    o3d_depth = o3d.geometry.Image(depth_image)

    # Get intrinsic parameters from the RealSense stream profile
    fx, fy, cx, cy = intrinsic.fx, intrinsic.fy, intrinsic.ppx, intrinsic.ppy

    # Create the Open3D pinhole camera model
    o3d_intrinsic = o3d.camera.PinholeCameraIntrinsic(width=depth_image.shape[1], height=depth_image.shape[0], fx=fx, fy=fy, cx=cx, cy=cy)

    # Back-project the depth image to a point cloud; the default depth_scale
    # of 1000.0 converts the z16 values from millimeters to meters
    pcd = o3d.geometry.PointCloud.create_from_depth_image(o3d_depth, o3d_intrinsic)

    return pcd

def achieve_targetpointcloud(mask, depth, intrinsic):
    # Resize the mask to the depth resolution, zero out all depth pixels
    # outside the detection box, then back-project the ROI to a point cloud
    mask_resized = cv2.resize(mask, (depth.shape[1], depth.shape[0]))
    roi = cv2.bitwise_and(depth, depth, mask=mask_resized)
    target_pointcloud = depth_to_pointcloud(roi, intrinsic)
    return target_pointcloud

def generate_mask(img_shape, x1, y1, x2, y2):
    # Binary mask that is nonzero inside the (shrunken) bounding box
    mask = np.zeros(img_shape[:2], dtype=np.uint8)
    mask[y1:y2, x1:x2] = 1
    return mask

def transform_matrix(x, y, z):
    # Build a homogeneous transform with identity rotation and
    # translation (x, y, z)
    transform = np.eye(4)
    transform[:-1, -1] = [x, y, z]

    return transform
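
# Note: because transform_matrix uses an identity rotation, multiplying
# camera_to_base @ transform_matrix(x, y, z) (as done in the main loop)
# yields a pose whose last column is the centroid expressed in the base
# frame; its rotation block is just the camera-to-base rotation and carries
# no information about the object's orientation.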

def shrink_rect(x1, y1, x2, y2):
    # Shrink the detection box by 10% per dimension, keeping it centered.
    # Box edges usually cover background pixels that would contaminate the
    # depth ROI.
    width = x2 - x1
    height = y2 - y1

    # Width and height after shrinking by 10%
    new_width = math.floor(width * 0.9)
    new_height = math.floor(height * 0.9)

    # Integer corner coordinates of the shrunken box
    new_x1 = math.ceil(x1 + (width - new_width) / 2)
    new_y1 = math.ceil(y1 + (height - new_height) / 2)
    new_x2 = math.floor(x2 - (width - new_width) / 2)
    new_y2 = math.floor(y2 - (height - new_height) / 2)

    return new_x1, new_y1, new_x2, new_y2


num_image = 0

# Initialize the RealSense camera: 1280x720 depth (z16) and color (rgb8)
# streams at 15 fps
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 1280, 720, rs.format.z16, 15)
config.enable_stream(rs.stream.color, 1280, 720, rs.format.rgb8, 15)
profile = pipeline.start(config)

# Align the depth stream to the color stream so that bounding-box pixel
# coordinates index directly into the depth image
align_to = rs.stream.color
align = rs.align(align_to)

# Set up the ROS2 node and the publisher for detection results
rclpy.init()
node = Node("vision_node")
publisher = node.create_publisher(DetectedObjectList, "object_detection", 10)

try:
    while True:
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)
        depth_frame = aligned_frames.get_depth_frame()
        if not depth_frame:
            continue
        color_frame = aligned_frames.get_color_frame()
        if not color_frame:
            continue
        # Skip the first 100 frames so the camera's auto-exposure can settle
        if num_image < 100:
            num_image += 1
            continue

        depth_image = np.asanyarray(depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())

        depth_intrinsics = depth_frame.profile.as_video_stream_profile().intrinsics

        boxes_2D = object_2D_detection(color_image)
        num_image += 1
        # Sort detections top-to-bottom by the y coordinate of each box's
        # top-left corner
        boxes_2D.sort(key=lambda x: x[1])
        print("box:", boxes_2D)

        # Per-class counters for detected instances
        num_object = {
            0: 0,
            1: 0,
            2: 0
        }

        detected_object_list = DetectedObjectList()

        for box in boxes_2D:
            # Keep at most one "push pedal" detection per frame
            if box[5] == 0 and num_object[0] >= 1:
                continue
            lefttop_x, lefttop_y, rightdown_x, rightdown_y = shrink_rect(box[0], box[1], box[2], box[3])
            mask = generate_mask(color_image.shape, lefttop_x, lefttop_y, rightdown_x, rightdown_y)

            target_raw = achieve_targetpointcloud(mask, depth_image, depth_intrinsics)

            # Filter depth noise: drop points whose mean distance to their
            # 10 nearest neighbors exceeds 2 standard deviations
            target, ind = target_raw.remove_statistical_outlier(nb_neighbors=10, std_ratio=2.0)

            # Estimate the object's position as the centroid of the filtered
            # point cloud (in the camera frame)
            centroid = np.asarray(target.points).mean(axis=0)
            print("The centroid of", objectname[box[5]], "instance", num_object[box[5]] + 1, "is:", centroid[0], centroid[1], centroid[2])

            xyz_msg_x = centroid[0]
            xyz_msg_y = centroid[1]
            xyz_msg_z = centroid[2]

            num_object[box[5]] += 1

            object_in_camera = transform_matrix(xyz_msg_x, xyz_msg_y, xyz_msg_z)
            object_in_base = np.dot(camera_to_base, object_in_camera)

            print(objectname[box[5]], ':')
            print(object_in_base)

            # Fill the custom message: flatten the 4x4 pose row-major into
            # float32[16]
            detected_object = DetectedObject()
            detected_object.transformation_matrix = object_in_base.ravel().astype(np.float32)
            detected_object.timestamp = str(datetime.now())
            detected_object.category_id = int(box[5])
            detected_object_list.detected_objects.append(detected_object)

        publisher.publish(detected_object_list)
finally:
    pipeline.stop()
    node.destroy_node()
    rclpy.shutdown()

ROS2 Custom Messages

# DetectedObject.msg
float32[16] transformation_matrix
string timestamp
int32 category_id

# DetectedObjectList.msg
DetectedObject[] detected_objects
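
For reference, here is a minimal subscriber sketch that consumes the published DetectedObjectList. The node and topic names mirror the publisher above; this is an illustration, not part of the original pipeline:

import numpy as np
import rclpy
from rclpy.node import Node
from vision_msgs.msg import DetectedObjectList

class ObjectListener(Node):
    def __init__(self):
        super().__init__("object_listener")
        self.create_subscription(
            DetectedObjectList, "object_detection", self.callback, 10)

    def callback(self, msg):
        for obj in msg.detected_objects:
            # Recover the 4x4 pose matrix flattened by the publisher
            T = np.array(obj.transformation_matrix, dtype=np.float32).reshape(4, 4)
            self.get_logger().info(
                f"category {obj.category_id} at {T[:3, 3]} ({obj.timestamp})")

rclpy.init()
rclpy.spin(ObjectListener())
rclpy.shutdown()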