输入图像名称可视化NuScenes数据集

最新推荐文章于 2024-05-24 18:17:08 发布

edvincecilia

最新推荐文章于 2024-05-24 18:17:08 发布

阅读量740

点赞数 9

文章标签： python 开发语言

本文链接：https://blog.csdn.net/qq_41037856/article/details/139077316

版权

在自动驾驶研究领域，NuScenes数据集是一个广泛使用的资源。

之前遇到过这样一个需求：平时查看数据时，经常想查看每张图片上的标注框及其对应的信息，但 nuScenes 的 devkit 并没有提供这样的函数——输入一张图片的名称，即可输出该图像对应的深度信息、框的位置信息等。为此我写了一段代码来实现这些功能。以下是 YAML 配置文件的内容：

nuscenes_path: 'v1.0-mini'  # 数据集路径
image_filename: "n008-2018-08-01-15-16-36-0400__CAM_BACK__1533151605037558.jpg"  # 图片文件名
camera_channel: 'CAM_BACK'  # 相机通道
save_directory: 'save'  # 保存目录

以下是效果

环境准备

首先，我们需要安装必要的库。如果还没有安装，可以使用以下命令进行安装：

pip install nuscenes-devkit opencv-python-headless pyquaternion pyyaml

代码实现

我们将通过一个Python脚本来加载并可视化NuScenes数据集中的图像及其标注信息。脚本主要功能如下：

加载NuScenes数据集的特定图片。
显示图片及其标注信息。
提供数据集内图片搜索功能。

初始化NuScenes数据集

首先，我们需要初始化NuScenes数据集并设置一些基本参数。

from nuscenes.nuscenes import NuScenes
import os
import cv2
import numpy as np
from pyquaternion import Quaternion

class nuscence_visualize:
    """Load a nuScenes dataset and hold the detection class names to visualize."""

    def __init__(self, nuscenes_path: str, version: str = 'v1.0-mini'):
        """Open the dataset.

        Args:
            nuscenes_path: Passed to ``NuScenes`` as ``dataroot``.
            version: Passed to ``NuScenes`` as ``version``. Defaults to
                ``'v1.0-mini'``, the value that used to be hard-coded, so
                existing callers are unaffected.
        """
        self.nusc = NuScenes(version=version, dataroot=nuscenes_path)
        # Names that annotation categories are matched against when boxes are built.
        self.detection_class = {'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'}

获取3D对象信息

我们需要从标注（annotation）中提取3D对象信息，并将其转换到相机坐标系中。

def get_obj3d_from_annotation(self, ann, ego_data, calib_data):
    """Build the eight box corners of one annotation in the sensor frame.

    The annotation pose is moved through two inverse rigid transforms in
    sequence: first the one described by ``ego_data``, then the one described
    by ``calib_data``.

    Args:
        ann: Dict with ``'category_name'`` (dot-separated string),
            ``'translation'``, ``'rotation'`` (quaternion components),
            ``'size'`` and optionally ``'data_type'``.
        ego_data: Dict with ``'translation'`` and ``'rotation'``.
        calib_data: Dict with ``'translation'`` and ``'rotation'``.

    Returns:
        ``None`` when no token of the category name matches
        ``self.detection_class``; otherwise a dict with ``'data_type'``,
        ``'type'`` (the matched class name) and ``'box'`` (3x8 array, one
        column per corner).
    """
    # Raw nuScenes categories spell two classes differently from the detection
    # names ('movable_object.trafficcone', 'vehicle.construction'); without
    # this mapping those annotations would never match and be dropped.
    aliases = {'trafficcone': 'traffic_cone', 'construction': 'construction_vehicle'}
    tokens = {aliases.get(tok, tok) for tok in ann['category_name'].split('.')}
    matches = tokens.intersection(self.detection_class)
    if not matches:
        return None
    # min() keeps the choice deterministic if more than one token matches.
    obj_type = min(matches)

    # Force float so integer-valued translations do not break the subtraction.
    center = np.array(ann['translation'], dtype=float)
    orientation = Quaternion(ann['rotation'])

    # Apply the inverse of each frame's pose in turn.
    for frame in (ego_data, calib_data):
        inverse_rotation = Quaternion(frame['rotation']).inverse
        center = np.dot(inverse_rotation.rotation_matrix, center - np.array(frame['translation']))
        orientation = inverse_rotation * orientation

    # 'size' is unpacked as (w, l, h); l spans the local x axis, w the y axis.
    w, l, h = ann['size']
    x_corners = l / 2 * np.array([-1, 1, 1, -1, -1, 1, 1, -1])
    y_corners = w / 2 * np.array([1, 1, -1, -1, 1, 1, -1, -1])
    z_corners = h / 2 * np.array([-1, -1, -1, -1, 1, 1, 1, 1])

    # Rotate the axis-aligned corners, then shift them to the box center.
    box3d = np.dot(orientation.rotation_matrix, np.vstack((x_corners, y_corners, z_corners)))
    box3d = box3d + center.reshape(3, 1)

    return {
        # Annotations that carry no explicit tag are treated as ground truth.
        'data_type': ann.get('data_type', 'gt'),
        'type': obj_type,
        'box': box3d,
    }

投影3D对象到2D图像

接下来，我们需要将3D对象投影到2D图像上。

def project_obj2image(self, obj3d_list, intrinsic):
    """Project 3D box corners onto the image plane.

    Args:
        obj3d_list: Iterable of dicts with ``'data_type'``, ``'type'`` and
            ``'box'`` (array whose row 2 is the coordinate divided by during
            projection, one column per corner).
        intrinsic: 3x3 matrix (nested sequence or array).

    Returns:
        List of dicts with ``'data_type'``, ``'type'``, ``'box'`` (3xN array of
        projected points, last row normalized to 1) and ``'depth'`` (mean of
        row 2 over all corners). Objects with any corner at row-2 value
        <= 0.1 are left out.
    """
    # Embed the 3x3 matrix in a 4x4 identity so homogeneous columns can be
    # multiplied directly.
    projection = np.eye(4)
    projection[:3, :3] = np.array(intrinsic)

    projected = []
    for obj3d in obj3d_list:
        corners = obj3d['box']
        # Keep only boxes whose corners are all beyond the 0.1 threshold.
        if not np.all(corners[2, :] > 0.1):
            continue

        ones_row = np.ones((1, corners.shape[1]))
        homogeneous = np.concatenate((corners, ones_row), axis=0)
        image_pts = np.dot(projection, homogeneous)
        pixel_pts = image_pts[:3, :] / image_pts[2, :]

        mean_corner = np.mean(homogeneous[:3, :], axis=1)
        projected.append({
            'data_type': obj3d['data_type'],
            'type': obj3d['type'],
            'box': pixel_pts,
            'depth': mean_corner[2],
        })

    return projected

在图像上绘制标注信息

我们需要在图像上绘制3D包围盒和深度信息。

def plot_annotation_info_camera_only(self, camera_img, obj_list):
    """Draw each projected box as a wireframe plus a depth label, in place.

    Args:
        camera_img: Image passed to ``cv2.line`` / ``cv2.putText``; it is
            drawn on directly.
        obj_list: Iterable of dicts with ``'type'``, ``'box'`` (array with at
            least 2 rows and 8 columns of pixel coordinates), ``'depth'`` and
            ``'data_type'``.
    """
    # Corner-index pairs in drawing order. For each k: one edge of the face
    # made of corners 0-3, the matching edge of the face made of corners 4-7,
    # and the edge joining the two faces.
    edges = []
    for k in range(4):
        nxt = (k + 1) % 4
        edges.extend([(k, nxt), (k + 4, nxt + 4), (k, k + 4)])

    for item in obj_list:
        kind = item['type']
        corners = item['box'].astype(int)
        distance = item['depth']
        if kind == 'car':
            line_color = (0, 255, 255)
        else:
            line_color = (255, 255, 255)
        # Objects tagged 'gt' get the thicker outline.
        line_width = 2 if item['data_type'] == 'gt' else 1

        for a, b in edges:
            start = (corners[0, a], corners[1, a])
            end = (corners[0, b], corners[1, b])
            cv2.line(camera_img, start, end, line_color, line_width)

        # The label is anchored at the mean of corners 0-3.
        label_x = int(np.mean(corners[0, :4]))
        label_y = int(np.mean(corners[1, :4]))
        cv2.putText(camera_img, f'{distance:.0f}m', (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

可视化单个样本

最后，我们将这些功能结合起来，编写一个方法来可视化单个样本的图像及其标注信息。

def visualize_one_sample_single(self, sample, camera_name='CAM_FRONT', results=None, visible_level=1, scale_ratio=1, save_dir=None):
    """Draw the boxes of one sample on one camera image, then show or save it.

    Args:
        sample: Sample record; ``sample['data'][camera_name]`` and
            ``sample['anns']`` are read.
        camera_name: Key into ``sample['data']``; also used (from the fifth
            character on) as the caption drawn on the image and as the saved
            file name.
        results: Optional iterable of detection dicts. Each needs
            ``'detection_score'`` and ``'detection_name'`` plus the pose/size
            keys that ``get_obj3d_from_annotation`` reads. Entries scoring
            below 0.5 are skipped. When ``None``, the annotations listed in
            ``sample['anns']`` are drawn instead.
        visible_level: Annotations whose ``visibility_token`` (as an int) is
            below this value are skipped.
        scale_ratio: Factor applied to the image width and height before
            display or saving.
        save_dir: When ``None`` the image is shown in a window; otherwise it
            is written to ``<save_dir>/<camera_name>.jpg``.

    Raises:
        FileNotFoundError: If the camera image cannot be read.
    """
    camera_data = self.nusc.get('sample_data', sample['data'][camera_name])
    img_path = os.path.join(self.nusc.dataroot, camera_data['filename'])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of inside cv2.putText.
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    cv2.putText(img, text=camera_name[4:], org=(50, 80), fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=4.0, thickness=3, color=(0, 0, 255))

    anns_info = []
    if results is not None:
        for res in results:
            if res['detection_score'] < 0.5:
                continue
            # Work on a copy so the caller's result dicts are not modified.
            anns_info.append(dict(
                res,
                visibility_token=4,
                category_name=res['detection_name'],
                data_type='result',
            ))
    else:
        for token in sample['anns']:
            record = self.nusc.get('sample_annotation', token)
            # Copy so the record returned by the dataset object is not
            # modified, and tag it 'gt' so the plot routine's 'gt' branch
            # applies to ground-truth boxes.
            anns_info.append(dict(record, data_type='gt'))

    calib_data = self.nusc.get('calibrated_sensor', camera_data['calibrated_sensor_token'])
    ego_data = self.nusc.get('ego_pose', camera_data['ego_pose_token'])

    obj3d_list = []
    for ann in anns_info:
        if int(ann['visibility_token']) < visible_level:
            continue
        obj = self.get_obj3d_from_annotation(ann, ego_data, calib_data)
        if obj is not None:
            obj3d_list.append(obj)

    obj2d_list = self.project_obj2image(obj3d_list, calib_data['camera_intrinsic'])
    self.plot_annotation_info_camera_only(img, obj2d_list)

    img_h, img_w, _ = img.shape
    img = cv2.resize(img, (int(img_w * scale_ratio), int(img_h * scale_ratio)))

    if save_dir is None:
        cv2.imshow('Visualization', img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    else:
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, f'{camera_name}.jpg')
        cv2.imwrite(save_path, img)