一、概况
本文的思路是利用关键点算法得到物体两端的点 (x1,y1)、(x2,y2);然后使用深度相机 d435i 得到对应两点的深度值,从而得到两点的三维坐标,最后算出两点之间的三维距离,即为物体的真实尺寸。
二、关键点算法
采用 yolov8 关键点算法;制作训练数据前,首先安装标注工具 labelme。
打开cmd
conda activate pytorch1.10.0
labelme
创建矩形 :cup
创建控制点:0,1
将全部json文件汇总到keypoints.json 代码:
import os
import sys
import glob
import json
import argparse
import numpy as np
from tqdm import tqdm
from labelme import utils
class Labelme2coco_keypoints():
    """Convert labelme keypoint annotations into a COCO-format keypoint dataset.

    Args:
        args: parsed command-line arguments; the fields used are
            - class_name: root category name (mapped to COCO category id 1)
            - join_num: number of keypoints attached to each bounding box
    """

    def __init__(self, args):
        # Keep a reference to the CLI args so the methods below do not have
        # to rely on a module-level global `args` (the original code did,
        # which broke the class when imported from another module).
        self.args = args
        self.classname_to_id = {args.class_name: 1}
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0

    def save_coco_json(self, instance, save_path):
        """Serialize the COCO dict to `save_path` (UTF-8, human-readable)."""
        # `with` guarantees the handle is closed (the original leaked it).
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def read_jsonfile(self, path):
        """Load a single labelme JSON annotation file."""
        with open(path, "r", encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        """Return the axis-aligned [x, y, w, h] box enclosing `points`."""
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    def _get_keypoints(self, points, keypoints, num_keypoints):
        """Append one labelme point to the flat COCO keypoint list.

        COCO keypoints are stored flat as [x1, y1, v1, x2, y2, v2, ...]
        where v is a visibility flag; here 0 = not labeled, 1 = visible.
        A point at exactly (0, 0) is treated as missing / invisible.

        Returns:
            (keypoints, num_keypoints): the extended flat list and the
            updated count of visible keypoints.
        """
        if points[0] == 0 and points[1] == 0:
            visible = 0
        else:
            visible = 1
            num_keypoints += 1
        keypoints.extend([points[0], points[1], visible])
        return keypoints, num_keypoints

    def _image(self, obj, path):
        """Build the COCO `image` entry for one labelme file.

        Produces the four attributes: id, file_name, height, width, e.g.
        {"file_name": "training/rgb/00031426.jpg", "height": 224,
         "width": 224, "id": 31426}
        """
        image = {}
        # Decode the base64 `imageData` payload into an array to read the size.
        img_x = utils.img_b64_to_arr(obj['imageData'])
        # shape[:2] is (height, width) and also works for grayscale input
        # (the original shape[:-1] assumed a trailing channel axis).
        image['height'], image['width'] = img_x.shape[:2]
        # Sequential ids; image file is assumed to sit next to the json
        # with the same stem and a .jpg extension.
        self.img_id = self.img_id + 1
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace(".json", ".jpg")
        return image

    def _annotation(self, bboxes_list, keypoints_list, json_path):
        """Create one COCO annotation per bounding box.

        Args:
            bboxes_list: labelme rectangle shapes (one per object)
            keypoints_list: labelme point shapes, join_num per rectangle,
                in the same order as the rectangles
            json_path: source file path, used only for error reporting
        """
        join_num = self.args.join_num
        if len(keypoints_list) != join_num * len(bboxes_list):
            print('you loss {} keypoint(s) with file {}'.format(
                join_num * len(bboxes_list) - len(keypoints_list), json_path))
            print('Please check !!!')
            sys.exit()
        for i, bbox_shape in enumerate(bboxes_list):
            annotation = {}
            keypoints = []
            num_keypoints = 0
            label = bbox_shape['label']
            bbox = bbox_shape['points']
            annotation['id'] = self.ann_id
            annotation['image_id'] = self.img_id
            annotation['category_id'] = int(self.classname_to_id[label])
            annotation['iscrowd'] = 0
            annotation['area'] = 1.0
            annotation['segmentation'] = [np.asarray(bbox).flatten().tolist()]
            annotation['bbox'] = self._get_box(bbox)
            # Keypoints are grouped per box: box i owns the slice
            # [i*join_num, (i+1)*join_num) of keypoints_list.
            for keypoint in keypoints_list[i * join_num: (i + 1) * join_num]:
                point = keypoint['points']
                keypoints, num_keypoints = self._get_keypoints(point[0], keypoints, num_keypoints)
            # Assign outside the loop so the key exists even for join_num == 0
            # (the original only set it inside the loop).
            annotation['keypoints'] = keypoints
            annotation['num_keypoints'] = num_keypoints
            self.ann_id += 1
            self.annotations.append(annotation)

    def _init_categories(self):
        """Initialize the COCO `categories` list, e.g.

        "categories": [
            {"supercategory": "hand", "id": 1, "name": "hand",
             "keypoints": ["wrist", "thumb1", ...], "skeleton": []}
        ]
        """
        for name, cat_id in self.classname_to_id.items():
            category = {}
            category['supercategory'] = name
            category['id'] = cat_id
            category['name'] = name
            # join_num generic keypoint names "1".."join_num". Note: the COCO
            # spec names this field "keypoints" (the original wrote "keypoint").
            category['keypoints'] = [str(i + 1) for i in range(self.args.join_num)]
            self.categories.append(category)

    def to_coco(self, json_path_list):
        """Convert all labelme files in `json_path_list` to one COCO dict.

        Returns the full COCO structure (info/license/images/annotations/
        categories), ready to be written with save_coco_json().
        """
        self._init_categories()
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)  # parse one annotation file
            self.images.append(self._image(obj, json_path))  # image entry
            shapes = obj['shapes']  # labelme shape annotations
            bboxes_list, keypoints_list = [], []
            for shape in shapes:
                if shape['shape_type'] == 'rectangle':  # bounding boxes
                    bboxes_list.append(shape)
                elif shape['shape_type'] == 'point':  # keypoints
                    keypoints_list.append(shape)
            self._annotation(bboxes_list, keypoints_list, json_path)
        keypoints = {}
        keypoints['info'] = {'description': 'Lableme Dataset', 'version': 1.0, 'year': 2021}
        keypoints['license'] = ['BUAA']
        keypoints['images'] = self.images
        keypoints['annotations'] = self.annotations
        keypoints['categories'] = self.categories
        return keypoints
if __name__ == '__main__':
    # CLI for the labelme -> COCO keypoint conversion.
    parser = argparse.ArgumentParser()
    parser.add_argument("--class_name", default="cup", help="class name", type=str)
    parser.add_argument("--input", default=r"F:\xushuanglong\datasets\cup\cup\train\json", help="json file path (labelme)", type=str)
    parser.add_argument("--output", default=r"F:\xushuanglong\datasets\cup\cup\train", help="output file path (coco format)", type=str)
    parser.add_argument("--join_num", default=2, help="number of join", type=int)
    # parser.add_argument("--ratio", help="train and test split ratio", type=float, default=0.5)
    # NOTE(review): some class methods read the module-level name `args`
    # directly — keep this variable name.
    args = parser.parse_args()
    labelme_path = args.input
    saved_coco_path = args.output
    # Collect every labelme .json in the input directory (non-recursive).
    json_list_path = glob.glob(labelme_path + "/*.json")
    print('{} for json files'.format(len(json_list_path)))
    print('Start transform please wait ...')
    l2c_json = Labelme2coco_keypoints(args)  # build the dataset converter
    # Generate the COCO-format data and write it out as keypoints.json
    keypoints = l2c_json.to_coco(json_list_path)
    l2c_json.save_coco_json(keypoints, os.path.join(saved_coco_path, "keypoints.json"))
然后将keypoints.json文件转变为yolo格式标签文件 代码:
# COCO 格式的数据集转化为 YOLO 格式的数据集
# --json_path 输入的json文件路径
# --save_path 保存的文件夹名字,默认为当前目录下的labels。
import os
import json
from tqdm import tqdm
import argparse
# CLI for the COCO -> YOLO label conversion.
parser = argparse.ArgumentParser()
# Path of the input COCO json file — change to your own location.
parser.add_argument('--json_path',
                    default=r'F:\xushuanglong\datasets\cup\cup\train\keypoints.json', type=str,
                    help="input: coco format(json)")
# Directory where the generated .txt label files are saved.
parser.add_argument('--save_path', default=r'F:\xushuanglong\datasets\cup\cup\train\labels', type=str,
                    help="specify where to save the output dir of labels")
arg = parser.parse_args()
def convert(size, box):
    """Convert a COCO [x, y, w, h] box to normalized YOLO (cx, cy, w, h).

    Args:
        size: (image_width, image_height) in pixels.
        box: COCO box — top-left corner plus width/height, in pixels.

    Returns:
        Tuple (cx, cy, w, h) with the box center and size, each divided by
        the image dimension and rounded to 6 decimal places.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    # Box center in pixels, then everything normalized to [0, 1].
    center_x = box[0] + box[2] / 2.0
    center_y = box[1] + box[3] / 2.0
    return (round(center_x * scale_x, 6),
            round(center_y * scale_y, 6),
            round(box[2] * scale_x, 6),
            round(box[3] * scale_y, 6))
if __name__ == '__main__':
    json_file = arg.json_path  # COCO "object instance" style annotation json
    ana_txt_save_path = arg.save_path  # directory the YOLO .txt labels go to
    data = json.load(open(json_file, 'r'))
    if not os.path.exists(ana_txt_save_path):
        os.makedirs(ana_txt_save_path)
    id_map = {}  # COCO category ids may be non-contiguous! re-map to 0..N-1
    with open(os.path.join(ana_txt_save_path, 'classes.txt'), 'w') as f:
        # Write classes.txt: one category name per line, in remapped order.
        for i, category in enumerate(data['categories']):
            f.write(category['name']+"\n")
            id_map[category['id']] = i
    # print(id_map)
    # Change the path below if you also need an image-list file.
    # list_file = open(os.path.join(ana_txt_save_path, 'train2017.txt'), 'w')
    for img in tqdm(data['images']):
        filename = img["file_name"]
        img_width = img["width"]
        img_height = img["height"]
        img_id = img["id"]
        head, tail = os.path.splitext(filename)
        ana_txt_name = head + ".txt"  # matching txt name, same stem as the jpg
        f_txt = open(os.path.join(ana_txt_save_path, ana_txt_name), 'w')
        # O(images * annotations) scan; acceptable for small datasets.
        for ann in data['annotations']:
            if ann['image_id'] == img_id:
                box = convert((img_width, img_height), ann["bbox"])
                f_txt.write("%s %s %s %s %s" % (id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))
                counter=0
                # Keypoints are stored flat as [x, y, v, x, y, v, ...].
                # NOTE(review): a value equal to 0, 1 or 2 is *assumed* to be
                # the visibility flag v; coordinates are told apart from flags
                # only by this value test, so a genuine coordinate of 0/1/2
                # would be misclassified — verify against the annotation data.
                for i in range(len(ann["keypoints"])):
                    if ann["keypoints"][i] == 2 or ann["keypoints"][i] == 1 or ann["keypoints"][i] == 0:
                        # Visibility flag, written as v+1 (visible 1 -> "2.000000",
                        # which the cleanup script below strips from the labels).
                        f_txt.write(" %s " % format(ann["keypoints"][i] + 1,'6f'))
                        counter=0
                    else:
                        if counter==0:
                            # First coordinate after a flag: x, normalized by width.
                            f_txt.write(" %s " % round((ann["keypoints"][i] / img_width),6))
                        else:
                            # Subsequent coordinate: y, normalized by height.
                            f_txt.write(" %s " % round((ann["keypoints"][i] / img_height),6))
                        counter+=1
                f_txt.write("\n")
        f_txt.close()
然后删除yolo格式标签里面所有 ‘ 2.000000’ 代码:
import os
def remove_value_from_file(file_path, value_to_remove):
    """Delete every occurrence of `value_to_remove` from the file in place.

    Reads the whole file, removes the given substring, strips leading and
    trailing whitespace from the remaining text, and rewrites the file.
    """
    with open(file_path, 'r') as fh:
        text = fh.read()
    # Drop the target substring, then trim surrounding whitespace.
    cleaned = text.replace(value_to_remove, '').strip()
    with open(file_path, 'w') as fh:
        fh.write(cleaned)
def process_directory(directory_path, value_to_remove):
    """Run remove_value_from_file on every .txt file directly under the directory."""
    for entry in os.listdir(directory_path):
        if not entry.endswith(".txt"):
            continue  # skip non-label files
        full_path = os.path.join(directory_path, entry)
        remove_value_from_file(full_path, value_to_remove)
        print(f"Processed file: {full_path}")
if __name__ == "__main__":
    directory_path = r"F:\xushuanglong\datasets\cup\cup\train\labels"  # replace with your own folder path
    # " 2.000000" — presumably the visibility values (v+1 with v=1) emitted
    # by the COCO->YOLO step above; YOLO pose labels do not want them here.
    value_to_remove = " 2.000000"
    process_directory(directory_path, value_to_remove)
训练:
from ultralytics import YOLO
# Load a model
model = YOLO(r'F:\xushuanglong\code\yolov8-pose-tiger\yolov8-tiger\ultralytics\weights\yolov8n-pose.pt')  # load a pretrained model (recommended for training)
# Train the model on the cup keypoint dataset described by the yaml config.
results = model.train(data=r'F:\xushuanglong\code\yolov8-pose-tiger\yolov8-tiger\ultralytics\cup-pose.yaml', epochs=100, imgsz=640, device='cuda')
三、测量物体尺寸
获取对齐图像帧与相机参数
frames = pipeline.wait_for_frames()  # wait for a frameset containing color and depth
aligned_frames = align.process(frames)  # align the depth frame to the color frame
aligned_depth_frame = aligned_frames.get_depth_frame()  # depth frame of the aligned frameset
aligned_color_frame = aligned_frames.get_color_frame()  # color frame of the aligned frameset
#### camera parameters ####
depth_intrin = aligned_depth_frame.profile.as_video_stream_profile().intrinsics  # depth intrinsics (used for pixel -> camera coordinate deprojection)
color_intrin = aligned_color_frame.profile.as_video_stream_profile().intrinsics  # color camera intrinsics
#### convert the frames to numpy arrays ####
img_color = np.asanyarray(aligned_color_frame.get_data())  # RGB image
img_depth = np.asanyarray(aligned_depth_frame.get_data())  # depth image (16-bit by default)
检测:
# Detection: conf threshold 0.3; stream=True yields a generator, take the first result
result = list(model(image, conf=0.3, stream=True))[0]  # inference
boxes = result.boxes  # Boxes object for bbox outputs
boxes = boxes.cpu().numpy()  # convert to numpy array
遍历keypoints
# Iterate over the keypoints
keypoints = result.keypoints  # Keypoints object for pose outputs
keypoints = keypoints.cpu().numpy()  # convert to numpy array
得到关键点二维坐标
# 2-D pixel coordinates of the two detected keypoints
point1 = (int(x0), int(y0))
point2 = (int(x1), int(y1))
获取两点深度
point1_dis = depth_frame.get_distance(x0, y0)  # depth at the first keypoint (RealSense get_distance returns metres)
point2_dis = depth_frame.get_distance(x1, y1)  # depth at the second keypoint
得到两点三维坐标
# Deproject each pixel + depth into a 3-D camera-space point using the depth intrinsics
threeD_point1 = rs.rs2_deproject_pixel_to_point(depth_intrin, [x0, y0], point1_dis)
threeD_point2 = rs.rs2_deproject_pixel_to_point(depth_intrin, [x1, y1], point2_dis)
计算两点之间的距离
# Euclidean distance between the two 3-D points = real-world size of the object
real_distance = np.linalg.norm(np.array(threeD_point2) - np.array(threeD_point1))
最终效果:
测量精度还是非常高:我的杯子实际长度大概 24cm,误差主要来自关键点推理的偏差,深度相机 d435i 的测距本身还是很准的。