通过物体的位置进行物体位姿掩码的自动标注

文章介绍了如何利用物体的CAD模型和位置信息,结合HALCON库实现物体掩码的自动标注,通过3D重建和相机位姿计算来改进标注效果,特别提到了ObjectDatasetTools库在构建位姿估计数据集中的应用。
摘要由CSDN通过智能技术生成

通过物体的位置进行物体位姿掩码的自动标注

利用物体的CAD和物体的位置值进行掩码标注

我们参考DEEPMIND提供的标注工具,通过SAM构建了半自动化的标注工具,但是对我们的数据集来说,分割的结果并不好,但是我们场景中的物体具有CAD模型这个先验的条件,如果没有CAD模型可以使用3D重建方法进行重建,例如colmap方法获取物体的3D模型和相机的位姿值,是一个可行的方法。我们使用的方法是通过HALCON进行获取相机的位姿值,通过CAD模型转换为mesh,通过trimesh进行渲染得到,我们的代码来自物体六自由度位姿估计方法的数据集构造方法库,
ObjectDatasetTools. 这是一个构建位姿估计数据集的通用库,在此记录一下具体的代码:

#通过物体的位姿值和CAD模型对物体的掩码进行计算
# copy from https://github.com/BraveBoBo/ObjectDatasetTools
# 使用halcon的库进行计算 物体的实际位姿只能通过halcon的库进行计算

# 已知物体的位姿值
"""
create_label_files.py
---------------

This script produces:

1. Reorient the processed registered_scene mesh in a mesh with an AABB centered at the
   origin and the same dimensions as the OBB, saved under the name foldername.ply
2. Create label files with class labels and projections of 3D BBs in the format
   singleshotpose requires, saved under labels
3. Create pixel-wise masks, saved under mask
4. Save the homogeneous transform of object in regards to the foldername.ply in each 
   frame
"""

import os
import cv2
import glob
import trimesh
import numpy as np
import sys
from tqdm import trange
from scipy.optimize import minimize
import json

# Reference HALCON camera intrinsic matrix K:
#   fx = 5446.400, fy = 5447.326 (focal lengths in pixels),
#   principal point (ppx, ppy) = (925.576, 553.982).
# NOTE(review): this constant is not used below — the script builds K from
# intrinsics.json via get_camera_intrinsic(); kept for reference.
cam_intrinsic = np.array([
                    [5446.400, 0.0, 925.576],
                    [0.0, 5447.326, 553.982],
                    [0.0, 0.0, 1.0]])
# HALCON camera intrinsic values


def get_camera_intrinsic(folder='./data/'):
    """Load camera intrinsics from ``<folder>/intrinsics.json``.

    Parameters
    ----------
    folder : str
        Directory containing ``intrinsics.json`` with keys ``fx``, ``fy``,
        ``ppx``, ``ppy`` (callers also read ``width``/``height`` from it).

    Returns
    -------
    tuple
        ``(camera_intrinsics, K)`` — the raw JSON dict and the 3x3 pinhole
        intrinsic matrix built from it.
    """
    # os.path.join works whether or not `folder` carries a trailing slash
    # (the original `folder + 'intrinsics.json'` required one).
    with open(os.path.join(folder, 'intrinsics.json'), 'r') as f:
        camera_intrinsics = json.load(f)

    K = np.zeros((3, 3), dtype='float64')
    K[0, 0], K[0, 2] = float(camera_intrinsics['fx']), float(camera_intrinsics['ppx'])
    K[1, 1], K[1, 2] = float(camera_intrinsics['fy']), float(camera_intrinsics['ppy'])
    K[2, 2] = 1.
    return (camera_intrinsics, K)

def compute_projection(points_3D, internal_calibration):
    """Project 3D points into the image plane with a pinhole model.

    Parameters
    ----------
    points_3D : (N, 3) array
        Points already expressed in the camera coordinate frame.
    internal_calibration : (3, 3) array
        Camera intrinsic matrix K.

    Returns
    -------
    (2, N) float32 array
        Pixel coordinates after the perspective divide by depth.
    """
    # K @ P^T gives homogeneous image coordinates per column.
    homogeneous = internal_calibration.dot(points_3D.T)
    # Divide x and y rows by the depth row, then cast to float32
    # (matching the original output dtype).
    pixels = homogeneous[:2, :] / homogeneous[2, :]
    return pixels.astype('float32')


def print_usage():
    """Print the command-line usage message for this script."""
    usage_lines = (
        "Usage: create_label_files.py <path>",
        "path: all or name of the folder",
        "e.g., create_label_files.py all, create_label_files.py LINEMOD/Cheezit",
    )
    for line in usage_lines:
        print(line)
    
## 将txt转为npy文件 储存位姿值 
def get_length(folder):
    """Return the number of entries (files and subdirectories) in *folder*."""
    entries = os.listdir(folder)
    return len(entries)

def load_txt(folder, id):
    """Load the pose for frame *id* from ``<folder>/<id:06d>.txt``.

    Parameters
    ----------
    folder : str
        Directory containing per-frame pose text files.
    id : int
        Frame index; the file name is the zero-padded 6-digit index.

    Returns
    -------
    np.ndarray
        4x4 homogeneous transform: the 3x4 pose from the file with a
        ``[0, 0, 0, 1]`` bottom row appended.
    """
    # BUG FIX: the original built the path with a literal backslash
    # (f"\{id:06d}"), which only works on Windows; os.path.join is portable.
    pose = np.loadtxt(os.path.join(folder, f"{id:06d}.txt"))
    pose = pose.reshape(3, 4)
    pose = np.vstack((pose, [0, 0, 0, 1]))
    return pose
    
    
if __name__ == "__main__":
    classlabel ='target'
    folder = './data/'
    camera_intrinsics, K = get_camera_intrinsic(folder)
    path_label = folder + "labels"
    if not os.path.exists(path_label):
        os.makedirs(path_label)

    path_mask = folder + "mask"
    if not os.path.exists(path_mask):
        os.makedirs(path_mask)

    path_transforms = folder + "transforms"
    if not os.path.exists(path_transforms):
        os.makedirs(path_transforms)

    transforms_file = folder + "transforms.npy"
    try:
        transforms = np.load(transforms_file)
    except:
        print("transforms not computed, run compute_gt_poses.py first")
    
    mesh = trimesh.load(os.path.join(folder,"model","color_target.ply"))
    mesh.apply_scale(10)

    Tform = mesh.apply_obb()
    
    mesh.export(file_obj = "./data/model/target.ply")

    points = mesh.bounding_box.vertices
    center = mesh.centroid
    min_x = np.min(points[:,0])
    min_y = np.min(points[:,1])
    min_z = np.min(points[:,2])
    max_x = np.max(points[:,0])
    max_y = np.max(points[:,1])
    max_z = np.max(points[:,2])
    points = np.array([[min_x, min_y, min_z], [min_x, min_y, max_z], [min_x, max_y, min_z],
                        [min_x, max_y, max_z], [max_x, min_y, min_z], [max_x, min_y, max_z],
                        [max_x, max_y, min_z], [max_x, max_y, max_z]])

    points_original = np.concatenate((np.array([[center[0],center[1],center[2]]]), points))
    points_original = trimesh.transformations.transform_points(points_original,
                                                                np.linalg.inv(Tform))
                
    projections = [[],[]]
    
    for i in trange(len(transforms)):
        mesh_copy = mesh.copy()
        img = cv2.imread(folder+"rgb/"+f"{i:06d}" + ".png")
        transform = transforms[i]
        # transform = np.linalg.inv(transforms[i]) # 根据实验这个halcon标注的位姿值是反的 不需要进行取反的操作
        transformed = trimesh.transformations.transform_points(points_original, transform)

        
        corners = compute_projection(transformed,K)
        corners = corners.T
        corners[:,0] = corners[:,0]/int(camera_intrinsics['width'])
        corners[:,1] = corners[:,1]/int(camera_intrinsics['height'])

        T = np.dot(transform, np.linalg.inv(Tform))
        mesh_copy.apply_transform(T)
        filename = path_transforms + "/"+ str(i)+".npy"
        np.save(filename, T)
        
        sample_points = mesh_copy.sample(10000)
        masks = compute_projection(sample_points,K)
        masks = masks.T


        min_x = np.min(masks[:,0])
        min_y = np.min(masks[:,1])
        max_x = np.max(masks[:,0])
        max_y = np.max(masks[:,1])


        image_mask = np.zeros(img.shape[:2],dtype = np.uint8)
        for pixel in masks:
            cv2.circle(image_mask,(int(pixel[0]),int(pixel[1])), 5, 255, -1)

        thresh = cv2.threshold(image_mask, 30, 255, cv2.THRESH_BINARY)[1]

        contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
        cnt = max(contours, key=cv2.contourArea)

        image_mask = np.zeros(img.shape[:2],dtype = np.uint8)
        cv2.drawContours(image_mask, [cnt], -1, 255, -1)

        mask_path = path_mask+"/"+ str(i)+".png"
        cv2.imwrite(mask_path, image_mask)
            
        
        
        file = open(path_label+"/"+ str(i)+".txt","w")
        message = str(classlabel)[:8] + " "
        file.write(message)
        for pixel in corners:
            for digit in pixel:
                message = str(digit)[:8]  + " "
                file.write(message)
        message = str((max_x-min_x)/float(camera_intrinsics['width']))[:8]  + " "
        file.write(message) 
        message = str((max_y-min_y)/float(camera_intrinsics['height']))[:8]
        file.write(message)
        file.close()
  • 8
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值