从metashape导出深度图，从深度图恢复密集点云

最新推荐文章于 2025-03-19 16:40:23 发布

冷面杰拉德

最新推荐文章于 2025-03-19 16:40:23 发布

阅读量1k

点赞数 5

分类专栏：摄影测量文章标签： python

本文链接：https://blog.csdn.net/weixin_44153180/article/details/136415311

版权

摄影测量专栏收录该内容

11 篇文章

订阅专栏

从metashape导出深度图，从深度图恢复密集点云

1.从metashape导出深度图

参考：https://blog.csdn.net/WHU_StudentZhong/article/details/123107072?spm=1001.2014.3001.5502

2.从深度图建立密集点云

首先从metashape导出blockExchange格式的xml文件，里面记录了相机的内外参等信息
file->export->export camera
下面就开始上代码

import numpy as np
import xml.etree.ElementTree as ET
import os
import cv2


def write_point_cloud(ply_filename, points):
    formatted_points = []
    for point in points:
        formatted_points.append("%f %f %f %d %d %d 0\n" % (point[0], point[1], point[2], point[3], point[4], point[5]))

    out_file = open(ply_filename, "w")
    out_file.write('''ply
    format ascii 1.0
    element vertex %d
    property float x
    property float y
    property float z
    property uchar blue
    property uchar green
    property uchar red
    property uchar alpha
    end_header
    %s
    ''' % (len(points), "".join(formatted_points)))
    out_file.close()


def depth_image_to_point_cloud(rgb, depth, scale, K, pose):

    u = range(0, rgb.shape[1])
    v = range(0, rgb.shape[0])

    u, v = np.meshgrid(u, v)
    u = u.astype(float)
    v = v.astype(float)


    Z = depth.astype(float) / scale
    X = (u - K[0, 2]) * Z / K[0, 0]
    Y = (v - K[1, 2]) * Z / K[1, 1]


    X = np.ravel(X)
    Y = np.ravel(Y)
    Z = np.ravel(Z)

    valid = (Z > 0) & (Z < 300)
    # 计算 valid 中有效元素的数量
    valid_count = np.count_nonzero(valid)

    # 打印结果
    print("有效元素的数量:", valid_count)

    X = X[valid]
    Y = Y[valid]
    Z = Z[valid]

    position = np.vstack((X, Y, Z, np.ones(len(X))))
    position = np.dot(pose, position)

    R = np.ravel(rgb[:, :, 0])[valid]
    G = np.ravel(rgb[:, :, 1])[valid]
    B = np.ravel(rgb[:, :, 2])[valid]

    points = np.transpose(np.vstack((position[0:3, :], R, G, B))).tolist()

    return points


def getDepth(position,depth, scale, K, pose):
    inverse_pose = np.linalg.inv(pose)
    position_camera=np.dot(inverse_pose, position)
    
    Z = position_camera[2]*scale
    u=round(position_camera[0]*K[0, 0]/Z+K[0, 2])
    v=round(position_camera[1]*K[1, 1]/Z+K[1, 2])
    z=depth[v][u]
    return Z
    


# image_files: XXXXXX.jpg (RGB, 24-bit, jpg)
# depth_files: XXXXXX.tif (32-bit, tif)
# poses: camera-to-world, 4×4 matrix in homogeneous coordinates
def build_point_cloud(K,dist_coeffs, scale, view_ply_in_world_coordinate,images,dataPath):
    images_path = os.path.join(dataPath, 'undistort')
    depth_path = os.path.join(dataPath, 'depth')

    for id, image in images.items():
        # 获取文件所在的文件夹路径
        
        image_name=os.path.join(images_path,image['imgName'])
        depth_name=os.path.join(depth_path,image['imgName'])
        depth_name=os.path.splitext(depth_name)[0]
        depth_name = depth_name + '.tif'
        rgb = cv2.imread(image_name)
        depth = cv2.imread(depth_name, cv2.IMREAD_UNCHANGED)
        height, width = depth.shape[:2]
        # # 创建深度图畸变映射
        # mapx, mapy = cv2.initUndistortRectifyMap(K, dist_coeffs, None, None, (width,height), cv2.CV_32FC1)
        # # 应用映射
        # depth = cv2.remap(depth, mapx, mapy, interpolation=cv2.INTER_NEAREST)

        rgb_resized=cv2.resize(rgb,(width, height))
        # # 创建rgb畸变映射
        # mapx, mapy = cv2.initUndistortRectifyMap(K, dist_coeffs, None, None, (width,height), cv2.CV_32FC1)
        # rgb_resized = cv2.remap(rgb_resized, mapx, mapy, interpolation=cv2.INTER_NEAREST)
        
        depth_values = depth.astype(np.float32)
        #显示深度值范围
        print('最小深度值:', np.min(depth_values))
        print('最大深度值:', np.max(depth_values))

        if view_ply_in_world_coordinate:
            current_points_3D = depth_image_to_point_cloud(rgb_resized, depth, scale=scale, K=K, pose=image['trans_M'])
        else:
            current_points_3D = depth_image_to_point_cloud(rgb_resized, depth, scale=scale, K=K, pose=image['trans_M'])
        save_ply_name = os.path.basename(os.path.splitext(image_name)[0]) + ".ply"
        #save_ply_path = os.path.join(dataPath, "point_clouds_frompoint")
        save_ply_path = os.path.join(dataPath, "point_clouds_any")

        if not os.path.exists(save_ply_path):  # 判断是否存在文件夹如果不存在则创建为文件夹
            os.mkdir(save_ply_path)
        write_point_cloud(os.path.join(save_ply_path, save_ply_name), current_points_3D)

def compute_K_matrix(focal_length, principal_point_x, principal_point_y):
    """
    计算内参数矩阵 K
    
    参数:
        focal_length:焦距
        principal_point_x: 主点在 x 方向上的像素坐标
        principal_point_y: 主点在 y 方向上的像素坐标
        skew: 像素间的 skew factor，默认为 0
    返回值:
        K 矩阵
    """
    K = np.array([[focal_length, 0, principal_point_x],
                  [0, focal_length, principal_point_y],
                  [0, 0, 1]])
    return K

def loadAllfrom_xml(path):
    # 解析 XML 文件
    doc = ET.parse(path)
    root = doc.getroot()

    # 初始化相机和图像列表
    cameras = {}
    images = {}

    # 查找相机元素
    photogroups = root.find(".//Photogroups")
    if photogroups is None:
        print("error: invalid scene file")
        return False

    # 解析相机信息
    for photogroup in photogroups.findall("Photogroup"):
        camera_model_type = photogroup.find("CameraModelType")
        if camera_model_type is None or camera_model_type.text != "Perspective":
            continue

        image_dimensions = photogroup.find("ImageDimensions")
        if image_dimensions is None:
            continue
        width = int(image_dimensions.find("Width").text)
        height = int(image_dimensions.find("Height").text)
        resolution_scale = max(width, height)

        focal_length_pixels = photogroup.find("FocalLengthPixels")
        if focal_length_pixels is not None:
            f = float(focal_length_pixels.text)
        else:
            focal_length = float(photogroup.find("FocalLength").text)
            sensor_size = float(photogroup.find("SensorSize").text)
            f = focal_length * resolution_scale / sensor_size

        principal_point = photogroup.find("PrincipalPoint")
        if principal_point is not None:
            cx = float(principal_point.find("x").text) 
            cy = float(principal_point.find("y").text)


        pxl_size = 1

        # 解析畸变参数
        distortion = photogroup.find("Distortion")
        k1 = k2 = k3 = p1 = p2 = 0
        if distortion is not None:
            k1_elem = distortion.find("K1")
            if k1_elem is not None:
                k1 = float(k1_elem.text)
            k2_elem = distortion.find("K2")
            if k2_elem is not None:
                k2 = float(k2_elem.text)
            k3_elem = distortion.find("K3")
            if k3_elem is not None:
                k3 = float(k3_elem.text)
            p1_elem = distortion.find("P1")
            if p1_elem is not None:
                p1 = float(p1_elem.text)
            p2_elem = distortion.find("P2")
            if p2_elem is not None:
                p2 = float(p2_elem.text)  
                
        camera = {'width': width, 'height': height, 'f': f, 'cx': cx, 'cy': cy, 'pxlSize': pxl_size,
                  'k1': k1, 'k2': k2, 'k3': k3, 'p1': p2, 'p2': p1}
        cameras[1] = camera

        # 解析图像信息
        for photo in photogroup.findall("Photo"):
            img_id = int(photo.find("Id").text)
            image_path = photo.find("ImagePath").text
            found = image_path.rfind("/")
            img_name = image_path[found + 1:]

            photo_pose = photo.find("Pose")
            if photo_pose is None:
                continue
            rotation = photo_pose.find("Rotation")
            if rotation is None:
                continue
            rotation_matrix = np.array([[float(rotation.find("M_00").text), float(rotation.find("M_01").text),float(rotation.find("M_02").text)],
                            [float(rotation.find("M_10").text),float(rotation.find("M_11").text), float(rotation.find("M_12").text)],
                            [float(rotation.find("M_20").text), float(rotation.find("M_21").text),float(rotation.find("M_22").text)]])

            
            center = photo_pose.find("Center")
            if center is None:
                continue
            Xs = float(center.find("x").text)
            Ys = float(center.find("y").text)
            Zs = float(center.find("z").text)
            position= np.array([Xs,Ys,Zs])
            trans=create_transformation_matrix(rotation_matrix,position)

            img = {'iid': img_id,'image_path':image_path, 'imgName': img_name,'R':rotation_matrix,
                   'Xs': Xs, 'Ys': Ys, 'Zs': Zs,'trans_M':trans}
            images[img_id] = img

    return images, cameras

def create_transformation_matrix(rotation_matrix, translation_vector):
    # 创建一个 4x4 的单位矩阵
    transformation_matrix = np.eye(4)
    
    transformation_matrix[:3, :3] = rotation_matrix.T#将rotation求逆
    
    transformation_matrix[:3, 3] = translation_vector
    
    return transformation_matrix


images, cameras = loadAllfrom_xml('pos_undistort.xml')
K=compute_K_matrix(cameras[1]['f'],cameras[1]['cx'],cameras[1]['cy'])
dist_coeffs = np.array([cameras[1]['k1'],cameras[1]['k2'], cameras[1]['p1'], cameras[1]['p2'], cameras[1]['k3']])

view_ply_in_world_coordinate = True
K=K*0.5
K[2][2]=1

build_point_cloud(K,dist_coeffs*0.5,1,view_ply_in_world_coordinate,images,'G:\\graduate2024\\experiment\\test')