从metashape导出深度图,从深度图恢复密集点云
1.从metashape导出深度图
参考:https://blog.csdn.net/WHU_StudentZhong/article/details/123107072?spm=1001.2014.3001.5502
2.从深度图建立密集点云
首先从metashape导出blockExchange格式的xml文件,里面记录了相机的内外参等信息
file->export->export camera
下面就开始上代码
import numpy as np
import xml.etree.ElementTree as ET
import os
import cv2
def write_point_cloud(ply_filename, points):
formatted_points = []
for point in points:
formatted_points.append("%f %f %f %d %d %d 0\n" % (point[0], point[1], point[2], point[3], point[4], point[5]))
out_file = open(ply_filename, "w")
out_file.write('''ply
format ascii 1.0
element vertex %d
property float x
property float y
property float z
property uchar blue
property uchar green
property uchar red
property uchar alpha
end_header
%s
''' % (len(points), "".join(formatted_points)))
out_file.close()
def depth_image_to_point_cloud(rgb, depth, scale, K, pose):
u = range(0, rgb.shape[1])
v = range(0, rgb.shape[0])
u, v = np.meshgrid(u, v)
u = u.astype(float)
v = v.astype(float)
Z = depth.astype(float) / scale
X = (u - K[0, 2]) * Z / K[0, 0]
Y = (v - K[1, 2]) * Z / K[1, 1]
X = np.ravel(X)
Y = np.ravel(Y)
Z = np.ravel(Z)
valid = (Z > 0) & (Z < 300)
# 计算 valid 中有效元素的数量
valid_count = np.count_nonzero(valid)
# 打印结果
print("有效元素的数量:", valid_count)
X = X[valid]
Y = Y[valid]
Z = Z[valid]
position = np.vstack((X, Y, Z, np.ones(len(X))))
position = np.dot(pose, position)
R = np.ravel(rgb[:, :, 0])[valid]
G = np.ravel(rgb[:, :, 1])[valid]
B = np.ravel(rgb[:, :, 2])[valid]
points = np.transpose(np.vstack((position[0:3, :], R, G, B))).tolist()
return points
def getDepth(position,depth, scale, K, pose):
inverse_pose = np.linalg.inv(pose)
position_camera=np.dot(inverse_pose, position)
Z = position_camera[2]*scale
u=round(position_camera[0]*K[0, 0]/Z+K[0, 2])
v=round(position_camera[1]*K[1, 1]/Z+K[1, 2])
z=depth[v][u]
return Z
# image_files: XXXXXX.jpg (RGB, 24-bit, jpg)
# depth_files: XXXXXX.tif (32-bit, tif)
# poses: camera-to-world, 4×4 matrix in homogeneous coordinates
def build_point_cloud(K,dist_coeffs, scale, view_ply_in_world_coordinate,images,dataPath):
images_path = os.path.join(dataPath, 'undistort')
depth_path = os.path.join(dataPath, 'depth')
for id, image in images.items():
# 获取文件所在的文件夹路径
image_name=os.path.join(images_path,image['imgName'])
depth_name=os.path.join(depth_path,image['imgName'])
depth_name=os.path.splitext(depth_name)[0]
depth_name = depth_name + '.tif'
rgb = cv2.imread(image_name)
depth = cv2.imread(depth_name, cv2.IMREAD_UNCHANGED)
height, width = depth.shape[:2]
# # 创建深度图畸变映射
# mapx, mapy = cv2.initUndistortRectifyMap(K, dist_coeffs, None, None, (width,height), cv2.CV_32FC1)
# # 应用映射
# depth = cv2.remap(depth, mapx, mapy, interpolation=cv2.INTER_NEAREST)
rgb_resized=cv2.resize(rgb,(width, height))
# # 创建rgb畸变映射
# mapx, mapy = cv2.initUndistortRectifyMap(K, dist_coeffs, None, None, (width,height), cv2.CV_32FC1)
# rgb_resized = cv2.remap(rgb_resized, mapx, mapy, interpolation=cv2.INTER_NEAREST)
depth_values = depth.astype(np.float32)
#显示深度值范围
print('最小深度值:', np.min(depth_values))
print('最大深度值:', np.max(depth_values))
if view_ply_in_world_coordinate:
current_points_3D = depth_image_to_point_cloud(rgb_resized, depth, scale=scale, K=K, pose=image['trans_M'])
else:
current_points_3D = depth_image_to_point_cloud(rgb_resized, depth, scale=scale, K=K, pose=image['trans_M'])
save_ply_name = os.path.basename(os.path.splitext(image_name)[0]) + ".ply"
#save_ply_path = os.path.join(dataPath, "point_clouds_frompoint")
save_ply_path = os.path.join(dataPath, "point_clouds_any")
if not os.path.exists(save_ply_path): # 判断是否存在文件夹如果不存在则创建为文件夹
os.mkdir(save_ply_path)
write_point_cloud(os.path.join(save_ply_path, save_ply_name), current_points_3D)
def compute_K_matrix(focal_length, principal_point_x, principal_point_y):
"""
计算内参数矩阵 K
参数:
focal_length:焦距
principal_point_x: 主点在 x 方向上的像素坐标
principal_point_y: 主点在 y 方向上的像素坐标
skew: 像素间的 skew factor,默认为 0
返回值:
K 矩阵
"""
K = np.array([[focal_length, 0, principal_point_x],
[0, focal_length, principal_point_y],
[0, 0, 1]])
return K
def loadAllfrom_xml(path):
# 解析 XML 文件
doc = ET.parse(path)
root = doc.getroot()
# 初始化相机和图像列表
cameras = {}
images = {}
# 查找相机元素
photogroups = root.find(".//Photogroups")
if photogroups is None:
print("error: invalid scene file")
return False
# 解析相机信息
for photogroup in photogroups.findall("Photogroup"):
camera_model_type = photogroup.find("CameraModelType")
if camera_model_type is None or camera_model_type.text != "Perspective":
continue
image_dimensions = photogroup.find("ImageDimensions")
if image_dimensions is None:
continue
width = int(image_dimensions.find("Width").text)
height = int(image_dimensions.find("Height").text)
resolution_scale = max(width, height)
focal_length_pixels = photogroup.find("FocalLengthPixels")
if focal_length_pixels is not None:
f = float(focal_length_pixels.text)
else:
focal_length = float(photogroup.find("FocalLength").text)
sensor_size = float(photogroup.find("SensorSize").text)
f = focal_length * resolution_scale / sensor_size
principal_point = photogroup.find("PrincipalPoint")
if principal_point is not None:
cx = float(principal_point.find("x").text)
cy = float(principal_point.find("y").text)
pxl_size = 1
# 解析畸变参数
distortion = photogroup.find("Distortion")
k1 = k2 = k3 = p1 = p2 = 0
if distortion is not None:
k1_elem = distortion.find("K1")
if k1_elem is not None:
k1 = float(k1_elem.text)
k2_elem = distortion.find("K2")
if k2_elem is not None:
k2 = float(k2_elem.text)
k3_elem = distortion.find("K3")
if k3_elem is not None:
k3 = float(k3_elem.text)
p1_elem = distortion.find("P1")
if p1_elem is not None:
p1 = float(p1_elem.text)
p2_elem = distortion.find("P2")
if p2_elem is not None:
p2 = float(p2_elem.text)
camera = {'width': width, 'height': height, 'f': f, 'cx': cx, 'cy': cy, 'pxlSize': pxl_size,
'k1': k1, 'k2': k2, 'k3': k3, 'p1': p2, 'p2': p1}
cameras[1] = camera
# 解析图像信息
for photo in photogroup.findall("Photo"):
img_id = int(photo.find("Id").text)
image_path = photo.find("ImagePath").text
found = image_path.rfind("/")
img_name = image_path[found + 1:]
photo_pose = photo.find("Pose")
if photo_pose is None:
continue
rotation = photo_pose.find("Rotation")
if rotation is None:
continue
rotation_matrix = np.array([[float(rotation.find("M_00").text), float(rotation.find("M_01").text),float(rotation.find("M_02").text)],
[float(rotation.find("M_10").text),float(rotation.find("M_11").text), float(rotation.find("M_12").text)],
[float(rotation.find("M_20").text), float(rotation.find("M_21").text),float(rotation.find("M_22").text)]])
center = photo_pose.find("Center")
if center is None:
continue
Xs = float(center.find("x").text)
Ys = float(center.find("y").text)
Zs = float(center.find("z").text)
position= np.array([Xs,Ys,Zs])
trans=create_transformation_matrix(rotation_matrix,position)
img = {'iid': img_id,'image_path':image_path, 'imgName': img_name,'R':rotation_matrix,
'Xs': Xs, 'Ys': Ys, 'Zs': Zs,'trans_M':trans}
images[img_id] = img
return images, cameras
def create_transformation_matrix(rotation_matrix, translation_vector):
# 创建一个 4x4 的单位矩阵
transformation_matrix = np.eye(4)
transformation_matrix[:3, :3] = rotation_matrix.T#将rotation求逆
transformation_matrix[:3, 3] = translation_vector
return transformation_matrix
images, cameras = loadAllfrom_xml('pos_undistort.xml')
K=compute_K_matrix(cameras[1]['f'],cameras[1]['cx'],cameras[1]['cy'])
dist_coeffs = np.array([cameras[1]['k1'],cameras[1]['k2'], cameras[1]['p1'], cameras[1]['p2'], cameras[1]['k3']])
view_ply_in_world_coordinate = True
K=K*0.5
K[2][2]=1
build_point_cloud(K,dist_coeffs*0.5,1,view_ply_in_world_coordinate,images,'G:\\graduate2024\\experiment\\test')
需要注意的是,如果深度图的width height和rgb影像存在一个比例关系scale,则K也需要进行相应的尺度变换,例如,我使用的深度图长宽是rgb影像的一半,那么我的K乘以了一个0.5
另外由于我的影像已经事先去除了畸变,其畸变系数为0,因此此代码没有提供去除影像畸变的部分,需自行添加