标题很时髦/最近老师让制作出在mesh中漫游的动画,但使用open3d感觉没有光的质感,想用meshlab。但是meshlab是一款GUI软件,很难与脚本交互,于是想到使用python的鼠标键盘剪切板交互方式,实现在mesh中citywalk。
下文用到的所有代码和数据我都上传到了:
链接:https://pan.baidu.com/s/1ZXbo_BaRFOBLfN-Lkz36Pg
提取码:2023
欢迎下载!😊
Step 1: 生成mesh的轨迹
这里大致的思路是:自己在mesh/点云上选几段路径,依次设置每段的两个端点,然后在段内进行插值。我当时设置了7段,每段生成不同的帧数。
接下来对每段做插值即可,之所以要分段就是因为直走的路和转弯不能混在一起插值,这里要注意的是四元数插值的原理,不能直接拿四元数线性插值,我一开始就犯了这个错误。
插值的时候,每次选择绿线位置作为起止点,记得修改代码中的 INTERP_NUM 和 START_SAVING_IDX 两个参数。
最终一共生成了280帧。
Step 2: 提前预设好存储的路径
Meshlab的GUI中每存储一张需要按照以下的方式执行:
- 将相机位姿放置到剪贴板,然后在空白处按 ctrl + v 。
- 点击左上角的snap保存图片。(一开始一定要设置好存储的路径,并且将该路径也设置到代码中的 SAVE_TXT_PATH、SAVE_IMG_PATH 参数)
Step 3: 完整代码
from io import StringIO
import numpy as np
import copy
from scipy.spatial.transform import Rotation
from scipy.spatial.transform import Rotation as R
import pyperclip # 替换剪贴板内容
import pyautogui # 控制键鼠
import os
def get_RT(file_path):
    """Read the camera rotation and translation stored in a view text file.

    The file must contain the attributes ``RotationMatrix="..."`` and
    ``TranslationVector="..."`` whose quoted values are whitespace-separated
    numbers, for example::

        RotationMatrix="-0.994514 0.1018 0.0240461 0 ... 0 0 0 1 "
        TranslationVector="-237.916 3.678 28.7412 1"

    Args:
        file_path: Path of the text file to parse.

    Returns:
        A tuple ``(R, T)`` where ``R`` is the rotation values reshaped to a
        ``(4, 4)`` array and ``T`` is the translation values as loaded by
        ``np.loadtxt`` (a 1-D array for a single-line value).

    Raises:
        ValueError: If an attribute is missing, its quoted value is not
            terminated, or the rotation does not hold exactly 16 numbers.
    """
    # Use a context manager so the file handle is always released.
    with open(file_path) as f:
        all_str = f.read()

    def quoted_value(name):
        # Locate the attribute name, then the pair of quotes that follows it.
        key_pos = all_str.find(name)
        if key_pos < 0:
            raise ValueError(
                "attribute {!r} not found in {!r}".format(name, file_path))
        open_quote = all_str.find('"', key_pos + len(name))
        close_quote = all_str.find('"', open_quote + 1) if open_quote >= 0 else -1
        if close_quote < 0:
            raise ValueError(
                "attribute {!r} in {!r} has no quoted value".format(name, file_path))
        return all_str[open_quote + 1:close_quote]

    R = np.loadtxt(StringIO(quoted_value('RotationMatrix'))).reshape(4, 4)
    T = np.loadtxt(StringIO(quoted_value('TranslationVector')))
    return R, T
# Hand-written spherical linear interpolation (slerp) between two quaternions.
def slerp(q1, q2, t):
    """Interpolate along the shortest arc between two quaternions.

    Args:
        q1: Start quaternion (array-like of numbers); expected to be unit
            length, since the angle is derived from the plain dot product.
        q2: End quaternion, same layout and expectation as ``q1``.
        t: Interpolation fraction; ``0`` yields the start and ``1`` the end.

    Returns:
        The interpolated quaternion as a numpy array. When the inputs lie in
        opposite hemispheres, ``q1`` is negated first, so the result at
        ``t == 0`` is ``-q1`` (the same rotation with the opposite sign).
    """
    # Accept lists/tuples as well as arrays; unary minus fails on a list.
    q1 = np.asarray(q1, dtype=float)
    q2 = np.asarray(q2, dtype=float)

    dot_product = np.dot(q1, q2)
    if dot_product < 0:
        # q and -q encode the same rotation; flip to take the shorter arc.
        q1 = -q1
        dot_product = -dot_product
    # Guard arccos against rounding that pushes the dot product past 1.
    dot_product = np.clip(dot_product, -1, 1)
    angle = np.arccos(dot_product)
    sin_angle = np.sin(angle)
    if sin_angle < 1e-8:
        # The quaternions (almost) coincide: the slerp weights would divide by
        # (nearly) zero, and a linear blend is indistinguishable here.
        return (1 - t) * q1 + t * q2
    t1 = np.sin((1 - t) * angle) / sin_angle
    t2 = np.sin(t * angle) / sin_angle
    return t1 * q1 + t2 * q2
# Interpolation of the R and T parts of a camera pose.
# Rotations are converted to quaternions, interpolated with slerp, and converted
# back to matrices; translations are interpolated linearly.
def RT_interpolate(x_start, x_end, interp_num=60):
    """Generate ``interp_num`` intermediate poses between two key values.

    The fraction used for frame ``idx`` is ``idx / interp_num``, so the first
    frame equals ``x_start`` and ``x_end`` itself is never emitted.

    Args:
        x_start: Start value. Either a translation vector of shape ``(4,)`` or
            a rotation given as a ``(4, 4)`` matrix (only the upper-left 3x3
            part is used).
        x_end: End value with the same shape as ``x_start``.
        interp_num: Number of frames to generate.

    Returns:
        A list of ``interp_num`` arrays: ``(4,)`` vectors whose last component
        is forced to 1, or ``(4, 4)`` matrices that are identity outside the
        interpolated 3x3 rotation. Returns ``None`` when ``x_start`` has any
        other shape.
    """
    if x_start.shape not in ((4,), (4, 4)):
        return None
    if interp_num <= 0:
        # Nothing to generate; also avoids dividing by zero below.
        return []

    if x_start.shape == (4,):
        step = (x_end - x_start) / interp_num
        x_list = []
        for idx in range(interp_num):
            x_temp = x_start + step * idx
            # Keep the last component pinned to 1 instead of interpolating it.
            x_temp[-1] = 1
            x_list.append(x_temp)
        return x_list

    # Rotation case: interpolate on quaternions, never on matrix entries.
    quat_start = Rotation.from_matrix(x_start[:3, :3]).as_quat()
    quat_end = Rotation.from_matrix(x_end[:3, :3]).as_quat()
    x_list = []
    for idx in range(interp_num):
        alpha = idx / interp_num
        interpolated_quaternion = slerp(quat_start, quat_end, alpha)
        # Embed the interpolated 3x3 rotation into a 4x4 matrix.
        x_temp = np.eye(4)
        x_temp[:3, :3] = Rotation.from_quat(interpolated_quaternion).as_matrix()
        x_list.append(x_temp)
    return x_list
# Helpers for writing R / T values back into the view text.
def array2string(np_array):
    """Flatten an array and join its elements with single spaces.

    Args:
        np_array: Array (or array-like) of any shape.

    Returns:
        The elements in flattened order, each converted with ``str`` and
        separated by one space; an empty string for an empty array.
    """
    flat = np.asarray(np_array).reshape(-1)
    # ``join`` builds the result in one pass instead of repeated ``+=``.
    return ' '.join(str(value) for value in flat)
def RT_list_to_txt(R_list, T_list, file_path, start_saving_idx, save_txt_path):
    """Write one view file per pose, using an existing view file as template.

    For every index ``idx`` the quoted values of ``RotationMatrix`` and
    ``TranslationVector`` in the template are replaced with ``R_list[idx]``
    and ``T_list[idx]``, and the result is written to
    ``save_txt_path + str(start_saving_idx + idx) + '.txt'``.

    Args:
        R_list: Sequence of rotation arrays, one per output file.
        T_list: Sequence of translation arrays, at least as long as ``R_list``.
        file_path: Path of the template view file.
        start_saving_idx: Number used in the name of the first output file.
        save_txt_path: Prefix of the output file names. It is concatenated
            as-is, so a directory must end with a path separator.

    Raises:
        ValueError: If the template lacks one of the two attributes or its
            quoted value is not terminated.
    """
    with open(file_path) as f:
        all_str = f.read()

    def value_span(name):
        # Return (start, end) offsets of the text between the quotes that
        # follow the attribute name.
        key_pos = all_str.find(name)
        if key_pos < 0:
            raise ValueError(
                "attribute {!r} not found in {!r}".format(name, file_path))
        open_quote = all_str.find('"', key_pos + len(name))
        close_quote = all_str.find('"', open_quote + 1) if open_quote >= 0 else -1
        if close_quote < 0:
            raise ValueError(
                "attribute {!r} in {!r} has no quoted value".format(name, file_path))
        return open_quote + 1, close_quote

    R_span = value_span('RotationMatrix')
    T_span = value_span('TranslationVector')

    for idx, rotation in enumerate(R_list):
        replacements = [
            (R_span, array2string(rotation)),
            (T_span, array2string(T_list[idx])),
        ]
        # Splice by position rather than ``str.replace``: the translation text
        # (e.g. "0 0 0 1") can also occur inside the rotation text, and
        # ``replace`` would rewrite every occurrence.
        pieces = []
        cursor = 0
        for (lo, hi), new_text in sorted(replacements, key=lambda item: item[0][0]):
            pieces.append(all_str[cursor:lo])
            pieces.append(new_text)
            cursor = hi
        pieces.append(all_str[cursor:])

        with open(save_txt_path + str(start_saving_idx + idx) + '.txt', "w") as f:
            f.write(''.join(pieces))
# Interaction with the GUI: paste a camera pose and trigger a snapshot.
def string_to_image(camera_pose_str, canvas_pos=(300, 200), snap_button_pos=(210, 50)):
    """Paste a camera pose into the GUI window and take a snapshot.

    The function copies the pose text to the clipboard, clicks an empty spot
    of the window, pastes with Ctrl+V, clicks the snapshot button and presses
    Enter. All positions are absolute screen coordinates, so the window must
    stay where it was when the coordinates were measured.

    Args:
        camera_pose_str: Full view text to place on the clipboard.
        canvas_pos: ``(x, y)`` of the spot clicked before pasting.
        snap_button_pos: ``(x, y)`` of the snapshot button.
    """
    # Step 1: put the camera pose on the clipboard.
    pyperclip.copy(camera_pose_str)

    # Step 2: click the window, then paste. ``hotkey`` presses and releases the
    # keys in one call, so Ctrl is not left held between separate calls.
    pyautogui.click(*canvas_pos)
    pyautogui.hotkey('ctrl', 'v')

    # Step 3: click the snapshot button, then press Enter
    # (presumably confirming the save dialog — verify against the GUI).
    pyautogui.click(*snap_button_pos, duration=0.05)
    pyautogui.press('enter')
def save_txt():
    """Interpolate between two saved views and write one view file per frame.

    The settings below are edited by hand for each path segment: the start
    and end view files, the number of frames to generate and the index of the
    first output file.
    """
    # Author's note on frame counts: straight road 100, turn 15, short straight 25.
    # ===================================================
    INTERP_NUM = 10
    START_SAVING_IDX = 270
    SAVE_TXT_PATH = "D:/DESKTOP/test/"
    START_TXT = "D:/DESKTOP/0.txt"
    END_TXT = "D:/DESKTOP/-1.txt"
    # ===================================================

    # Read both key poses and interpolate rotation and translation separately.
    R_start, T_start = get_RT(START_TXT)
    R_end, T_end = get_RT(END_TXT)
    R_list = RT_interpolate(R_start, R_end, INTERP_NUM)
    T_list = RT_interpolate(T_start, T_end, INTERP_NUM)

    # Create the output directory up front; otherwise the first write fails
    # when it does not exist yet.
    os.makedirs(SAVE_TXT_PATH, exist_ok=True)

    # Write the interpolated poses, reusing the start view file as template.
    RT_list_to_txt(R_list, T_list, START_TXT, START_SAVING_IDX, SAVE_TXT_PATH)
def save_img():
    """Replay every saved view file through the GUI and snapshot each one.

    View files named ``<number>.txt`` in ``SAVE_TXT_PATH`` are processed in
    numeric order. After each snapshot request the function waits until a new
    file appears in ``SAVE_IMG_PATH``; if none appears within the timeout it
    reports the failing frame and stops.
    """
    import time  # local import: not among the file's top-level imports

    # ==================================================
    SAVE_TXT_PATH = "D:/DESKTOP/test/"
    SAVE_IMG_PATH = "D:/DESKTOP/0829/MESHES/SNAPS/2011_0930_0018/"
    SNAP_TIMEOUT_SECONDS = 5.0
    POLL_INTERVAL_SECONDS = 0.1
    # ==================================================

    # Collect the numeric frame indices instead of assuming the directory holds
    # exactly 0.txt .. (N-1).txt and nothing else.
    frame_indices = sorted(
        int(stem)
        for stem, ext in map(os.path.splitext, os.listdir(SAVE_TXT_PATH))
        if ext == '.txt' and stem.isdigit()
    )

    for idx in frame_indices:
        len_of_img = len(os.listdir(SAVE_IMG_PATH))
        file_path = SAVE_TXT_PATH + '/{}.txt'.format(idx)
        with open(file_path) as f:
            camera_pose_str = f.read()

        string_to_image(camera_pose_str)

        # The image may be written slightly after the key press, so poll for
        # it rather than checking once.
        deadline = time.monotonic() + SNAP_TIMEOUT_SECONDS
        while len(os.listdir(SAVE_IMG_PATH)) <= len_of_img:
            if time.monotonic() >= deadline:
                print('No new snapshot appeared for frame {}; stopping.'.format(idx))
                return
            time.sleep(POLL_INTERVAL_SECONDS)
def main():
    """Run one stage of the pipeline.

    Uncomment ``save_txt()`` to generate the interpolated view files; leave
    ``save_img()`` enabled to replay them through the GUI.
    """
    # save_txt()
    save_img()


# Only start the GUI automation when executed as a script, not on import.
if __name__ == '__main__':
    main()
Step 4: 效果演示
最后就可以看到鼠标自动点击和GUI交互,自动复制粘贴,最终效果如下:
Additional
由于俺在服务器上没法根据open3d去可视化并保存图像,这里用的vtk去可视化mesh和pose, 代码如下:
import vtk
import os
import numpy as np
from vtkmodules.util.numpy_support import numpy_to_vtk
# Directory containing the per-index pose point files (`000to<idx>.npy`) read by load_pose_actor.
POSE_PLY_DIR = '/data/wuke/workspace/2023/tsdf-fusion-python-master/INPUT/KITTI/c2w_ply/'
def load_pose_actor(idx):
    """Build a VTK actor that shows the pose points of one index as red dots.

    The points are loaded from ``000to<idx>.npy`` (index zero-padded to three
    digits) inside ``POSE_PLY_DIR``.

    Args:
        idx: Index selecting the ``.npy`` file.

    Returns:
        A ``vtkActor`` rendering each point as a vertex, coloured red, with
        point size 2 and lighting disabled.
    """
    # Load the point coordinates (assumes an (N, 3) array — TODO confirm).
    points_npy = np.load(os.path.join(POSE_PLY_DIR, f"000to{str(idx).zfill(3)}.npy"))

    points = vtk.vtkPoints()
    # Deep-copy into VTK: a shallow conversion only references the numpy
    # buffer, which may be freed once this function returns while the actor
    # is still in use.
    points.SetData(numpy_to_vtk(points_npy, deep=True))

    polydata = vtk.vtkPolyData()
    polydata.SetPoints(points)

    # Turn every point into a vertex cell so the mapper has something to draw.
    vertex = vtk.vtkVertexGlyphFilter()
    vertex.SetInputData(polydata)

    mapper = vtk.vtkPolyDataMapper()
    mapper.SetInputConnection(vertex.GetOutputPort())

    # Actor for the point cloud.
    actor = vtk.vtkActor()
    actor.SetMapper(mapper)
    actor.GetProperty().SetColor(1.0, 0.0, 0.0)  # red
    actor.GetProperty().SetPointSize(2)  # point size
    actor.GetProperty().LightingOff()  # disable lighting
    return actor
def view_single_mesh(idx):
    """Render the mesh and pose points of one index and save the view as PNG.

    The mesh is read from the ``OUTPUT`` directory, rendered together with the
    actor returned by ``load_pose_actor(idx)`` from a fixed camera, and written
    to ``NOTEBOOKS/output/000to<idx>.png`` (index zero-padded to three digits).

    Args:
        idx: Index selecting the mesh file, the pose file and the output name.
    """
    frame_tag = str(idx).zfill(3)
    mesh_path = f"/data/wuke/workspace/2023/tsdf-fusion-python-master/OUTPUT/IDX=000to{frame_tag}_VoxelSize=0.12_Trunc=12.ply"
    output_dir = "/data/wuke/workspace/2023/tsdf-fusion-python-master/NOTEBOOKS/output"
    image_path = f"{output_dir}/000to{frame_tag}.png"

    # Read the .ply mesh.
    reader = vtk.vtkPLYReader()
    reader.SetFileName(mesh_path)
    reader.Update()

    # Map the mesh data to graphics primitives.
    mapper = vtk.vtkPolyDataMapper()
    mapper.SetInputConnection(reader.GetOutputPort())

    # Actor for the mesh, drawn without lighting.
    actor = vtk.vtkActor()
    actor.SetMapper(mapper)
    actor.GetProperty().LightingOff()

    pose_actor = load_pose_actor(idx)

    # Renderer holding both actors on a white background.
    renderer = vtk.vtkRenderer()
    renderer.AddActor(pose_actor)
    renderer.AddActor(actor)
    renderer.SetBackground(1.0, 1.0, 1.0)

    # Fixed, hard-coded camera pose shared by all frames.
    camera = renderer.GetActiveCamera()
    camera.SetPosition(201.85505957872468, 22.196916577555967, -283.82837538148243)
    camera.SetFocalPoint(68.239501953125, 209.88958740234375, 0.5610954761505127)
    camera.SetViewUp(-0.430894787820848, 0.6467658403624486, -0.629304083547121)

    # Render window; no interactor is attached, so the frame is rendered once.
    render_window = vtk.vtkRenderWindow()
    render_window.AddRenderer(renderer)
    render_window.SetSize(1400, 900)
    render_window.Render()

    # Grab the rendered frame from the window.
    window_to_image_filter = vtk.vtkWindowToImageFilter()
    window_to_image_filter.SetInput(render_window)
    window_to_image_filter.Update()

    # Make sure the target directory exists; the writer does not create it.
    os.makedirs(output_dir, exist_ok=True)

    writer = vtk.vtkPNGWriter()
    writer.SetFileName(image_path)
    writer.SetInputConnection(window_to_image_filter.GetOutputPort())
    writer.Write()
if __name__ == '__main__':
    # Render one image for every 20th index from 380 up to, but excluding, 800.
    frame_indices = range(380, 800, 20)
    for frame_idx in frame_indices:
        view_single_mesh(frame_idx)