Gaussian Splatting SLAM Study Notes (2): The Main MonoGS Code

Reading code is like drawing a tree diagram: first work out the main trunk, then deal with the twigs and leaves. (Advice from a senior student.)

So, code, let's go! [I'm a second-year beginner and this is only a study record, so corrections are very welcome.]

With my knowledge alone, understanding this would take a lot of time and effort, but I can stand on the shoulders of giants. So first, here is the Chinese-annotated version of the code:

GitHub - KwanWaiPang/Gaussian_Splatting_SLAM_comment (MonoGS with Chinese annotations): https://github.com/KwanWaiPang/Gaussian_Splatting_SLAM_comment

1. slam.py

1.1 Function analysis

1.1.1 The main function

[This part feels more like a Python syntax lesson.]

if __name__ == "__main__":
    # Set up command line argument parser-设置命令行参数解析器
    parser = ArgumentParser(description="Training script parameters") #创建一个新的参数解析对象,用于处理命令行参数
    parser.add_argument("--config", type=str) #添加一个名为 --config 的参数,该参数期望一个字符串值。通常用于指定配置文件。
    parser.add_argument("--eval", action="store_true") #添加一个名为 --eval 的参数。action="store_true" 表示这是一个标志,如果在命令行中提供了这个参数,它将存储 True,否则存储 False。这通常用于切换评估模式。

    args = parser.parse_args(sys.argv[1:]) #解析参数

    mp.set_start_method("spawn") #使用多进程启动方法

    with open(args.config, "r") as yml: #打开并加载配置文件
        config = yaml.safe_load(yml)

    config = load_config(args.config) #加载配置
    save_dir = None

    #如果在评估模式下运行,覆盖一些配置参数
    if args.eval:
        Log("Running MonoGS in Evaluation Mode")
        Log("Following config will be overriden")
        Log("\tsave_results=True")
        config["Results"]["save_results"] = True
        Log("\tuse_gui=False")
        config["Results"]["use_gui"] = False
        Log("\teval_rendering=True")
        config["Results"]["eval_rendering"] = True
        Log("\tuse_wandb=True")
        config["Results"]["use_wandb"] = True

     # 如果配置中保存结果的选项为真,创建保存目录
    if config["Results"]["save_results"]:
        mkdir_p(config["Results"]["save_dir"])
        current_datetime = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        path = config["Dataset"]["dataset_path"].split("/")
        save_dir = os.path.join(
            config["Results"]["save_dir"], path[-3] + "_" + path[-2], current_datetime
        )
        tmp = args.config
        tmp = tmp.split(".")[0]
        config["Results"]["save_dir"] = save_dir
        mkdir_p(save_dir)
        with open(os.path.join(save_dir, "config.yml"), "w") as file:
            documents = yaml.dump(config, file)
        Log("saving results in " + save_dir)
        run = wandb.init(
            project="MonoGS",
            name=f"{tmp}_{current_datetime}",
            config=config,
            mode=None if config["Results"]["use_wandb"] else "disabled",
        )
        wandb.define_metric("frame_idx")
        wandb.define_metric("ate*", step_metric="frame_idx")
    
    #slam函数的使用
    slam = SLAM(config, save_dir=save_dir)

    slam.run()
    wandb.finish()

    # All done
    Log("Done.")

1.1.2 The SLAM class

class SLAM:
    def __init__(self, config, save_dir=None):
        # 创建两个 CUDA 事件对象,用于测量时间
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

        # 记录开始时间
        start.record()

        # 保存配置和保存目录
        self.config = config
        self.save_dir = save_dir

        # 使用 munchify 将配置中的字典转换为对象
        model_params = munchify(config["model_params"])
        opt_params = munchify(config["opt_params"])
        pipeline_params = munchify(config["pipeline_params"])

        # 保存模型、优化和管道参数
        self.model_params, self.opt_params, self.pipeline_params = (
            model_params,
            opt_params,
            pipeline_params,
        )

        # 根据配置设置实时模式和单目模式
        self.live_mode = self.config["Dataset"]["type"] == "realsense"
        self.monocular = self.config["Dataset"]["sensor_type"] == "monocular"
        self.use_spherical_harmonics = self.config["Training"]["spherical_harmonics"]
        self.use_gui = self.config["Results"]["use_gui"]

        # 如果是实时模式,强制使用 GUI
        if self.live_mode:
            self.use_gui = True
        self.eval_rendering = self.config["Results"]["eval_rendering"]

        # 根据是否使用球面调和函数设置 sh_degree
        model_params.sh_degree = 3 if self.use_spherical_harmonics else 0

        # 初始化高斯模型
        self.gaussians = GaussianModel(model_params.sh_degree, config=self.config)
        self.gaussians.init_lr(6.0)

        # 加载数据集
        self.dataset = load_dataset(
            model_params, model_params.source_path, config=config
        )

        # 设置高斯模型的训练
        self.gaussians.training_setup(opt_params)

        # 初始化背景颜色
        bg_color = [0, 0, 0]
        self.background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")

        # 创建前端和后端队列
        frontend_queue = mp.Queue()
        backend_queue = mp.Queue()

        # 创建 GUI 队列,如果不使用 GUI,则使用虚拟队列
        q_main2vis = mp.Queue() if self.use_gui else FakeQueue()
        q_vis2main = mp.Queue() if self.use_gui else FakeQueue()

        # 更新配置中的保存目录和单目设置
        self.config["Results"]["save_dir"] = save_dir
        self.config["Training"]["monocular"] = self.monocular

        # 初始化前端和后端
        self.frontend = FrontEnd(self.config)
        self.backend = BackEnd(self.config)

        # 设置前端的属性
        self.frontend.dataset = self.dataset
        self.frontend.background = self.background
        self.frontend.pipeline_params = self.pipeline_params
        self.frontend.frontend_queue = frontend_queue
        self.frontend.backend_queue = backend_queue
        self.frontend.q_main2vis = q_main2vis
        self.frontend.q_vis2main = q_vis2main
        self.frontend.set_hyperparams()

        # 设置后端的属性
        self.backend.gaussians = self.gaussians
        self.backend.background = self.background
        self.backend.cameras_extent = 6.0
        self.backend.pipeline_params = self.pipeline_params
        self.backend.opt_params = self.opt_params
        self.backend.frontend_queue = frontend_queue
        self.backend.backend_queue = backend_queue
        self.backend.live_mode = self.live_mode

        self.backend.set_hyperparams()

        # 初始化 GUI 参数
        self.params_gui = gui_utils.ParamsGUI(
            pipe=self.pipeline_params,
            background=self.background,
            gaussians=self.gaussians,
            q_main2vis=q_main2vis,
            q_vis2main=q_vis2main,
        )

        # 启动后端进程
        backend_process = mp.Process(target=self.backend.run)
        if self.use_gui:
            # 如果使用 GUI,启动 GUI 进程
            gui_process = mp.Process(target=slam_gui.run, args=(self.params_gui,))
            gui_process.start()
            time.sleep(5)

        backend_process.start()
        self.frontend.run()
        backend_queue.put(["pause"])

        # 记录结束时间
        end.record()
        torch.cuda.synchronize()

        # 计算总帧数和每秒帧数
        N_frames = len(self.frontend.cameras)
        FPS = N_frames / (start.elapsed_time(end) * 0.001)
        Log("Total time", start.elapsed_time(end) * 0.001, tag="Eval")
        Log("Total FPS", N_frames / (start.elapsed_time(end) * 0.001), tag="Eval")

        # 如果需要评估渲染
        if self.eval_rendering:
            self.gaussians = self.frontend.gaussians
            kf_indices = self.frontend.kf_indices
            ATE = eval_ate(
                self.frontend.cameras,
                self.frontend.kf_indices,
                self.save_dir,
                0,
                final=True,
                monocular=self.monocular,
            )

            # 评估渲染结果
            rendering_result = eval_rendering(
                self.frontend.cameras,
                self.gaussians,
                self.dataset,
                self.save_dir,
                self.pipeline_params,
                self.background,
                kf_indices=kf_indices,
                iteration="before_opt",
            )

            # 创建并记录度量表
            columns = ["tag", "psnr", "ssim", "lpips", "RMSE ATE", "FPS"]
            metrics_table = wandb.Table(columns=columns)
            metrics_table.add_data(
                "Before",
                rendering_result["mean_psnr"],
                rendering_result["mean_ssim"],
                rendering_result["mean_lpips"],
                ATE,
                FPS,
            )

            # 清空前端队列并请求后端进行颜色优化
            while not frontend_queue.empty():
                frontend_queue.get()
            backend_queue.put(["color_refinement"])
            while True:
                if frontend_queue.empty():
                    time.sleep(0.01)
                    continue
                data = frontend_queue.get()
                if data[0] == "sync_backend" and frontend_queue.empty():
                    gaussians = data[1]
                    self.gaussians = gaussians
                    break

            # 评估优化后的渲染结果
            rendering_result = eval_rendering(
                self.frontend.cameras,
                self.gaussians,
                self.dataset,
                self.save_dir,
                self.pipeline_params,
                self.background,
                kf_indices=kf_indices,
                iteration="after_opt",
            )
            metrics_table.add_data(
                "After",
                rendering_result["mean_psnr"],
                rendering_result["mean_ssim"],
                rendering_result["mean_lpips"],
                ATE,
                FPS,
            )
            wandb.log({"Metrics": metrics_table})
            save_gaussians(self.gaussians, self.save_dir, "final_after_opt", final=True)

        # 停止后端进程并等待其结束
        backend_queue.put(["stop"])
        backend_process.join()
        Log("Backend stopped and joined the main thread")
        if self.use_gui:
            q_main2vis.put(gui_utils.GaussianPacket(finish=True))
            gui_process.join()
            Log("GUI Stopped and joined the main thread")

    def run(self):
        pass

1.2 Some questions

1. Depth map?

See: http://t.csdnimg.cn/QURGt

2. Multiprocessing start methods?

A start method determines how new worker processes are created and initialized. The code above uses spawn; Python also supports fork and (on Unix) forkserver. A minimal sketch is shown after the list below.

spawn

  • Available on both Unix and Windows.
  • Starts a brand-new Python interpreter process (a fresh program that reads and executes Python code).
  • The parent passes only the information the child actually needs.
  • Slower to start, but safer, because the child does not inherit the parent's state (open file descriptors, threads, and so on).
  • Suited to cross-platform code and to cases that need tight control over process initialization.
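A minimal sketch of the pattern used in slam.py, written against Python's standard multiprocessing API (the same calls also work through torch.multiprocessing; the worker function below is invented purely for illustration):

import multiprocessing as mp

def worker(queue):
    # placeholder backend-style loop: consume messages until a "stop" message arrives
    while True:
        msg = queue.get()
        if msg[0] == "stop":
            break

if __name__ == "__main__":
    mp.set_start_method("spawn")  # must be called once, before any worker process is started
    q = mp.Queue()
    p = mp.Process(target=worker, args=(q,))
    p.start()
    q.put(["stop"])
    p.join()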
3. Evaluation mode?

In general, evaluation mode means running a trained model without changing its parameters, using test or validation data to measure prediction accuracy and other performance metrics. In this script, --eval simply overrides a few config flags (save_results, use_gui, eval_rendering, use_wandb) so that a run produces benchmark numbers instead of an interactive session.

4. What does munchify do?

munchify comes from the munch package and converts a dict into a Munch object, a dict subclass whose values can also be read through attribute access rather than only through keys. This keeps the code shorter and more readable; see the example below.
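A tiny illustration (the dictionary contents here are invented):

from munch import munchify

opt_params = munchify({"iterations": 30000, "lambda_dssim": 0.2})
print(opt_params.iterations)       # attribute access: 30000
print(opt_params["lambda_dssim"])  # ordinary dict access still works: 0.2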

5. Live mode and monocular mode?

(1)

Live mode means the system reads data directly from a sensor (such as a camera) while it runs and processes it immediately. It is used wherever instant feedback is needed, e.g. robot navigation, drones, or augmented reality (AR).

  • Characteristics

    • Streaming data: frames come straight from the sensor and are processed on the fly.
    • Low latency: results are available almost immediately, which suits applications that must respond quickly.
    • Continuous operation: the system keeps acquiring and processing data for as long as it runs.

(2)

Monocular mode means a single camera (rather than a stereo rig or a multi-camera setup) is used for image capture. In visual SLAM, monocular mode performs localization and mapping from a single camera's image sequence.

  • Characteristics

    • Single camera: only one camera captures images.
    • Depth is hard to obtain: without stereo disparity, depth has to be estimated from motion parallax and other visual cues.
    • Lower cost: the hardware is cheaper than a stereo-camera system.
6. GUI queues? Fake queues?

(1) The GUI queues here are ordinary mp.Queue objects used to pass data between the SLAM process and the visualization process: q_main2vis carries GaussianPacket updates (Gaussians, the current frame, keyframes) to the GUI, and q_vis2main carries GUI commands (such as the pause flag) back to the front end.

(2) FakeQueue is a stand-in used when the GUI is disabled. It exposes a queue-like interface but simply discards whatever is put into it, so the rest of the code can keep calling q_main2vis.put(...) without branching on use_gui. A rough sketch of the idea follows.
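A rough sketch of the idea (the actual FakeQueue class in the repository may differ in detail):

class FakeQueue:
    # drop-in stand-in for mp.Queue when no GUI process is listening (illustrative sketch)
    def put(self, obj, *args, **kwargs):
        pass  # silently discard the message

    def empty(self):
        return True  # nothing ever arrives

    def get(self, *args, **kwargs):
        raise RuntimeError("FakeQueue never holds any data")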

7. What is the difference between the front end and the back end?

(1) In web development, the front end is the part users interact with directly (UI/UX) and the back end is the server side handling storage, business logic, and authentication, but that is not quite the sense used here.

(2) In MonoGS, the front end (slam_frontend.py) is the tracking side: for every incoming frame it estimates the camera pose against the current map, decides whether to create a keyframe, and feeds the GUI. The back end (slam_backend.py) is the mapping side: it runs in a separate process, maintains and optimizes the Gaussian map (densification, pruning, opacity resets) together with the keyframe poses, and exchanges messages with the front end through the two queues.

2. slam_backend.py

2.1 Function analysis

This analysis varies in depth; some functions are only summarized. (Sections 2.1.6 and 2.1.8 are covered in more detail.)

2.1.1 The BackEnd class definition

class BackEnd(mp.Process):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.gaussians = None
        self.pipeline_params = None
        self.opt_params = None
        self.background = None
        self.cameras_extent = None
        self.frontend_queue = None
        self.backend_queue = None
        self.live_mode = False

        self.pause = False
        self.device = "cuda"
        self.dtype = torch.float32
        self.monocular = config["Training"]["monocular"]
        self.iteration_count = 0
        self.last_sent = 0
        self.occ_aware_visibility = {}
        self.viewpoints = {}
        self.current_window = []
        self.initialized = not self.monocular
        self.keyframe_optimizers = None

2.1.2 Setting hyperparameters

Reads hyperparameters from the config and sets the parameters that govern training of the Gaussian model, such as how often Gaussians are densified and reset and how many mapping iterations are run.

def set_hyperparams(self):
    self.save_results = self.config["Results"]["save_results"]

    self.init_itr_num = self.config["Training"]["init_itr_num"] #建图迭代次数
    self.init_gaussian_update = self.config["Training"]["init_gaussian_update"]
    self.init_gaussian_reset = self.config["Training"]["init_gaussian_reset"]
    self.init_gaussian_th = self.config["Training"]["init_gaussian_th"]
    self.init_gaussian_extent = (
        self.cameras_extent * self.config["Training"]["init_gaussian_extent"]
    )
    self.mapping_itr_num = self.config["Training"]["mapping_itr_num"]
    self.gaussian_update_every = self.config["Training"]["gaussian_update_every"]
    self.gaussian_update_offset = self.config["Training"]["gaussian_update_offset"]
    self.gaussian_th = self.config["Training"]["gaussian_th"]
    self.gaussian_extent = (
        self.cameras_extent * self.config["Training"]["gaussian_extent"]
    )
    self.gaussian_reset = self.config["Training"]["gaussian_reset"]
    self.size_threshold = self.config["Training"]["size_threshold"]
    self.window_size = self.config["Training"]["window_size"]
    self.single_thread = (
        self.config["Dataset"]["single_thread"]
        if "single_thread" in self.config["Dataset"]
        else False
    )

2.1.3 Adding a keyframe

Adds a new keyframe to the Gaussian model, initializing new Gaussians from the depth map.

def add_next_kf(self, frame_idx, viewpoint, init=False, scale=2.0, depth_map=None):
    self.gaussians.extend_from_pcd_seq( #从点云序列中扩展高斯模型
        viewpoint, kf_id=frame_idx, init=init, scale=scale, depthmap=depth_map
    )

2.1.4 Resetting state

Clears the stored viewpoints and keyframe optimizers, removes all Gaussian points, and drains the backend queue.

def reset(self):
    self.iteration_count = 0
    self.occ_aware_visibility = {}
    self.viewpoints = {}
    self.current_window = []
    self.initialized = not self.monocular
    self.keyframe_optimizers = None

    # remove all gaussians
    self.gaussians.prune_points(self.gaussians.unique_kfIDs >= 0)
    # remove everything from the queues
    while not self.backend_queue.empty():
        self.backend_queue.get()

2.1.5 Initializing the map

Renders the initial keyframe repeatedly, computes the loss, updates the Gaussian model's densification statistics, and densifies/prunes the Gaussians.

def initialize_map(self, cur_frame_idx, viewpoint):
    for mapping_iteration in range(self.init_itr_num):
        self.iteration_count += 1
        render_pkg = render(
            viewpoint, self.gaussians, self.pipeline_params, self.background
        )
        (
            image,
            viewspace_point_tensor,
            visibility_filter,
            radii,
            depth,
            opacity,
            n_touched,
        ) = (
            render_pkg["render"],
            render_pkg["viewspace_points"],
            render_pkg["visibility_filter"],
            render_pkg["radii"],
            render_pkg["depth"],
            render_pkg["opacity"],
            render_pkg["n_touched"],
        )
        loss_init = get_loss_mapping(
            self.config, image, depth, viewpoint, opacity, initialization=True
        )
        loss_init.backward()

        with torch.no_grad():
            self.gaussians.max_radii2D[visibility_filter] = torch.max(
                self.gaussians.max_radii2D[visibility_filter],
                radii[visibility_filter],
            )
            self.gaussians.add_densification_stats(
                viewspace_point_tensor, visibility_filter
            )
            if mapping_iteration % self.init_gaussian_update == 0:
                self.gaussians.densify_and_prune(
                    self.opt_params.densify_grad_threshold,
                    self.init_gaussian_th,
                    self.init_gaussian_extent,
                    None,
                )

            if self.iteration_count == self.init_gaussian_reset or (
                self.iteration_count == self.opt_params.densify_from_iter
            ):
                self.gaussians.reset_opacity()

            self.gaussians.optimizer.step()
            self.gaussians.optimizer.zero_grad(set_to_none=True)

    self.occ_aware_visibility[cur_frame_idx] = (n_touched > 0).long()
    Log("Initialized map")
    return render_pkg

2.1.6 Map optimization

def map(self, current_window, prune=False, iters=1):
    # 首先,检查当前窗口是否为空,如果为空则直接返回,不进行后续操作
    if len(current_window) == 0:
        return

    #从viewpoints中提取视角
    viewpoint_stack = [self.viewpoints[kf_idx] for kf_idx in current_window]
    random_viewpoint_stack = [] #初始化,用于存储不在当前窗口的视角
    frames_to_optimize = self.config["Training"]["pose_window"] #获取需要优化的帧数

    current_window_set = set(current_window)
    # 遍历所有视角,如果视角不在当前窗口中,则将其添加到 random_viewpoint_stack中
    for cam_idx, viewpoint in self.viewpoints.items():
        if cam_idx in current_window_set:
            continue
        random_viewpoint_stack.append(viewpoint)

    #进行多次迭代优化每个视角的效果并计算损失值
    for _ in range(iters):
        self.iteration_count += 1 #跟踪迭代次数
        self.last_sent += 1 #跟踪发送次数

        loss_mapping = 0
        viewspace_point_tensor_acm = []
        visibility_filter_acm = []
        radii_acm = []
        n_touched_acm = []

        keyframes_opt = []

        # 遍历当前窗口中的所有关键帧,对每一帧进行建图
        for cam_idx in range(len(current_window)):
            viewpoint = viewpoint_stack[cam_idx] #获取该相机的视角信息 viewpoint
            keyframes_opt.append(viewpoint) #将 viewpoint 添加到 keyframes_opt 列表中

           # 调用 render 函数来渲染场景,其中传入了当前相机的信息 viewpoint、高斯模型数据 self.gaussians、管道参数 self.pipeline_params 和背景信息 self.background。渲染完成后,将渲染结果存储在 render_pkg 变量中
            render_pkg = render(
             # 将提取出的渲染结果分别赋值给对应的变量,包括图像、视角点、可见性、半径、深度、不透明度和触摸数等信息
                viewpoint, self.gaussians, self.pipeline_params, self.background
            )
            (
                image,
                viewspace_point_tensor,
                visibility_filter,
                radii,
                depth,
                opacity,
                n_touched,
            ) = (
                render_pkg["render"],
                render_pkg["viewspace_points"],
                render_pkg["visibility_filter"],
                render_pkg["radii"],
                render_pkg["depth"],
                render_pkg["opacity"],
                render_pkg["n_touched"],
            )

            loss_mapping += get_loss_mapping(
                self.config, image, depth, viewpoint, opacity
            )
            viewspace_point_tensor_acm.append(viewspace_point_tensor)
            visibility_filter_acm.append(visibility_filter)
            radii_acm.append(radii)
            n_touched_acm.append(n_touched)

        #随机选择两个不在当前窗口中的视角并渲染(注意:与上面的关键帧循环同级)
        for cam_idx in torch.randperm(len(random_viewpoint_stack))[:2]:
            viewpoint = random_viewpoint_stack[cam_idx]
            render_pkg = render(
                viewpoint, self.gaussians, self.pipeline_params, self.background
            )
            (
                image,
                viewspace_point_tensor,
                visibility_filter,
                radii,
                depth,
                opacity,
                n_touched,
            ) = (
                render_pkg["render"],
                render_pkg["viewspace_points"],
                render_pkg["visibility_filter"],
                render_pkg["radii"],
                render_pkg["depth"],
                render_pkg["opacity"],
                render_pkg["n_touched"],
            )
            loss_mapping += get_loss_mapping(
                self.config, image, depth, viewpoint, opacity
            )
            viewspace_point_tensor_acm.append(viewspace_point_tensor)
            visibility_filter_acm.append(visibility_filter)
            radii_acm.append(radii)

        scaling = self.gaussians.get_scaling
        isotropic_loss = torch.abs(scaling - scaling.mean(dim=1).view(-1, 1))
        loss_mapping += 10 * isotropic_loss.mean()
        loss_mapping.backward()

        #对高斯点云进行稠密化/剪枝
        gaussian_split = False
        ## Densifying / Pruning Gaussians
        with torch.no_grad():
            self.occ_aware_visibility = {}
            for idx in range((len(current_window))):
                kf_idx = current_window[idx]
                n_touched = n_touched_acm[idx]
                self.occ_aware_visibility[kf_idx] = (n_touched > 0).long()

            # # compute the visibility of the gaussians
            # # Only prune on the last iteration and when we have full window
            if prune:
                if len(current_window) == self.config["Training"]["window_size"]:
                    prune_mode = self.config["Training"]["prune_mode"]
                    prune_coviz = 3
                    self.gaussians.n_obs.fill_(0)
                    for window_idx, visibility in self.occ_aware_visibility.items():
                        self.gaussians.n_obs += visibility.cpu()
                    to_prune = None
                    if prune_mode == "odometry":
                        to_prune = self.gaussians.n_obs < 3
                        # make sure we don't split the gaussians, break here.
                    if prune_mode == "slam":
                        # only prune keyframes which are relatively new
                        sorted_window = sorted(current_window, reverse=True)
                        mask = self.gaussians.unique_kfIDs >= sorted_window[2]
                        if not self.initialized:
                            mask = self.gaussians.unique_kfIDs >= 0
                        to_prune = torch.logical_and(
                            self.gaussians.n_obs <= prune_coviz, mask
                        )
                    if to_prune is not None and self.monocular:
                        self.gaussians.prune_points(to_prune.cuda())
                        for idx in range((len(current_window))):
                            current_idx = current_window[idx]
                            self.occ_aware_visibility[current_idx] = (
                                self.occ_aware_visibility[current_idx][~to_prune]
                            )
                    if not self.initialized:
                        self.initialized = True
                        Log("Initialized SLAM")
                    # # make sure we don't split the gaussians, break here.
                return False

            for idx in range(len(viewspace_point_tensor_acm)):
                self.gaussians.max_radii2D[visibility_filter_acm[idx]] = torch.max(
                    self.gaussians.max_radii2D[visibility_filter_acm[idx]],
                    radii_acm[idx][visibility_filter_acm[idx]],
                )
                self.gaussians.add_densification_stats(
                    viewspace_point_tensor_acm[idx], visibility_filter_acm[idx]
                )

            update_gaussian = (
                self.iteration_count % self.gaussian_update_every
                == self.gaussian_update_offset
            )
            if update_gaussian:
                self.gaussians.densify_and_prune(
                    self.opt_params.densify_grad_threshold,
                    self.gaussian_th,
                    self.gaussian_extent,
                    self.size_threshold,
                )
                gaussian_split = True

            ## Opacity reset
            if (self.iteration_count % self.gaussian_reset) == 0 and (
                not update_gaussian
            ):
                Log("Resetting the opacity of non-visible Gaussians")
                self.gaussians.reset_opacity_nonvisible(visibility_filter_acm)
                gaussian_split = True

            self.gaussians.optimizer.step()
            self.gaussians.optimizer.zero_grad(set_to_none=True)
            self.gaussians.update_learning_rate(self.iteration_count)
            self.keyframe_optimizers.step()
            self.keyframe_optimizers.zero_grad(set_to_none=True)
            # Pose update
            for cam_idx in range(min(frames_to_optimize, len(current_window))):
                viewpoint = viewpoint_stack[cam_idx]
                if viewpoint.uid == 0:
                    continue
                update_pose(viewpoint)
    return gaussian_split
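One term worth calling out before the backward pass above: 10 * isotropic_loss.mean() is an isotropy regularizer. For every Gaussian it measures the absolute deviation of its three scale components from their per-Gaussian mean and adds the average of these deviations (weighted by 10) to the mapping loss, which penalizes extremely elongated, highly anisotropic Gaussians.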

2.1.7 Color refinement

def color_refinement(self):
        Log("Starting color refinement")

        iteration_total = 26000
        for iteration in tqdm(range(1, iteration_total + 1)):
            viewpoint_idx_stack = list(self.viewpoints.keys())
            viewpoint_cam_idx = viewpoint_idx_stack.pop(
                random.randint(0, len(viewpoint_idx_stack) - 1)
            )
            viewpoint_cam = self.viewpoints[viewpoint_cam_idx]
            render_pkg = render(
                viewpoint_cam, self.gaussians, self.pipeline_params, self.background
            )
            image, visibility_filter, radii = (
                render_pkg["render"],
                render_pkg["visibility_filter"],
                render_pkg["radii"],
            )

            gt_image = viewpoint_cam.original_image.cuda()
            Ll1 = l1_loss(image, gt_image)
            loss = (1.0 - self.opt_params.lambda_dssim) * (
                Ll1
            ) + self.opt_params.lambda_dssim * (1.0 - ssim(image, gt_image))
            loss.backward()
            with torch.no_grad():
                self.gaussians.max_radii2D[visibility_filter] = torch.max(
                    self.gaussians.max_radii2D[visibility_filter],
                    radii[visibility_filter],
                )
                self.gaussians.optimizer.step()
                self.gaussians.optimizer.zero_grad(set_to_none=True)
                self.gaussians.update_learning_rate(iteration)
        Log("Map refinement done")

2.1.8 Running the back-end process

    def push_to_frontend(self, tag=None):
        self.last_sent = 0
        keyframes = []
        for kf_idx in self.current_window:
            kf = self.viewpoints[kf_idx]
            keyframes.append((kf_idx, kf.R.clone(), kf.T.clone()))
        if tag is None:
            tag = "sync_backend"

        msg = [tag, clone_obj(self.gaussians), self.occ_aware_visibility, keyframes]
        self.frontend_queue.put(msg)

    def run(self):
        while True:
            if self.backend_queue.empty():
                #如果系统处于暂停状态,则休眠一段时间并继续下一次循环
                if self.pause:
                    time.sleep(0.01)
                    continue
                # 如果当前窗口中没有关键帧,则休眠一段时间并继续下一次循环
                if len(self.current_window) == 0:
                    time.sleep(0.01)
                    continue
                # 如果是单线程模式,则休眠一段时间并继续下一次循环
                if self.single_thread:
                    time.sleep(0.01)
                    continue
                #进行建图
                self.map(self.current_window)
                if self.last_sent >= 10:
                    self.map(self.current_window, prune=True, iters=10)
                    self.push_to_frontend()
            else:
                data = self.backend_queue.get()
                if data[0] == "stop":
                    break
                elif data[0] == "pause":
                    self.pause = True
                elif data[0] == "unpause":
                    self.pause = False
                elif data[0] == "color_refinement":
                    self.color_refinement()
                    self.push_to_frontend()
                elif data[0] == "init":
                    cur_frame_idx = data[1]
                    viewpoint = data[2]
                    depth_map = data[3]
                    Log("Resetting the system")
                    self.reset()

                    self.viewpoints[cur_frame_idx] = viewpoint

                    #添加关键帧,此处进行高斯的初始化
                    self.add_next_kf(
                        cur_frame_idx, viewpoint, depth_map=depth_map, init=True
                    )
                    self.initialize_map(cur_frame_idx, viewpoint)
                    self.push_to_frontend("init")

                elif data[0] == "keyframe":
                    cur_frame_idx = data[1]
                    viewpoint = data[2]
                    current_window = data[3]
                    depth_map = data[4]

                    self.viewpoints[cur_frame_idx] = viewpoint
                    self.current_window = current_window
                    self.add_next_kf(cur_frame_idx, viewpoint, depth_map=depth_map)

                    opt_params = []
                    frames_to_optimize = self.config["Training"]["pose_window"]
                    iter_per_kf = self.mapping_itr_num if self.single_thread else 10
                    if not self.initialized:
                        if (
                            len(self.current_window)
                            == self.config["Training"]["window_size"]
                        ):
                            frames_to_optimize = (
                                self.config["Training"]["window_size"] - 1
                            )
                            iter_per_kf = 50 if self.live_mode else 300
                            Log("Performing initial BA for initialization")
                        else:
                            iter_per_kf = self.mapping_itr_num
                    for cam_idx in range(len(self.current_window)):
                        if self.current_window[cam_idx] == 0:
                            continue
                        viewpoint = self.viewpoints[current_window[cam_idx]]
                        if cam_idx < frames_to_optimize:
                            opt_params.append(
                                {
                                    "params": [viewpoint.cam_rot_delta],
                                    "lr": self.config["Training"]["lr"]["cam_rot_delta"]
                                    * 0.5,
                                    "name": "rot_{}".format(viewpoint.uid),
                                }
                            )
                            opt_params.append(
                                {
                                    "params": [viewpoint.cam_trans_delta],
                                    "lr": self.config["Training"]["lr"][
                                        "cam_trans_delta"
                                    ]
                                    * 0.5,
                                    "name": "trans_{}".format(viewpoint.uid),
                                }
                            )
                        opt_params.append(
                            {
                                "params": [viewpoint.exposure_a],
                                "lr": 0.01,
                                "name": "exposure_a_{}".format(viewpoint.uid),
                            }
                        )
                        opt_params.append(
                            {
                                "params": [viewpoint.exposure_b],
                                "lr": 0.01,
                                "name": "exposure_b_{}".format(viewpoint.uid),
                            }
                        )
                    self.keyframe_optimizers = torch.optim.Adam(opt_params)

                    self.map(self.current_window, iters=iter_per_kf)
                    self.map(self.current_window, prune=True)
                    self.push_to_frontend("keyframe")
                else:
                    raise Exception("Unprocessed data", data)
        while not self.backend_queue.empty():
            self.backend_queue.get()
        while not self.frontend_queue.empty():
            self.frontend_queue.get()
        return
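For quick reference, the message formats flowing through the two queues in the code above are:

  • backend_queue (front end to back end): ["pause"], ["unpause"], ["stop"], ["color_refinement"], ["init", cur_frame_idx, viewpoint, depth_map], and ["keyframe", cur_frame_idx, viewpoint, current_window, depth_map].
  • frontend_queue (back end to front end): [tag, gaussians, occ_aware_visibility, keyframes], where tag is "sync_backend", "init", or "keyframe", assembled in push_to_frontend.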

2.2 Some questions

1. Hyperparameters?

Hyperparameters are set before training starts and control the learning procedure and the model's structure. Unlike model parameters (weights and biases), they are not learned during training and must be tuned experimentally to find settings that give the best performance.

2. Pipeline parameters?

In machine learning generally, a pipeline chains the data-processing and training steps of a workflow so they run in a fixed order and can be tuned and evaluated together. A typical pipeline contains:

  • Data preprocessing: cleaning, transformation, feature scaling, etc.
  • Feature engineering: feature selection, feature extraction, etc.
  • Model training: choosing and fitting a model.
  • Model evaluation: measuring performance, e.g. via cross-validation or other metrics.

In this repository, however, pipeline_params is narrower: it is the 3DGS rendering-pipeline configuration from base_config.yaml (convert_SHs_python, compute_cov3D_python) that gets passed to the render function.

3. slam_frontend.py

3.1 Function analysis

The class definition and hyperparameter setup in this file are similar to Section 2 and are not repeated here.

3.1.1 Syncing data from the back end

This method stores the Gaussian model, visibility information, and keyframe list carried by a back-end message into the front end's corresponding attributes, then walks the keyframe list and updates each camera's rotation and translation.

    def sync_backend(self, data):
        self.gaussians = data[1]
        occ_aware_visibility = data[2]
        keyframes = data[3]
        self.occ_aware_visibility = occ_aware_visibility

        for kf_id, kf_R, kf_T in keyframes:
            self.cameras[kf_id].update_RT(kf_R.clone(), kf_T.clone())

3.1.2 The front-end main loop (front-end/back-end communication)

def run(self):
        cur_frame_idx = 0
        projection_matrix = getProjectionMatrix2(#从数据集中提取的投影矩阵,用于将3D投影到2D图像上
            znear=0.01,
            zfar=100.0,
            fx=self.dataset.fx,
            fy=self.dataset.fy,
            cx=self.dataset.cx,
            cy=self.dataset.cy,
            W=self.dataset.width,
            H=self.dataset.height,
        ).transpose(0, 1)
        projection_matrix = projection_matrix.to(device=self.device)
        tic = torch.cuda.Event(enable_timing=True)
        toc = torch.cuda.Event(enable_timing=True)

        while True:
            if self.q_vis2main.empty():
                if self.pause:
                    continue
            else:
                data_vis2main = self.q_vis2main.get()
                self.pause = data_vis2main.flag_pause
                if self.pause:
                    self.backend_queue.put(["pause"])
                    continue
                else:
                    self.backend_queue.put(["unpause"])

            if self.frontend_queue.empty():
                tic.record()
                if cur_frame_idx >= len(self.dataset):
                    if self.save_results:
                        eval_ate(
                            self.cameras,
                            self.kf_indices,
                            self.save_dir,
                            0,
                            final=True,
                            monocular=self.monocular,
                        )
                        save_gaussians(
                            self.gaussians, self.save_dir, "final", final=True
                        )
                    break

                if self.requested_init:
                    time.sleep(0.01)
                    continue

                if self.single_thread and self.requested_keyframe > 0:
                    time.sleep(0.01)
                    continue

                if not self.initialized and self.requested_keyframe > 0:
                    time.sleep(0.01)
                    continue

                viewpoint = Camera.init_from_dataset(
                    self.dataset, cur_frame_idx, projection_matrix
                )
                viewpoint.compute_grad_mask(self.config)

                self.cameras[cur_frame_idx] = viewpoint

                if self.reset:
                    self.initialize(cur_frame_idx, viewpoint)
                    self.current_window.append(cur_frame_idx)
                    cur_frame_idx += 1
                    continue

                #更新相机和关键帧
                self.initialized = self.initialized or (
                    len(self.current_window) == self.window_size
                )

                # Tracking
                render_pkg = self.tracking(cur_frame_idx, viewpoint)

                current_window_dict = {}
                current_window_dict[self.current_window[0]] = self.current_window[1:]
                keyframes = [self.cameras[kf_idx] for kf_idx in self.current_window]

                self.q_main2vis.put(
                    gui_utils.GaussianPacket(
                        gaussians=clone_obj(self.gaussians),
                        current_frame=viewpoint,
                        keyframes=keyframes,
                        kf_window=current_window_dict,
                    )
                )

                if self.requested_keyframe > 0:
                    self.cleanup(cur_frame_idx)
                    cur_frame_idx += 1
                    continue

                last_keyframe_idx = self.current_window[0]
                check_time = (cur_frame_idx - last_keyframe_idx) >= self.kf_interval
                curr_visibility = (render_pkg["n_touched"] > 0).long()
                create_kf = self.is_keyframe(
                    cur_frame_idx,
                    last_keyframe_idx,
                    curr_visibility,
                    self.occ_aware_visibility,
                )
                if len(self.current_window) < self.window_size:
                    union = torch.logical_or(
                        curr_visibility, self.occ_aware_visibility[last_keyframe_idx]
                    ).count_nonzero()
                    intersection = torch.logical_and(
                        curr_visibility, self.occ_aware_visibility[last_keyframe_idx]
                    ).count_nonzero()
                    point_ratio = intersection / union
                    create_kf = (
                        check_time
                        and point_ratio < self.config["Training"]["kf_overlap"]
                    )
                if self.single_thread:
                    create_kf = check_time and create_kf
                if create_kf:
                    self.current_window, removed = self.add_to_window(
                        cur_frame_idx,
                        curr_visibility,
                        self.occ_aware_visibility,
                        self.current_window,
                    )
                    if self.monocular and not self.initialized and removed is not None:
                        self.reset = True
                        Log(
                            "Keyframes lacks sufficient overlap to initialize the map, resetting."
                        )
                        continue
                    depth_map = self.add_new_keyframe(
                        cur_frame_idx,
                        depth=render_pkg["depth"],
                        opacity=render_pkg["opacity"],
                        init=False,
                    )
                    self.request_keyframe(
                        cur_frame_idx, viewpoint, self.current_window, depth_map
                    )
                else:
                    self.cleanup(cur_frame_idx)
                cur_frame_idx += 1

                #如果设置了保存结果,且当前帧是一个关键帧,并且达到了保存轨迹的时间间隔,则进行 ATE 评估
                if (
                    self.save_results
                    and self.save_trj
                    and create_kf
                    and len(self.kf_indices) % self.save_trj_kf_intv == 0
                ):
                    Log("Evaluating ATE at frame: ", cur_frame_idx)
                    eval_ate(
                        self.cameras,
                        self.kf_indices,
                        self.save_dir,
                        cur_frame_idx,
                        monocular=self.monocular,
                    )
                toc.record()
                torch.cuda.synchronize()
                if create_kf:
                    # throttle at 3fps when keyframe is added
                    duration = tic.elapsed_time(toc)
                    time.sleep(max(0.01, 1.0 / 3.0 - duration / 1000))
            else:#处理前端队列中的请求
                data = self.frontend_queue.get()
                if data[0] == "sync_backend":
                    self.sync_backend(data)

                elif data[0] == "keyframe":
                    self.sync_backend(data)
                    self.requested_keyframe -= 1

                elif data[0] == "init":
                    self.sync_backend(data)
                    self.requested_init = False

                elif data[0] == "stop":
                    Log("Frontend Stopped.")
                    break
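One detail worth highlighting from the loop above: while the keyframe window is not yet full, the keyframe decision is based on the covisibility of Gaussians between the current frame and the last keyframe,

    point_ratio = |V_cur ∩ V_kf| / |V_cur ∪ V_kf|,

and a new keyframe is created only when enough frames have passed (kf_interval) and this ratio drops below Training.kf_overlap (0.9 in base_config.yaml).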

3.2 Some questions

1. ATE evaluation?

ATE (Absolute Trajectory Error) measures the error between the estimated camera trajectory and the ground-truth trajectory: after the two trajectories are aligned, it is the absolute positional error at each timestamp, usually summarized as an RMSE. It is one of the standard accuracy metrics for SLAM and visual-odometry systems; a minimal sketch of the RMSE computation follows.
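A minimal sketch, assuming the two trajectories are already aligned and given as N×3 arrays of positions (the real evaluation in eval_utils.py uses the evo package, which also performs the alignment):

import numpy as np

def ate_rmse(positions_gt, positions_est):
    # per-frame absolute translational error, then root-mean-square over all frames
    errors = np.linalg.norm(positions_gt - positions_est, axis=1)
    return float(np.sqrt(np.mean(errors ** 2)))

gt  = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]])
est = np.array([[0.0, 0.1, 0.0], [1.0, -0.1, 0.0], [2.1, 0.0, 0.0]])
print(ate_rmse(gt, est))  # approximately 0.1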

4. gaussian_model.py

4.1 Function analysis

4.1.1 Building the covariance

Builds each Gaussian's covariance matrix from its scale and rotation.

    def build_covariance_from_scaling_rotation(
        self, scaling, scaling_modifier, rotation
    ):
        L = build_scaling_rotation(scaling_modifier * scaling, rotation)
        actual_covariance = L @ L.transpose(1, 2)
        symm = strip_symmetric(actual_covariance)
        return symm
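In symbols: build_scaling_rotation returns L = R S (rotation matrix times diagonal scale matrix), so the covariance computed above is Σ = L Lᵀ = R S Sᵀ Rᵀ, which is symmetric positive semi-definite by construction. strip_symmetric then keeps only the six unique entries of this symmetric 3×3 matrix.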

4.1.2 Creating a point cloud from camera info and a depth map

def create_pcd_from_image(self, cam_info, init=False, scale=2.0, depthmap=None):
        cam = cam_info #存储相机信息的对象,包含内参、图像数据、深度数据等
        image_ab = (torch.exp(cam.exposure_a)) * cam.original_image + cam.exposure_b#根据相机曝光参数对原始图像进行曝光校正
        image_ab = torch.clamp(image_ab, 0.0, 1.0)#将校正后的图像进行范围限制,确保其值在 [0, 1] 范围内
        rgb_raw = (image_ab * 255).byte().permute(1, 2, 0).contiguous().cpu().numpy()#将校正后的图像转换为 numpy 数组,便于后续处理

        if depthmap is not None:
            rgb = o3d.geometry.Image(rgb_raw.astype(np.uint8))# 将 RGB 图像数据转换为 Open3D 图像对象
            depth = o3d.geometry.Image(depthmap.astype(np.float32))# 将深度图像数据转换为 Open3D 图像对象
        else:#未提供深度图像
            depth_raw = cam.depth#获取相机深度信息
            if depth_raw is None:#创建一个空的深度图像
                depth_raw = np.empty((cam.image_height, cam.image_width))

            #对于单目传感器类型,添加噪声并进行缩放来生成深度图像
            if self.config["Dataset"]["sensor_type"] == "monocular":
                depth_raw = (
                    np.ones_like(depth_raw)
                    + (np.random.randn(depth_raw.shape[0], depth_raw.shape[1]) - 0.5)
                    * 0.05
                ) * scale

            rgb = o3d.geometry.Image(rgb_raw.astype(np.uint8))
            depth = o3d.geometry.Image(depth_raw.astype(np.float32))

        return self.create_pcd_from_image_and_depth(cam, rgb, depth, init)

4.1.3 Creating a point cloud from an RGB image and a depth image

def create_pcd_from_image_and_depth(self, cam, rgb, depth, init=False):
        if init:
            #获取点云的下采样因子,用于控制点云的密度
            downsample_factor = self.config["Dataset"]["pcd_downsample_init"]
        else:
            downsample_factor = self.config["Dataset"]["pcd_downsample"]
        point_size = self.config["Dataset"]["point_size"]
        if "adaptive_pointsize" in self.config["Dataset"]:
            if self.config["Dataset"]["adaptive_pointsize"]:
                point_size = min(0.05, point_size * np.median(depth))
        rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( #RGB图像数据
            rgb,
            depth,
            depth_scale=1.0,
            depth_trunc=100.0,
            convert_rgb_to_intensity=False,
        )

        W2C = getWorld2View2(cam.R, cam.T).cpu().numpy()
        pcd_tmp = o3d.geometry.PointCloud.create_from_rgbd_image(
            rgbd,
            o3d.camera.PinholeCameraIntrinsic(
                cam.image_width,
                cam.image_height,
                cam.fx,
                cam.fy,
                cam.cx,
                cam.cy,
            ),
            extrinsic=W2C,
            project_valid_depth_only=True,
        )
        pcd_tmp = pcd_tmp.random_down_sample(1.0 / downsample_factor)
        new_xyz = np.asarray(pcd_tmp.points)
        new_rgb = np.asarray(pcd_tmp.colors)

        pcd = BasicPointCloud(
            points=new_xyz, colors=new_rgb, normals=np.zeros((new_xyz.shape[0], 3))
        )
        self.ply_input = pcd

        fused_point_cloud = torch.from_numpy(np.asarray(pcd.points)).float().cuda()
        fused_color = RGB2SH(torch.from_numpy(np.asarray(pcd.colors)).float().cuda())
        features = (
            torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2))
            .float()
            .cuda()
        )
        features[:, :3, 0] = fused_color
        features[:, 3:, 1:] = 0.0

        dist2 = (
            torch.clamp_min(
                distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()),
                0.0000001,
            )
            * point_size
        )
        scales = torch.log(torch.sqrt(dist2))[..., None]
        if not self.isotropic:
            scales = scales.repeat(1, 3)

        rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
        rots[:, 0] = 1

        #计算每个点的不透明度
        opacities = inverse_sigmoid(
            0.5
            * torch.ones(
                (fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"
            )
        )

        return fused_point_cloud, features, scales, rots, opacities
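Two details of this initialization are worth a comment: dist2 is the squared distance to each point's nearest neighbours (from distCUDA2, as in the original 3DGS code) scaled by point_size, so scales = log(sqrt(dist2)) starts each Gaussian roughly at the local point spacing (stored in log space, since the scale activation is an exponential); and because inverse_sigmoid(0.5) = log(0.5 / (1 − 0.5)) = 0, every new Gaussian starts with a raw opacity of 0, i.e. an activated opacity of exactly 0.5.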

4.1.4 Merging new point-cloud data

Merges new point-cloud data (positions, features, scales, rotations, and opacities) into the existing Gaussian model as new learnable parameters.

    def extend_from_pcd(
        self, fused_point_cloud, features, scales, rots, opacities, kf_id
    ):
        new_xyz = nn.Parameter(fused_point_cloud.requires_grad_(True))
        new_features_dc = nn.Parameter(#提取颜色信息
            features[:, :, 0:1].transpose(1, 2).contiguous().requires_grad_(True)
        )
        new_features_rest = nn.Parameter(#提取其他特征信息
            features[:, :, 1:].transpose(1, 2).contiguous().requires_grad_(True)
        )
        #将点云的尺度、旋转和不透明度信息转化为 nn.Parameter 对象
        new_scaling = nn.Parameter(scales.requires_grad_(True))
        new_rotation = nn.Parameter(rots.requires_grad_(True))
        new_opacity = nn.Parameter(opacities.requires_grad_(True))

        new_unique_kfIDs = torch.ones((new_xyz.shape[0])).int() * kf_id
        new_n_obs = torch.zeros((new_xyz.shape[0])).int()
        self.densification_postfix(
            new_xyz,
            new_features_dc,
            new_features_rest,
            new_opacity,
            new_scaling,
            new_rotation,
            new_kf_ids=new_unique_kfIDs,
            new_n_obs=new_n_obs,
        )

4.1.5 Updating the point-cloud model

def extend_from_pcd_seq(
        self, cam_info, kf_id=-1, init=False, scale=2.0, depthmap=None
    ):
        fused_point_cloud, features, scales, rots, opacities = (
            self.create_pcd_from_image(cam_info, init, scale=scale, depthmap=depthmap)
        )
        #将生成的点云数据扩展到现有的点云模型中
        self.extend_from_pcd(
            fused_point_cloud, features, scales, rots, opacities, kf_id
        )

4.2 Some questions

1. @property?

In Python, @property is a decorator that turns a method into a read-only attribute, so it can be read with attribute syntax instead of an explicit method call. A small example follows.
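A toy example in the spirit of world_view_transform in camera_utils.py (heavily simplified, not the real Camera class):

import torch

class TinyCamera:
    def __init__(self, R, T):
        self.R, self.T = R, T

    @property
    def world_view_transform(self):
        # recomputed on every access, so it always reflects the latest R and T
        M = torch.eye(4)
        M[:3, :3] = self.R
        M[:3, 3] = self.T
        return M

cam = TinyCamera(torch.eye(3), torch.zeros(3))
print(cam.world_view_transform)  # accessed like an attribute, no parentheses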

2. What is the difference between extend_from_pcd and extend_from_pcd_seq?

extend_from_pcd is the lower-level method: it takes already-prepared point-cloud data (positions, features, scales, rotations, opacities), wraps it in nn.Parameter objects, and merges it into the existing Gaussian model.

extend_from_pcd_seq is the higher-level entry point used when a new keyframe arrives: it first calls create_pcd_from_image to generate the point-cloud data from the camera's image (and depth map), then hands the result to extend_from_pcd.

5. base_config.yaml

5.1 Parameter analysis

This configuration file contains both the SLAM parameters and the default 3DGS parameters.

## SLAM parameters
Results:
  save_results: False
  save_dir: "results"
  save_trj: False #是否保存轨迹数据
  save_trj_kf_intv: 5 #保存轨迹的关键帧间隔
  use_gui: True
  eval_rendering: False
  use_wandb: False #是否使用 wandb 进行实验管理和可视化

Dataset:
  type: 'euroc' #数据集类型
  sensor_type: 'stereo' #传感器类型
  pcd_downsample: 128 #点云下采样因子
  pcd_downsample_init: 32
  adaptive_pointsize: False #自适应点大小
  point_size: 0.1

#训练参数
Training:
  #Initialization
  init_itr_num: 1050 #迭代次数
  init_gaussian_update: 100 #高斯更新步数
  init_gaussian_reset: 500 #高斯重置步数
  init_gaussian_th: 0.005 #高斯函数阈值
  init_gaussian_extent: 30 #高斯函数范围
  # Tracking and Mapping 
  tracking_itr_num: 100 #跟踪阶段迭代次数
  mapping_itr_num: 150 #映射阶段迭代次数
  gaussian_update_every: 150 
  gaussian_update_offset: 50
  gaussian_th: 0.7
  gaussian_extent: 1.0
  gaussian_reset: 2001
  size_threshold: 20 #点云大小的阈值
  kf_interval: 5 #关键帧间隔
  window_size: 8 #滑动窗口大小
  pose_window: 3 #位姿窗口大小
  edge_threshold: 1.1 #边缘检测阈值
  rgb_boundary_threshold: 0.01
  alpha: 0.9
  kf_translation: 0.08
  kf_min_translation: 0.05
  kf_overlap: 0.9
  kf_cutoff: 0.3 #关键帧的切断距离
  prune_mode: 'slam'
  single_thread: False
  spherical_harmonics: False
  lr:
    cam_rot_delta: 0.003
    cam_trans_delta: 0.001


## 3DGS default parameters
opt_params:
  iterations: 30000
  position_lr_init: 0.00016
  position_lr_final: 0.0000016
  position_lr_delay_mult: 0.01
  position_lr_max_steps: 30000
#学习率
  feature_lr: 0.0025 #特征
  opacity_lr: 0.05 #不透明度
  scaling_lr: 0.001 #缩放
  rotation_lr: 0.001 #旋转
  percent_dense: 0.01 #密集化百分比
  lambda_dssim: 0.2 #DSSIM 损失的权重
  densification_interval: 100
  opacity_reset_interval: 3000
  densify_from_iter: 500
  densify_until_iter: 15000
  densify_grad_threshold: 0.0002

#模型参数
model_params:
  sh_degree: 0
  source_path: ""
  model_path: ""
  resolution: -1
  white_background: False
  data_device: "cuda"

#管道参数
pipeline_params:
  convert_SHs_python: False #是否使用 Python 实现来转换球面谐波
  compute_cov3D_python: False #是否使用 Python 实现来计算3D方差

5.2 Some questions

1. wandb?

wandb (Weights & Biases) is a popular experiment-tracking and visualization tool for machine learning. It records metrics, configurations, and artifacts, and makes it easy to compare runs and share results.

2. The EuRoC dataset?

EuRoC is a standard dataset for visual-odometry and SLAM research, provided by ETH Zurich. It is designed for evaluating and comparing visual SLAM algorithms and contains sequences recorded by a real micro aerial vehicle (drone) flying in indoor environments.

3. Adaptive point size?

Adaptive point size scales the initial size of newly inserted points with scene depth. In create_pcd_from_image_and_depth, when adaptive_pointsize is enabled, point_size = min(0.05, point_size * median(depth)), so points spawned in far-away regions start out larger while the hard cap keeps them from growing unboundedly.

4. DSSIM?

DSSIM (structural dissimilarity) measures how different two images are; it is derived from SSIM, typically as 1 − SSIM (sometimes (1 − SSIM)/2). Unlike SSIM, larger DSSIM values mean larger differences.

SSIM is a common image-quality metric based on luminance, contrast, and structure. Its value is at most 1, and for typical images lies between 0 and 1: values near 1 mean the two images are very similar, values near 0 mean they differ strongly. The color_refinement loss in Section 2.1.7 uses exactly this dissimilarity term, weighted by lambda_dssim = 0.2; the full formula is written out below.
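For reference, the photometric loss used in color_refinement (Section 2.1.7) is

    loss = (1 − λ_dssim) · L1(I, I_gt) + λ_dssim · (1 − SSIM(I, I_gt))

with λ_dssim = 0.2 from opt_params; this is the same combination used to train the original 3D Gaussian Splatting model.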

6. eval_utils.py

6.1 Function analysis


'''
#对估计的轨迹和真实轨迹进行对齐,然后计算和返回ATE(绝对轨迹误差)的RMSE(均方根误差)。它还会生成并保存评估的统计数据和可视化图表。

pose_gt:真实轨迹的位姿序列
pose_est:估计的轨迹序列
plot_dir:用于保存生成图像和统计数据的目录路径
label:用于区分不同的评估结果
monocular:bool值,判断是否为单目相机,如果是,则在轨迹对齐时需要校正尺度。
'''

def evaluate_evo(poses_gt, poses_est, plot_dir, label, monocular=False):
    ## Plot
    #通过 PosePath3D 类创建轨迹对象,将输入的位姿序列转换为轨迹对象。
    traj_ref = PosePath3D(poses_se3=poses_gt) 
    traj_est = PosePath3D(poses_se3=poses_est)
    #轨迹对齐
    traj_est_aligned = trajectory.align_trajectory(
        traj_est, traj_ref, correct_scale=monocular
    )

    ## RMSE
    pose_relation = metrics.PoseRelation.translation_part
    data = (traj_ref, traj_est_aligned)
    ape_metric = metrics.APE(pose_relation)
    ape_metric.process_data(data)
    ape_stat = ape_metric.get_statistic(metrics.StatisticsType.rmse)
    ape_stats = ape_metric.get_all_statistics()
    Log("RMSE ATE \[m]", ape_stat, tag="Eval")


    #保存统计数据
    #文件名根据'label'参数生成
    with open(
        os.path.join(plot_dir, "stats_{}.json".format(str(label))),
        "w",
        encoding="utf-8",
    ) as f:
        json.dump(ape_stats, f, indent=4)

    #生成并保存轨迹图
    #使用 evo.tools.plot 工具绘制真实轨迹和对齐后的估计轨迹,并根据误差绘制彩色地图。
    plot_mode = evo.tools.plot.PlotMode.xy
    fig = plt.figure()
    ax = evo.tools.plot.prepare_axis(fig, plot_mode)
    ax.set_title(f"ATE RMSE: {ape_stat}")
    evo.tools.plot.traj(ax, plot_mode, traj_ref, "--", "gray", "gt")
    evo.tools.plot.traj_colormap(
        ax,
        traj_est_aligned,
        ape_metric.error,
        plot_mode,
        min_map=ape_stats["min"],
        max_map=ape_stats["max"],
    )
    ax.legend()
    plt.savefig(os.path.join(plot_dir, "evo_2dplot_{}.png".format(str(label))), dpi=90)

    #返回RMSE值
    return ape_stat

'''
iterations:当前迭代次数
final:ture-最终评估;false-中间评估
'''
#从关键帧中提取位姿信息,生成估计和真实轨迹的数据,并调用evaluate_evo进行ATE评估和日志记录。
def eval_ate(frames, kf_ids, save_dir, iterations, final=False, monocular=False):
    #轨迹数据的初始化
    trj_data = dict()
    latest_frame_idx = kf_ids[-1] + 2 if final else kf_ids[-1] + 1
    trj_id, trj_est, trj_gt = [], [], []
    trj_est_np, trj_gt_np = [], []

    #生成位姿矩阵的辅助函数
    def gen_pose_matrix(R, T):
        pose = np.eye(4)
        pose[0:3, 0:3] = R.cpu().numpy()
        pose[0:3, 3] = T.cpu().numpy()
        return pose

    #处理每个关键帧的位姿信息
    for kf_id in kf_ids:
        kf = frames[kf_id]
        pose_est = np.linalg.inv(gen_pose_matrix(kf.R, kf.T))
        pose_gt = np.linalg.inv(gen_pose_matrix(kf.R_gt, kf.T_gt))

        trj_id.append(frames[kf_id].uid)
        trj_est.append(pose_est.tolist())
        trj_gt.append(pose_gt.tolist())

        trj_est_np.append(pose_est)
        trj_gt_np.append(pose_gt)

    #保存轨迹数据
    trj_data["trj_id"] = trj_id
    trj_data["trj_est"] = trj_est
    trj_data["trj_gt"] = trj_gt

    plot_dir = os.path.join(save_dir, "plot")
    mkdir_p(plot_dir)

    label_evo = "final" if final else "{:04}".format(iterations)
    with open(
        os.path.join(plot_dir, f"trj_{label_evo}.json"), "w", encoding="utf-8"
    ) as f:
        json.dump(trj_data, f, indent=4)

    #评估和日志记录
    ate = evaluate_evo(
        poses_gt=trj_gt_np,
        poses_est=trj_est_np,
        plot_dir=plot_dir,
        label=label_evo,
        monocular=monocular,
    )
    wandb.log({"frame_idx": latest_frame_idx, "ate": ate})
    return ate


#评估渲染结果的质量,计算PSNR、SSIM和LPIPS等图像质量指标
def eval_rendering(
    frames,#帧的列表
    gaussians,#高斯模型参数
    dataset,#数据集对象,用于获取真实图像
    save_dir,#保存生成图像和统计数据的目录路径
    pipe,#渲染管道,包含渲染过程中的相关配置
    background,#背景图像
    kf_indices,#关键帧的索引,渲染时跳过这些帧
    iteration="final",#当前迭代次数
):
    interval = 5
    img_pred, img_gt, saved_frame_idx = [], [], []
    end_idx = len(frames) - 1 if iteration == "final" or "before_opt" else iteration
    # note: `iteration == "final" or "before_opt"` is always truthy (the string "before_opt" is non-empty),
    # so end_idx always ends up as len(frames) - 1 regardless of `iteration`
    psnr_array, ssim_array, lpips_array = [], [], []
    cal_lpips = LearnedPerceptualImagePatchSimilarity(
        net_type="alex", normalize=True
    ).to("cuda")

    #处理每个帧的渲染结果
    for idx in range(0, end_idx, interval):
        if idx in kf_indices:
            continue
        saved_frame_idx.append(idx)
        frame = frames[idx]
        gt_image, _, _ = dataset[idx]

        rendering = render(frame, gaussians, pipe, background)["render"]
        image = torch.clamp(rendering, 0.0, 1.0)

        gt = (gt_image.cpu().numpy().transpose((1, 2, 0)) * 255).astype(np.uint8)
        pred = (image.detach().cpu().numpy().transpose((1, 2, 0)) * 255).astype(
            np.uint8
        )
        gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
        pred = cv2.cvtColor(pred, cv2.COLOR_BGR2RGB)
        img_pred.append(pred)
        img_gt.append(gt)

        mask = gt_image > 0

        psnr_score = psnr((image[mask]).unsqueeze(0), (gt_image[mask]).unsqueeze(0))
        ssim_score = ssim((image).unsqueeze(0), (gt_image).unsqueeze(0))
        lpips_score = cal_lpips((image).unsqueeze(0), (gt_image).unsqueeze(0))

        psnr_array.append(psnr_score.item())
        ssim_array.append(ssim_score.item())
        lpips_array.append(lpips_score.item())

    #计算并保存统计数据
    output = dict()
    output["mean_psnr"] = float(np.mean(psnr_array))
    output["mean_ssim"] = float(np.mean(ssim_array))
    output["mean_lpips"] = float(np.mean(lpips_array))

    Log(
        f'mean psnr: {output["mean_psnr"]}, ssim: {output["mean_ssim"]}, lpips: {output["mean_lpips"]}',
        tag="Eval",
    )

    psnr_save_dir = os.path.join(save_dir, "psnr", str(iteration))
    mkdir_p(psnr_save_dir)

    json.dump(
        output,
        open(os.path.join(psnr_save_dir, "final_result.json"), "w", encoding="utf-8"),
        indent=4,
    )
    return output


#保存高斯点云数据
def save_gaussians(gaussians, name, iteration, final=False):
    if name is None:
        return
    if final:
        point_cloud_path = os.path.join(name, "point_cloud/final")
    else:
        point_cloud_path = os.path.join(
            name, "point_cloud/iteration_{}".format(str(iteration))
        )
    gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply"))

7. camera_utils.py

7.1 Function analysis

'''
这段代码定义了一个`Camera`类,它继承自`nn.Module`,表示一个带有摄像机参数的模块。主要功能如下:

1. **初始化函数**`__init__`:初始化摄像机的内外参数,包括旋转、平移矩阵和内参矩阵等。
2. **静态方法**`init_from_dataset`和`init_from_gui`:从数据集或GUI中初始化摄像机实例。
3. **属性**`world_view_transform`和`full_proj_transform`:返回世界到视图和完整投影变换矩阵。
4. **方法**`update_RT`:更新旋转和平移矩阵。
5. **方法**`compute_grad_mask`:计算图像的梯度掩码,用于确定边缘区域。
6. **方法**`clean`:清除图像数据和梯度掩码等。

### 详细分析

- **初始化函数**
  - `self.R` 和 `self.T` 分别表示旋转和平移矩阵。
  - `self.R_gt` 和 `self.T_gt` 保存了地面真实的旋转和平移矩阵。
  - `self.original_image` 和 `self.depth` 保存了颜色图像和深度图像。
  - `fx, fy, cx, cy, fovx, fovy` 等表示相机的内参和视场角。
  - `self.cam_rot_delta` 和 `self.cam_trans_delta` 是相机位姿的参数,允许在训练期间进行微调。
  - `self.exposure_a` 和 `self.exposure_b` 表示曝光参数。

- **静态方法**
  - `init_from_dataset`:从数据集中提取颜色、深度和位姿信息,初始化相机。
  - `init_from_gui`:从GUI中提取投影矩阵等参数,初始化相机。

- **属性**
  - `world_view_transform`:计算世界到视图的变换矩阵。
  - `full_proj_transform`:计算完整的投影变换矩阵。

- **方法**
  - `update_RT`:更新旋转和平移矩阵。
  - `compute_grad_mask`:计算图像的梯度掩码,用于确定边缘区域。
  - `clean`:清除图像数据和梯度掩码等,以释放内存。
'''
class Camera(nn.Module):
    def __init__(
        self,
        uid,
        color,
        depth,
        gt_T,
        projection_matrix,
        fx,
        fy,
        cx,
        cy,
        fovx,
        fovy,
        image_height,
        image_width,
        device="cuda:0",
    ):
        super(Camera, self).__init__()
        self.uid = uid
        self.device = device

        T = torch.eye(4, device=device)
        self.R = T[:3, :3]
        self.T = T[:3, 3]
        self.R_gt = gt_T[:3, :3]
        self.T_gt = gt_T[:3, 3]

        self.original_image = color
        self.depth = depth
        self.grad_mask = None #梯度掩码

        self.fx = fx
        self.fy = fy
        self.cx = cx
        self.cy = cy
        self.FoVx = fovx
        self.FoVy = fovy
        self.image_height = image_height
        self.image_width = image_width

        self.cam_rot_delta = nn.Parameter(
            torch.zeros(3, requires_grad=True, device=device)
        )
        self.cam_trans_delta = nn.Parameter(
            torch.zeros(3, requires_grad=True, device=device)
        )

        self.exposure_a = nn.Parameter(
            torch.tensor([0.0], requires_grad=True, device=device)
        )
        self.exposure_b = nn.Parameter(
            torch.tensor([0.0], requires_grad=True, device=device)
        )

        self.projection_matrix = projection_matrix.to(device=device)

    @staticmethod
    def init_from_dataset(dataset, idx, projection_matrix):
        gt_color, gt_depth, gt_pose = dataset[idx]
        return Camera(
            idx,
            gt_color,
            gt_depth,
            gt_pose,
            projection_matrix,
            dataset.fx,
            dataset.fy,
            dataset.cx,
            dataset.cy,
            dataset.fovx,
            dataset.fovy,
            dataset.height,
            dataset.width,
            device=dataset.device,
        )

    @staticmethod
    def init_from_gui(uid, T, FoVx, FoVy, fx, fy, cx, cy, H, W):
        projection_matrix = getProjectionMatrix2(
            znear=0.01, zfar=100.0, fx=fx, fy=fy, cx=cx, cy=cy, W=W, H=H
        ).transpose(0, 1)
        return Camera(
            uid, None, None, T, projection_matrix, fx, fy, cx, cy, FoVx, FoVy, H, W
        )

    @property
    def world_view_transform(self):
        return getWorld2View2(self.R, self.T).transpose(0, 1)

    @property
    def full_proj_transform(self):
        return (
            self.world_view_transform.unsqueeze(0).bmm(
                self.projection_matrix.unsqueeze(0)
            )
        ).squeeze(0)

    @property
    def camera_center(self):
        return self.world_view_transform.inverse()[3, :3]

    def update_RT(self, R, t):
        self.R = R.to(device=self.device)
        self.T = t.to(device=self.device)

    def compute_grad_mask(self, config): #计算梯度掩码,用于在图像中确定边缘区域
        edge_threshold = config["Training"]["edge_threshold"] #从配置中获取边缘阈值,这个阈值用于确定梯度的强度,超过阈值的部分将被视为边缘。

        # 将原始图像转换为灰度图像。这里使用了 mean 函数对所有通道进行平均,dim=0 表示对通道维度进行平均,keepdim=True 保持维度的数量。
        gray_img = self.original_image.mean(dim=0, keepdim=True)
        # 计算灰度图像的垂直和水平方向的梯度。
        gray_grad_v, gray_grad_h = image_gradient(gray_img)
        # 根据灰度图像生成垂直和水平梯度的掩码。掩码的作用是在计算梯度时过滤掉图像中不需要考虑的部分,例如图像边缘。
        mask_v, mask_h = image_gradient_mask(gray_img)
        gray_grad_v = gray_grad_v * mask_v
        gray_grad_h = gray_grad_h * mask_h
        img_grad_intensity = torch.sqrt(gray_grad_v**2 + gray_grad_h**2)
        # 计算梯度的强度,通过对垂直和水平方向的梯度分量进行平方并求和,然后取平方根得到。

        # 如果数据集类型是 "replica",则将图像分成多个块,并计算每个块的梯度强度的中位数。然后根据中位数和阈值乘数,将梯度强度大于阈值的部分置为1,否则置为0。
        if config["Dataset"]["type"] == "replica":
            row, col = 32, 32
            multiplier = edge_threshold
            _, h, w = self.original_image.shape
            for r in range(row):
                for c in range(col):
                    block = img_grad_intensity[
                        :,
                        r * int(h / row) : (r + 1) * int(h / row),
                        c * int(w / col) : (c + 1) * int(w / col),
                    ]
                    th_median = block.median()
                    block[block > (th_median * multiplier)] = 1
                    block[block <= (th_median * multiplier)] = 0
            self.grad_mask = img_grad_intensity
        else: #如果不是 "replica" 类型的数据集,则计算整个图像的梯度强度中位数,并将梯度强度大于阈值的部分设为 True,否则为 False。
            median_img_grad_intensity = img_grad_intensity.median()
            self.grad_mask = (
                img_grad_intensity > median_img_grad_intensity * edge_threshold
            )

    def clean(self):
        self.original_image = None
        self.depth = None
        self.grad_mask = None

        self.cam_rot_delta = None
        self.cam_trans_delta = None

        self.exposure_a = None
        self.exposure_b = None

8. pose_utils.py

8.1 Function analysis

#将R和T组合成4*4的变换矩阵
def rt2mat(R, T):
    mat = np.eye(4)
    mat[0:3, 0:3] = R
    mat[0:3, 3] = T
    return mat

#生成给定向量x的反对称矩阵
def skew_sym_mat(x):
    device = x.device
    dtype = x.dtype
    ssm = torch.zeros(3, 3, device=device, dtype=dtype)
    ssm[0, 1] = -x[2]
    ssm[0, 2] = x[1]
    ssm[1, 0] = x[2]
    ssm[1, 2] = -x[0]
    ssm[2, 0] = -x[1]
    ssm[2, 1] = x[0]
    return ssm

#计算给定旋转向量的SO(3)指数映射,返回对应的旋转矩阵
def SO3_exp(theta):
    device = theta.device
    dtype = theta.dtype

    W = skew_sym_mat(theta)
    W2 = W @ W
    angle = torch.norm(theta)
    I = torch.eye(3, device=device, dtype=dtype)
    if angle < 1e-5:
        return I + W + 0.5 * W2
    else:
        return (
            I
            + (torch.sin(angle) / angle) * W
            + ((1 - torch.cos(angle)) / (angle**2)) * W2
        )

#计算用于平移部分的矩阵,用于SE(3)指数映射的计算
def V(theta):
    dtype = theta.dtype
    device = theta.device
    I = torch.eye(3, device=device, dtype=dtype)
    W = skew_sym_mat(theta)
    W2 = W @ W
    angle = torch.norm(theta)
    if angle < 1e-5:
        V = I + 0.5 * W + (1.0 / 6.0) * W2
    else:
        V = (
            I
            + W * ((1.0 - torch.cos(angle)) / (angle**2))
            + W2 * ((angle - torch.sin(angle)) / (angle**3))
        )
    return V

#计算给定位移向量tau的SE(3)指数映射,返回4x4的变换矩阵
def SE3_exp(tau):
    dtype = tau.dtype
    device = tau.device

    rho = tau[:3]
    theta = tau[3:]
    R = SO3_exp(theta)
    t = V(theta) @ rho

    T = torch.eye(4, device=device, dtype=dtype)
    T[:3, :3] = R
    T[:3, 3] = t
    return T

#根据相机当前的旋转和平移增量更新相机的位姿,并判断是否收敛
def update_pose(camera, converged_threshold=1e-4):
    tau = torch.cat([camera.cam_trans_delta, camera.cam_rot_delta], axis=0)

    T_w2c = torch.eye(4, device=tau.device)
    T_w2c[0:3, 0:3] = camera.R
    T_w2c[0:3, 3] = camera.T

    new_w2c = SE3_exp(tau) @ T_w2c

    new_R = new_w2c[0:3, 0:3]
    new_T = new_w2c[0:3, 3]

    converged = tau.norm() < converged_threshold
    camera.update_RT(new_R, new_T)

    camera.cam_rot_delta.data.fill_(0)
    camera.cam_trans_delta.data.fill_(0)
    return converged
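For reference, the functions above implement the standard closed-form exponential maps. Writing W for the skew-symmetric matrix of the rotation vector θ and φ = ‖θ‖:

  • SO3_exp (Rodrigues' formula): exp(W) = I + (sin φ / φ) · W + ((1 − cos φ) / φ²) · W²
  • V(θ) (the left Jacobian): V = I + ((1 − cos φ) / φ²) · W + ((φ − sin φ) / φ³) · W²
  • SE3_exp([ρ; θ]): a 4×4 matrix with rotation block exp(W) and translation V(θ) · ρ

For small angles (φ < 1e-5) both functions fall back to their second-order Taylor expansions, exactly as in the code. update_pose then left-multiplies the current world-to-camera transform by SE3_exp(τ), where τ stacks the translation and rotation deltas, zeroes the deltas, and declares convergence once ‖τ‖ < 1e-4.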
