可以看出来这里是先进行高斯建图器初始化,再用一个线程启动run,初始化和run都干了啥?
std::filesystem::path gaussian_cfg_path(argv[3]);
std::shared_ptr<GaussianMapper> pGausMapper =
std::make_shared<GaussianMapper>(
pSLAM, gaussian_cfg_path, output_dir, 0, device_type);
std::thread training_thd(&GaussianMapper::run, pGausMapper.get());
构造函数
主要进行建图器的初始化,初始化了gaussianModel和gaussianScene
从slam系统里获取相机,把相机送入gaussianScene
GaussianMapper::GaussianMapper(
std::shared_ptr<ORB_SLAM3::System> pSLAM, //orb-slam系统的指针
std::filesystem::path gaussian_config_file_path, //3dgs的配置文件
std::filesystem::path result_dir, //放结果的路径
int seed, //随机种子(?)
torch::DeviceType device_type /*设备类型*/)
/*使用初始化列表*/
: pSLAM_(pSLAM),
initial_mapped_(false), /*未初始化*/
interrupt_training_(false), /*未曾断点续训过*/
stopped_(false),
iteration_(0), /*已迭代次数*/
ema_loss_for_log_(0.0f),
SLAM_ended_(false),
loop_closure_iteration_(false),
min_num_initial_map_kfs_(15UL),
large_rot_th_(1e-1f),
large_trans_th_(1e-2f),
training_report_interval_(0)
{
/*移除显而易见的代码*/
// Initialize scene and model
// 初始化场景和模型,调用GaussianModel和GaussianScene初始化
gaussians_ = std::make_shared<GaussianModel>(model_params_);
scene_ = std::make_shared<GaussianScene>(model_params_);
// Mode
if (!pSLAM) {
// NO SLAM
return;
}
// 根据orb-slam系统的指针设置传感器类型
switch (pSLAM->getSensorType())
{
/*单目和双目相机的设置*/
case ORB_SLAM3::System::RGBD:
case ORB_SLAM3::System::IMU_RGBD:
{
this->sensor_type_ = RGBD;
}
break;
}
// Cameras
// TODO: not only monocular
auto settings = pSLAM->getSettings();
cv::Size SLAM_im_size = settings->newImSize();
UndistortParams undistort_params(
SLAM_im_size,
settings->camera1DistortionCoef() //获取相机畸变系数
);
//从orb-slam系统里获取所有相机,对于每个相机,放入3d高斯重建系统GaussianScene里
auto vpCameras = pSLAM->getAtlas()->GetAllCameras();
for (auto& SLAM_camera : vpCameras) {
Camera camera;
camera.camera_id_ = SLAM_camera->GetId();
if (SLAM_camera->GetType() == ORB_SLAM3::GeometricCamera::CAM_PINHOLE) {
camera.setModelId(Camera::CameraModelType::PINHOLE);
float SLAM_fx = SLAM_camera->getParameter(0);
float SLAM_fy = SLAM_camera->getParameter(1);
float SLAM_cx = SLAM_camera->getParameter(2);
float SLAM_cy = SLAM_camera->getParameter(3);
// Old K, i.e. K in SLAM
cv::Mat K = (
cv::Mat_<float>(3, 3)
<< SLAM_fx, 0.f, SLAM_cx,
0.f, SLAM_fy, SLAM_cy,
0.f, 0.f, 1.f
);
camera.width_ = undistort_params.old_size_.width;
float x_ratio = static_cast<float>(camera.width_) / undistort_params.old_size_.width;
camera.height_ = undistort_params.old_size_.height;
float y_ratio = static_cast<float>(camera.height_) / undistort_params.old_size_.height;
camera.num_gaus_pyramid_sub_levels_ = num_gaus_pyramid_sub_levels_;
camera.gaus_pyramid_width_.resize(num_gaus_pyramid_sub_levels_);
camera.gaus_pyramid_height_.resize(num_gaus_pyramid_sub_levels_);
for (int l = 0; l < num_gaus_pyramid_sub_levels_; ++l) {
camera.gaus_pyramid_width_[l] = camera.width_ * this->kf_gaus_pyramid_factors_[l];
camera.gaus_pyramid_height_[l] = camera.height_ * this->kf_gaus_pyramid_factors_[l];
}
camera.params_[0]/*new fx*/= SLAM_fx * x_ratio;
camera.params_[1]/*new fy*/= SLAM_fy * y_ratio;
camera.params_[2]/*new cx*/= SLAM_cx * x_ratio;
camera.params_[3]/*new cy*/= SLAM_cy * y_ratio;
cv::Mat K_new = (
cv::Mat_<float>(3, 3)
<< camera.params_[0], 0.f, camera.params_[2],
0.f, camera.params_[1], camera.params_[3],
0.f, 0.f, 1.f
);
// Undistortion
if (this->sensor_type_ == MONOCULAR || this->sensor_type_ == RGBD)
undistort_params.dist_coeff_.copyTo(camera.dist_coeff_);
camera.initUndistortRectifyMapAndMask(K, SLAM_im_size, K_new, true);
undistort_mask_[camera.camera_id_] =
tensor_utils::cvMat2TorchTensor_Float32(
camera.undistort_mask, device_type_);
cv::Mat viewer_sub_undistort_mask;
int viewer_image_height_ = camera.height_ * rendered_image_viewer_scale_;
int viewer_image_width_ = camera.width_ * rendered_image_viewer_scale_;
cv::resize(camera.undistort_mask, viewer_sub_undistort_mask,
cv::Size(viewer_image_width_, viewer_image_height_));
viewer_sub_undistort_mask_[camera.camera_id_] =
tensor_utils::cvMat2TorchTensor_Float32(
viewer_sub_undistort_mask, device_type_);
cv::Mat viewer_main_undistort_mask;
int viewer_image_height_main_ = camera.height_ * rendered_image_viewer_scale_main_;
int viewer_image_width_main_ = camera.width_ * rendered_image_viewer_scale_main_;
cv::resize(camera.undistort_mask, viewer_main_undistort_mask,
cv::Size(viewer_image_width_main_, viewer_image_height_main_));
viewer_main_undistort_mask_[camera.camera_id_] =
tensor_utils::cvMat2TorchTensor_Float32(
viewer_main_undistort_mask, device_type_);
}
if (!viewer_camera_id_set_) {
viewer_camera_id_ = camera.camera_id_;
viewer_camera_id_set_ = true;
}
this->scene_->addCamera(camera);
}// end for
}
run线程
在run里面进行第一次高斯建图(First loop: Initial gaussian mapping),高斯增量建图和Third loop: Tail gaussian optimization(感觉像高斯的稠密化)
run就是根据当前的情况不断调用trainForOneIteration()
对于First loop: Initial gaussian mapping:
从orb-slam系统中获取关键帧和地图点,地图点送进gaussianScene中,关键帧信息送进GaussianKeyframe中。如果用关键帧i初始化地图,那么增加关键帧i的使用次数
关键帧信息包括设置关键帧位姿,设置相机参数,去除相机畸变并设置关键帧的图片,初始化高斯金字塔的尺寸和使用次数,调用computeTransformTensors将3D点投影到2D平面上。
最后把GaussianKeyframe送进gaussianScene中
对于每个在gaussianScene的keyframe,根据设备类型创建多分辨率图像
从pcd中创建点云,调用trainForOneIteration()训练一次,设置3dgaussian地图已经初始化
对于Second loop: Incremental gaussian mapping。正常调用trainForOneIteration()即可
注意到second loop有个额外的combineMappingOperations()操作,这个操作是根据orb-slam的BA/loop closure情况调整高斯地图
1.局部BA。对于从slam系统里获取的帧,如果此帧已经存在,只调整帧位姿,否则添加新帧handleNewKeyframe(kf)。并且无论如何都要处理新来的关键点,调用increasePcd()
2.回环检测。如果知道这是一个回环帧,且回环帧已经存在,则计算回环边的误差,并利用scaledTransformVisiblePointsOfKeyframe() 纠正(尺度,变换等)能看见的点。同样要对新点进行处理。
3.尺度调整。这个注意到尺度有问题。调用applyScaledTransformation()纠正尺度,或者直接对cache里面的point cloud操作
对于 Third loop: Tail gaussian optimization,根据稠密化的情况调用trainForOneIteration()即可
void GaussianMapper::run()
{
// First loop: Initial gaussian mapping
//初始化高斯建图,主要读取点云和相机位姿,准备用于训练的多分辨率图像,进行第一次训练
while (!isStopped()) {
// Check conditions for initial mapping 检查初始化情况
if (hasMetInitialMappingConditions()) {
pSLAM_->getAtlas()->clearMappingOperation();
// Get initial sparse map 获取稀疏地图
auto pMap = pSLAM_->getAtlas()->GetCurrentMap();
std::vector<ORB_SLAM3::KeyFrame*> vpKFs; //从orb-slam系统中获取关键帧
std::vector<ORB_SLAM3::MapPoint*> vpMPs; //从orb-eslam系统中获取地图点
//读取点云和相机位姿
{
std::unique_lock<std::mutex> lock_map(pMap->mMutexMapUpdate);
vpKFs = pMap->GetAllKeyFrames();
vpMPs = pMap->GetAllMapPoints();
for (const auto& pMP : vpMPs){ //获取地图点位置和颜色,送进gaussianScene中
Point3D point3D;
auto pos = pMP->GetWorldPos();
point3D.xyz_(0) = pos.x();
point3D.xyz_(1) = pos.y();
point3D.xyz_(2) = pos.z();
auto color = pMP->GetColorRGB();
point3D.color_(0) = color(0);
point3D.color_(1) = color(1);
point3D.color_(2) = color(2);
scene_->cachePoint3D(pMP->mnId, point3D);
}
for (const auto& pKF : vpKFs){//设置关键帧的相机,初始化高斯金字塔,送进gaussianScene中
std::shared_ptr<GaussianKeyframe> new_kf = std::make_shared<GaussianKeyframe>(pKF->mnId, getIteration());
new_kf->zfar_ = z_far_;
new_kf->znear_ = z_near_;
auto pose = pKF->GetPose();// 设置相机位姿
new_kf->setPose(
pose.unit_quaternion().cast<double>(),
pose.translation().cast<double>());
cv::Mat imgRGB_undistorted, imgAux_undistorted;
try { //设置相机参数,去除畸变,创建图像金字塔
// Camera
Camera& camera = scene_->cameras_.at(pKF->mpCamera->GetId());
new_kf->setCameraParams(camera); //设置相机参数
// Image (left if STEREO)
cv::Mat imgRGB = pKF->imgLeftRGB;
if (this->sensor_type_ == STEREO)
imgRGB_undistorted = imgRGB;
else
camera.undistortImage(imgRGB, imgRGB_undistorted);
// Auxiliary Image
cv::Mat imgAux = pKF->imgAuxiliary;
if (this->sensor_type_ == RGBD)
camera.undistortImage(imgAux, imgAux_undistorted);
else
imgAux_undistorted = imgAux;
new_kf->original_image_ =
tensor_utils::cvMat2TorchTensor_Float32(imgRGB_undistorted, device_type_);
new_kf->img_filename_ = pKF->mNameFile;
//高斯金字塔宽高初始化
new_kf->gaus_pyramid_height_ = camera.gaus_pyramid_height_;
new_kf->gaus_pyramid_width_ = camera.gaus_pyramid_width_;
new_kf->gaus_pyramid_times_of_use_ = kf_gaus_pyramid_times_of_use_;
}
new_kf->computeTransformTensors();
scene_->addKeyframe(new_kf, &kfid_shuffled_); //新关键帧送进Scene
increaseKeyframeTimesOfUse(new_kf, newKeyframeTimesOfUse()); //增加关键帧的使用次数
// Features
std::vector<float> pixels;
std::vector<float> pointsLocal;
pKF->GetKeypointInfo(pixels, pointsLocal);
new_kf->kps_pixel_ = std::move(pixels);
new_kf->kps_point_local_ = std::move(pointsLocal);
new_kf->img_undist_ = imgRGB_undistorted;
new_kf->img_auxiliary_undist_ = imgAux_undistorted;
}//end for (const auto& pKF : vpKFs)
}
// Prepare multi resolution images for training
// 准备用于训练的多分辨率图像
for (auto& kfit : scene_->keyframes()) {
auto pkf = kfit.second;
// 根据设备类型决定
if (device_type_ == torch::kCUDA) {
cv::cuda::GpuMat img_gpu;
img_gpu.upload(pkf->img_undist_);
pkf->gaus_pyramid_original_image_.resize(num_gaus_pyramid_sub_levels_);
for (int l = 0; l < num_gaus_pyramid_sub_levels_; ++l) {
cv::cuda::GpuMat img_resized;
cv::cuda::resize(img_gpu, img_resized,
cv::Size(pkf->gaus_pyramid_width_[l], pkf->gaus_pyramid_height_[l]));
pkf->gaus_pyramid_original_image_[l] =
tensor_utils::cvGpuMat2TorchTensor_Float32(img_resized);
}
}
else {
pkf->gaus_pyramid_original_image_.resize(num_gaus_pyramid_sub_levels_);
for (int l = 0; l < num_gaus_pyramid_sub_levels_; ++l) {
cv::Mat img_resized;
cv::resize(pkf->img_undist_, img_resized,
cv::Size(pkf->gaus_pyramid_width_[l], pkf->gaus_pyramid_height_[l]));
pkf->gaus_pyramid_original_image_[l] =
tensor_utils::cvMat2TorchTensor_Float32(img_resized, device_type_);
}
}
}
// Prepare for training 训练准备
{
std::unique_lock<std::mutex> lock_render(mutex_render_);
scene_->cameras_extent_ = std::get<1>(scene_->getNerfppNorm());
gaussians_->createFromPcd(scene_->cached_point_cloud_, scene_->cameras_extent_);
std::unique_lock<std::mutex> lock(mutex_settings_);
gaussians_->trainingSetup(opt_params_);
}
// Invoke training once 第一次训练
trainForOneIteration();
// Finish initial mapping loop
initial_mapped_ = true;
break;
}
else if (pSLAM_->isShutDown()) {
break;
}
else {
// Initial conditions not satisfied
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
// Second loop: Incremental gaussian mapping
int SLAM_stop_iter = 0;
while (!isStopped()) {
// Check conditions for incremental mapping
if (hasMetIncrementalMappingConditions()) {
combineMappingOperations();
if (cull_keyframes_)
cullKeyframes();
}
// Invoke training once
trainForOneIteration();
if (pSLAM_->isShutDown()) {
SLAM_stop_iter = getIteration();
SLAM_ended_ = true;
}
if (SLAM_ended_ || getIteration() >= opt_params_.iterations_)
break;
}
// Third loop: Tail gaussian optimization
int densify_interval = densifyInterval();
int n_delay_iters = densify_interval * 0.8;
while (getIteration() - SLAM_stop_iter <= n_delay_iters ||
getIteration() % densify_interval <= n_delay_iters ||
isKeepingTraining()) {
trainForOneIteration();
densify_interval = densifyInterval();
n_delay_iters = densify_interval * 0.8;
}// end while
// Save and clear
renderAndRecordAllKeyframes("_shutdown");
savePly(result_dir_ / (std::to_string(getIteration()) + "_shutdown") / "ply");
writeKeyframeUsedTimes(result_dir_ / "used_times", "final");
signalStop();
}
在trainForOneIteration里,随便取相机,设置高斯金字塔,设置球谐函数,设置学习率,渲染,计算loss并反向传播
考虑稠密化的事情,输出训练结果,考虑透明度重置操作
void GaussianMapper::trainForOneIteration()
{
increaseIteration(1);//增加一迭代次数
auto iter_start_timing = std::chrono::steady_clock::now();//计时器
// 从滑窗里随便找一个相机(相机来自gaussian keyframe的数据)
std::shared_ptr<GaussianKeyframe> viewpoint_cam = useOneRandomSlidingWindowKeyframe();
//关于高斯金字塔训练,设置用来训练的金字塔下采样图片的长宽
int training_level = num_gaus_pyramid_sub_levels_;
int image_height, image_width;
torch::Tensor gt_image, mask;
if (isdoingGausPyramidTraining())
training_level = viewpoint_cam->getCurrentGausPyramidLevel();
if (training_level == num_gaus_pyramid_sub_levels_) {//已经没有下采样图可以用了
image_height = viewpoint_cam->image_height_;
image_width = viewpoint_cam->image_width_;
gt_image = viewpoint_cam->original_image_.cuda();
mask = undistort_mask_[viewpoint_cam->camera_id_];
}
else { //用高斯金字塔下采样图片
image_height = viewpoint_cam->gaus_pyramid_height_[training_level];
image_width = viewpoint_cam->gaus_pyramid_width_[training_level];
gt_image = viewpoint_cam->gaus_pyramid_original_image_[training_level].cuda();
mask = scene_->cameras_.at(viewpoint_cam->camera_id_).gaus_pyramid_undistort_mask_[training_level];
}
// Mutex lock for usage of the gaussian model
//用mutex锁一下
std::unique_lock<std::mutex> lock_render(mutex_render_);
// Every 1000 its we increase the levels of SH up to a maximum degree
//每1000次迭代更新一下球谐函数的阶数
if (getIteration() % 1000 == 0 && default_sh_ < model_params_.sh_degree_)
default_sh_ += 1;
gaussians_->setShDegree(default_sh_);
// Update learning rate 更新学习率
if (pSLAM_) {
int used_times = kfs_used_times_[viewpoint_cam->fid_];
int step = (used_times <= opt_params_.position_lr_max_steps_ ? used_times : opt_params_.position_lr_max_steps_);
float position_lr = gaussians_->updateLearningRate(step);
setPositionLearningRateInit(position_lr);
}
else {
gaussians_->updateLearningRate(getIteration());
}
gaussians_->setFeatureLearningRate(featureLearningRate());
gaussians_->setOpacityLearningRate(opacityLearningRate());
gaussians_->setScalingLearningRate(scalingLearningRate());
gaussians_->setRotationLearningRate(rotationLearningRate());
// Render
// 渲染,计算损失,并反向传播
auto render_pkg = GaussianRenderer::render(
viewpoint_cam,
image_height,
image_width,
gaussians_,
pipe_params_,
background_,
override_color_
);
auto rendered_image = std::get<0>(render_pkg);
auto viewspace_point_tensor = std::get<1>(render_pkg);
auto visibility_filter = std::get<2>(render_pkg);
auto radii = std::get<3>(render_pkg);
// Get rid of black edges caused by undistortion
torch::Tensor masked_image = rendered_image * mask;
// Loss
auto Ll1 = loss_utils::l1_loss(masked_image, gt_image);
float lambda_dssim = lambdaDssim();
auto loss = (1.0 - lambda_dssim) * Ll1
+ lambda_dssim * (1.0 - loss_utils::ssim(masked_image, gt_image, device_type_));
loss.backward();//反向传播
torch::cuda::synchronize();
{
//计算EMA损失
torch::NoGradGuard no_grad;
ema_loss_for_log_ = 0.4f * loss.item().toFloat() + 0.6 * ema_loss_for_log_;
//到达阈值后,调用recordKeyframeRendered()渲染所有图像,输出psnr信息等
if (keyframe_record_interval_ &&
getIteration() % keyframe_record_interval_ == 0)
recordKeyframeRendered(masked_image, gt_image, viewpoint_cam->fid_, result_dir_, result_dir_, result_dir_);
// Densification
if (getIteration() < opt_params_.densify_until_iter_) {
// Keep track of max radii in image-space for pruning
gaussians_->max_radii2D_.index_put_( //更新最大radii 2D
{visibility_filter},//过滤可见点
torch::max(gaussians_->max_radii2D_.index({visibility_filter}),
radii.index({visibility_filter})));
gaussians_->addDensificationStats(viewspace_point_tensor, visibility_filter);//给能看见的点增加稠密化的事情
if ((getIteration() > opt_params_.densify_from_iter_) &&
(getIteration() % densifyInterval()== 0)) {
int size_threshold = (getIteration() > prune_big_point_after_iter_) ? 20 : 0;
gaussians_->densifyAndPrune(
densifyGradThreshold(),
densify_min_opacity_,//0.005,//
scene_->cameras_extent_,
size_threshold
);//根据透明度剪枝
}
//重置透明度操作
if (opacityResetInterval()
&& (getIteration() % opacityResetInterval() == 0
||(model_params_.white_background_ && getIteration() == opt_params_.densify_from_iter_)))
gaussians_->resetOpacity();
// Log and save
if (training_report_interval_ && (getIteration() % training_report_interval_ == 0))
GaussianTrainer::trainingReport(
getIteration(),
opt_params_.iterations_,
Ll1,
loss,
ema_loss_for_log_,
loss_utils::l1_loss,
iter_time,
*gaussians_,
*scene_,
pipe_params_,
background_
);
if ((all_keyframes_record_interval_ && getIteration() % all_keyframes_record_interval_ == 0)
// || loop_closure_iteration_
)
{
renderAndRecordAllKeyframes();
savePly(result_dir_ / std::to_string(getIteration()) / "ply");
}
if (loop_closure_iteration_)
loop_closure_iteration_ = false;
// Optimizer step
if (getIteration() < opt_params_.iterations_) {
gaussians_->optimizer_->step();
gaussians_->optimizer_->zero_grad(true);
}
}
}