I have recently been learning about 3D reconstruction. The best-performing algorithms at the moment include KinectFusion (KinFu), ElasticFusion, InfiniTAM, DynamicFusion, and BundleFusion; I won't introduce them one by one — if you are interested, https://www.zhihu.com/question/29885222?sort=created has a fairly thorough showcase of their results.
As an Ubuntu user, after weighing up build environment, build difficulty, and reconstruction quality, I picked InfiniTAM v3 as my entry point, and found its results already quite satisfying: apart from the output not being coloured, the front-end odometry tracking is remarkably accurate, so the TSDF model used for reconstruction also stays as accurate as possible when fusing depth. At small scales its accuracy exceeds the localisation capability of ORB-SLAM and VINS, which got me genuinely interested — so let's explore it together.
1. Usage
For offline reconstruction from image files, the depth images must be encoded as 16UC1. One caveat: if you save depth images with the cv::imwrite function of the OpenCV 3.3.1 bundled with ROS, the second line of the resulting pgm file contains a comment instead of the usual image dimensions, and InfiniTAM's pngLib will fail when parsing it. Simply delete that comment line — a shell one-liner is the quickest way.
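For example — assuming the depth maps are *.pgm files in the current directory and the offending comment sits on its own line (PGM comments start with '#') — a one-liner such as sed -i '/^#/d' *.pgm strips all comment lines in place.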
int main(int argc, char** argv)
try
{
    const char *arg1 = "";
    const char *arg2 = NULL;
    const char *arg3 = NULL;
    const char *arg4 = NULL;

    // gather up to four optional command-line arguments (calibration file and input paths)
    int arg = 1;
    do {
        if (argv[arg] != NULL) arg1 = argv[arg]; else break;
        ++arg;
        if (argv[arg] != NULL) arg2 = argv[arg]; else break;
        ++arg;
        if (argv[arg] != NULL) arg3 = argv[arg]; else break;
        ++arg;
        if (argv[arg] != NULL) arg4 = argv[arg]; else break;
    } while (false);

    printf("initialising ...\n");
    ImageSourceEngine *imageSource = NULL;
    IMUSourceEngine *imuSource = NULL;
    CreateDefaultImageSource(imageSource, imuSource, arg1, arg2, arg3, arg4);
    if (imageSource == NULL)
    {
        std::cout << "failed to open any image stream" << std::endl;
        return -1;
    }

    // pick the main engine according to the configured library mode
    ITMLibSettings *internalSettings = new ITMLibSettings();
    ITMMainEngine *mainEngine = NULL;
    switch (internalSettings->libMode)
    {
    case ITMLibSettings::LIBMODE_BASIC:
        mainEngine = new ITMBasicEngine<ITMVoxel, ITMVoxelIndex>(internalSettings, imageSource->getCalib(), imageSource->getRGBImageSize(), imageSource->getDepthImageSize());
        break;
    case ITMLibSettings::LIBMODE_BASIC_SURFELS:
        mainEngine = new ITMBasicSurfelEngine<ITMSurfelT>(internalSettings, imageSource->getCalib(), imageSource->getRGBImageSize(), imageSource->getDepthImageSize());
        break;
    case ITMLibSettings::LIBMODE_LOOPCLOSURE:
        mainEngine = new ITMMultiEngine<ITMVoxel, ITMVoxelIndex>(internalSettings, imageSource->getCalib(), imageSource->getRGBImageSize(), imageSource->getDepthImageSize());
        break;
    default:
        throw std::runtime_error("Unsupported library mode!");
        break;
    }

    // the UI engine drives the whole pipeline
    UIEngine::Instance()->Initialise(argc, argv, imageSource, imuSource, mainEngine, "./Files/Out", internalSettings->deviceType);
    UIEngine::Instance()->Run();
    UIEngine::Instance()->Shutdown();

    delete mainEngine;
    delete internalSettings;
    delete imageSource;
    if (imuSource != NULL) delete imuSource;
    return 0;
}
catch (std::exception& e)
{
    std::cerr << e.what() << '\n';
    return EXIT_FAILURE;
}
Of course, the usage above is based on offline data (discrete image files). Under Ubuntu with ROS, you can turn it into a ROS node that runs alongside the depth camera's ROS driver, which greatly improves both module interchangeability and real-time operation.
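To make that idea concrete, here is a minimal sketch of such a node, assuming ROS1 with message_filters. The topic names are placeholders for whatever your camera driver publishes, and the conversion into InfiniTAM's image types is left as a TODO rather than taken from any real driver:
#include <ros/ros.h>
#include <sensor_msgs/Image.h>
#include <message_filters/subscriber.h>
#include <message_filters/synchronizer.h>
#include <message_filters/sync_policies/approximate_time.h>
#include <boost/bind.hpp>

// Hypothetical callback: convert both images and feed them to mainEngine->ProcessFrame(),
// just as the offline loop above does.
void rgbdCallback(const sensor_msgs::ImageConstPtr& rgb, const sensor_msgs::ImageConstPtr& depth)
{
    // TODO: copy rgb->data / depth->data (16UC1!) into ITMUChar4Image / ITMShortImage
    // buffers and call mainEngine->ProcessFrame(...).
}

int main(int argc, char** argv)
{
    ros::init(argc, argv, "infinitam_node");
    ros::NodeHandle nh;

    // subscribe to the colour image and the 16UC1 depth image, loosely synchronised by timestamp
    message_filters::Subscriber<sensor_msgs::Image> rgbSub(nh, "/camera/rgb/image_raw", 5);
    message_filters::Subscriber<sensor_msgs::Image> depthSub(nh, "/camera/depth/image_raw", 5);

    typedef message_filters::sync_policies::ApproximateTime<sensor_msgs::Image, sensor_msgs::Image> SyncPolicy;
    message_filters::Synchronizer<SyncPolicy> sync(SyncPolicy(10), rgbSub, depthSub);
    sync.registerCallback(boost::bind(&rgbdCallback, _1, _2));

    ros::spin();
    return 0;
}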
2. Front-End Tracking
When I first used it, I had not sorted out the extrinsics between the two cameras of the RGB-D sensor (the "aligned" topic names of the Orbbec Astra series misled me into believing the depth images were already registered to the colour images — they were not), so I concluded that its tracking was not up to the task and swapped in VINS and ORB-SLAM as the localisation module; neither reached the "perfect" results seen on the datasets. For VINS and ORB-SLAM there are three reasons: first, they still depend on visual feature matching, and at small scales a few mismatches can drag the whole estimate off; second, the sparsity of visual feature points yields lower accuracy than a dense point cloud; third, the quality of the IMU itself also affects the result. In other words, they are better suited to fast, less precise localisation at large scales.
Looking at the main function, it initialises three engines: imageSource handles the images, UIEngine drives the whole pipeline, and mainEngine performs the SLAM and reconstruction. Following UIEngine's ProcessFrame we arrive at ITMBasicEngine::ProcessFrame, which computes the pose of the current frame.
template <typename TVoxel, typename TIndex>
ITMTrackingState::TrackingResult ITMBasicEngine<TVoxel, TIndex>::ProcessFrame(ITMUChar4Image *rgbImage, ITMShortImage *rawDepthImage, ITMIMUMeasurement *imuMeasurement)
{
    // prepare image and turn it into a depth image
    if (imuMeasurement == NULL) viewBuilder->UpdateView(&view, rgbImage, rawDepthImage, settings->useBilateralFilter);
    else viewBuilder->UpdateView(&view, rgbImage, rawDepthImage, settings->useBilateralFilter, imuMeasurement);

    if (!mainProcessingActive) return ITMTrackingState::TRACKING_FAILED;

    // tracking
    ORUtils::SE3Pose oldPose(*(trackingState->pose_d));
    if (trackingActive) trackingController->Track(trackingState, view);

    ITMTrackingState::TrackingResult trackerResult = ITMTrackingState::TRACKING_GOOD;
    switch (settings->behaviourOnFailure) {
    case ITMLibSettings::FAILUREMODE_RELOCALISE:
        trackerResult = trackingState->trackerResult;
        break;
    case ITMLibSettings::FAILUREMODE_STOP_INTEGRATION:
        if (trackingState->trackerResult != ITMTrackingState::TRACKING_FAILED)
            trackerResult = trackingState->trackerResult;
        else trackerResult = ITMTrackingState::TRACKING_POOR;
        break;
    default:
        break;
    }

    // relocalisation
    // the relocalisation step works much like in other SLAM systems, so it is skipped here
    bool didFusion = false;
    if ((trackerResult == ITMTrackingState::TRACKING_GOOD || !trackingInitialised) && (fusionActive) && (relocalisationCount == 0)) {
        // fusion
        denseMapper->ProcessFrame(view, trackingState, scene, renderState_live);
        didFusion = true;
        if (framesProcessed > 50) trackingInitialised = true;
        framesProcessed++;
    }

    if (trackerResult == ITMTrackingState::TRACKING_GOOD || trackerResult == ITMTrackingState::TRACKING_POOR)
    {
        if (!didFusion) denseMapper->UpdateVisibleList(view, trackingState, scene, renderState_live);

        // raycast to renderState_live for tracking and free visualisation
        trackingController->Prepare(trackingState, scene, view, visualisationEngine, renderState_live);

        if (addKeyframeIdx >= 0)
        {
            ORUtils::MemoryBlock<Vector4u>::MemoryCopyDirection memoryCopyDirection =
                settings->deviceType == ITMLibSettings::DEVICE_CUDA ? ORUtils::MemoryBlock<Vector4u>::CUDA_TO_CUDA : ORUtils::MemoryBlock<Vector4u>::CPU_TO_CPU;
            kfRaycast->SetFrom(renderState_live->raycastImage, memoryCopyDirection);
        }
    }
    else *trackingState->pose_d = oldPose;

    return trackerResult;
}
void ITMViewBuilder_CPU::UpdateView(ITMView **view_ptr, ITMUChar4Image *rgbImage, ITMShortImage *rawDepthImage, bool useBilateralFilter, bool modelSensorNoise, bool storePreviousImage)
{
    if (*view_ptr == NULL)
    {
        *view_ptr = new ITMView(calib, rgbImage->noDims, rawDepthImage->noDims, false);
        if (this->shortImage != NULL) delete this->shortImage;
        this->shortImage = new ITMShortImage(rawDepthImage->noDims, true, false);
        if (this->floatImage != NULL) delete this->floatImage;
        this->floatImage = new ITMFloatImage(rawDepthImage->noDims, true, false);

        if (modelSensorNoise)
        {
            (*view_ptr)->depthNormal = new ITMFloat4Image(rawDepthImage->noDims, true, false);
            (*view_ptr)->depthUncertainty = new ITMFloatImage(rawDepthImage->noDims, true, false);
        }
    }
    ITMView *view = *view_ptr;

    if (storePreviousImage)
    {
        if (!view->rgb_prev) view->rgb_prev = new ITMUChar4Image(rgbImage->noDims, true, false);
        else view->rgb_prev->SetFrom(view->rgb, MemoryBlock<Vector4u>::CPU_TO_CPU);
    }

    view->rgb->SetFrom(rgbImage, MemoryBlock<Vector4u>::CPU_TO_CPU);
    this->shortImage->SetFrom(rawDepthImage, MemoryBlock<short>::CPU_TO_CPU);

    switch (view->calib.disparityCalib.GetType())
    {
    case ITMDisparityCalib::TRAFO_KINECT:
        this->ConvertDisparityToDepth(view->depth, this->shortImage, &(view->calib.intrinsics_d), view->calib.disparityCalib.GetParams());
        break;
    case ITMDisparityCalib::TRAFO_AFFINE:
        this->ConvertDepthAffineToFloat(view->depth, this->shortImage, view->calib.disparityCalib.GetParams());
        break;
    default:
        break;
    }

    if (useBilateralFilter)
    {
        // 5 steps of bilateral filtering, ping-ponging between the two buffers
        this->DepthFiltering(this->floatImage, view->depth);
        this->DepthFiltering(view->depth, this->floatImage);
        this->DepthFiltering(this->floatImage, view->depth);
        this->DepthFiltering(view->depth, this->floatImage);
        this->DepthFiltering(this->floatImage, view->depth);
        view->depth->SetFrom(this->floatImage, MemoryBlock<float>::CPU_TO_CPU);
    }

    if (modelSensorNoise)
    {
        this->ComputeNormalAndWeights(view->depthNormal, view->depthUncertainty, view->depth, view->calib.intrinsics_d.projectionParamsSimple.all);
    }
}
A spoiler: the tracker used here is the ExtendedTracker, and by default it estimates the pose from the depth image alone — point-cloud ICP tracking by itself is already good enough. In my experiments, adding RGB features made little difference to the pose estimate. Let's now study the whole pose-estimation process in detail.
Beyond point-to-point, ICP has many variants; the one used here is projective ICP, similar to KinectFusion. The current point cloud is transformed by the previous frame's pose and projected into the rendered model to find each point's corresponding model point, and every model point carries a normal vector. The residual is the difference vector from the transformed point to its model point, projected onto that model normal — which is why this is called point-to-plane ICP.
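In symbols (my notation, not the code's): writing T for the current pose estimate, p_i for a point back-projected from the current depth image, q_i for the model point it lands on after projection, and n_i for that model point's normal, the energy being minimised is, up to sign conventions,

E(T) = \sum_i \rho\big( \mathbf{n}_i^\top ( T\,\mathbf{p}_i - \mathbf{q}_i ) \big),

where \rho is the robust kernel that appears as rho(...) in the code further below.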
Of course, this method only works with sensors running at a high frame rate, and fast motion must be avoided; otherwise it fails to converge well. That happens to suit a handheld sensor!
Let's cut straight to ITMExtendedTracker::TrackCamera. The depth and RGB paths are handled very similarly, so here we only follow the depth-related part.
void ITMExtendedTracker::TrackCamera(ITMTrackingState *trackingState, const ITMView *view)
{
    if (!trackingState->HasValidPointCloud()) return;

    if (trackingState->age_pointCloud >= 0) trackingState->framesProcessed++;
    else trackingState->framesProcessed = 0;

    this->SetEvaluationData(trackingState, view);
    this->PrepareForEvaluation();

    float hessian_good[6 * 6];
    float nabla_good[6];
    for (int i = 0; i < 6 * 6; ++i) hessian_good[i] = 0.0f;
    for (int i = 0; i < 6; ++i) nabla_good[i] = 0.0f;

    // As a workaround to the fact that the SVM has not been updated to handle the Intensity+Depth tracking
    // cache the last depth results and use them when evaluating the tracking quality
    float hessian_depth_good[6 * 6];
    float f_depth_good = 0;
    int noValidPoints_depth_good = 0;
    memset(hessian_depth_good, 0, sizeof(hessian_depth_good));

    // coarse-to-fine over the image pyramid
    for (int levelId = viewHierarchy_Depth->GetNoLevels() - 1; levelId >= 0; levelId--)
    {
        SetEvaluationParams(levelId);
        if (currentIterationType == TRACKER_ITERATION_NONE) continue;

        Matrix4f approxInvPose = trackingState->pose_d->GetInvM();
        ORUtils::SE3Pose lastKnownGoodPose(*(trackingState->pose_d));
        float f_old = std::numeric_limits<float>::max();
        float lambda = 1.0;

        // since the pose is solved iteratively, an error function has to be built each round
        for (int iterNo = 0; iterNo < noIterationsPerLevel[levelId]; iterNo++)
        {
            float hessian_depth[6 * 6], hessian_RGB[6 * 6];
            float nabla_depth[6], nabla_RGB[6];
            float f_depth = 0.f, f_RGB = 0.f;
            int noValidPoints_depth = 0, noValidPoints_RGB = 0;

            // Reset arrays
            memset(hessian_depth, 0, sizeof(hessian_depth));
            memset(hessian_RGB, 0, sizeof(hessian_RGB));
            memset(nabla_depth, 0, sizeof(nabla_depth));
            memset(nabla_RGB, 0, sizeof(nabla_RGB));

            // evaluate error function and gradients
            if (useDepth)
            {
                // compute the error value, the Jacobian, and the (approximate) Hessian
                noValidPoints_depth = ComputeGandH_Depth(f_depth, nabla_depth, hessian_depth, approxInvPose);

                // normalise by the number of valid points
                if (noValidPoints_depth > MIN_VALID_POINTS_DEPTH)
                {
                    // Normalize nabla and hessian
                    for (int i = 0; i < 6 * 6; ++i) hessian_depth[i] /= noValidPoints_depth;
                    for (int i = 0; i < 6; ++i) nabla_depth[i] /= noValidPoints_depth;
                    f_depth /= noValidPoints_depth;
                }
                else
                {
                    f_depth = std::numeric_limits<float>::max();
                }
            }

            // ...

            float hessian_new[6 * 6];
            float nabla_new[6];
            float f_new = 0.f;
            int noValidPoints_new = 0;

            // ...

            // compare against the previous error value
            else if (useDepth)
            {
                noValidPoints_new = noValidPoints_depth;
                f_new = f_depth;
                memcpy(nabla_new, nabla_depth, sizeof(nabla_depth));
                memcpy(hessian_new, hessian_depth, sizeof(hessian_depth));
            }

            // ...

            // check if error increased. If so, revert
            // if the error grew during iteration we may be stuck near a local optimum: the LM damping
            // value is then increased, steering towards gradient descent; otherwise it is decreased,
            // steering towards Gauss-Newton
            if ((noValidPoints_new <= 0) || (f_new >= f_old))
            {
                trackingState->pose_d->SetFrom(&lastKnownGoodPose);
                approxInvPose = trackingState->pose_d->GetInvM();
                lambda *= 10.0f;
            }
            else
            {
                // accept: update the pose and error value for the next iteration
                lastKnownGoodPose.SetFrom(trackingState->pose_d);
                f_old = f_new;

                for (int i = 0; i < 6 * 6; ++i) hessian_good[i] = hessian_new[i];
                for (int i = 0; i < 6; ++i) nabla_good[i] = nabla_new[i];
                lambda /= 10.0f;

                // Also cache depth results
                noValidPoints_depth_good = noValidPoints_depth;
                f_depth_good = f_depth;
                memcpy(hessian_depth_good, hessian_depth, sizeof(hessian_depth));
            }

            float A[6 * 6];
            for (int i = 0; i < 6 * 6; ++i) A[i] = hessian_good[i];
            for (int i = 0; i < 6; ++i) A[i + i * 6] *= 1.0f + lambda;

            // compute a new step and make sure we've got an SE3
            float step[6];
            // solve H * dx = g for the step, then keep iterating
            ComputeDelta(step, nabla_good, A, currentIterationType != TRACKER_ITERATION_BOTH);
            ApplyDelta(approxInvPose, step, approxInvPose);
            trackingState->pose_d->SetInvM(approxInvPose);
            trackingState->pose_d->Coerce();
            approxInvPose = trackingState->pose_d->GetInvM();

            // if step is small, assume it's going to decrease the error and finish
            if (HasConverged(step)) break;
        }
    }

    // finally an SVM scores this pose estimate, using a set of hand-tuned legacy parameters
    this->UpdatePoseQuality(noValidPoints_depth_good, hessian_depth_good, f_depth_good);
}
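Written out, each iteration of this Levenberg-Marquardt loop solves (in my notation)

\big( H + \lambda \operatorname{diag}(H) \big)\,\Delta\boldsymbol\xi = \mathbf{g},

where H ≈ JᵀJ is the accumulated approximate Hessian (hessian_good) and g the accumulated gradient (nabla_good). A large λ pushes the step towards gradient descent, a small one towards Gauss-Newton — exactly what the lambda *= 10 / lambda /= 10 branches above implement.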
2.1 Computing the Jacobian and the Hessian
int ITMExtendedTracker_CPU::ComputeGandH_Depth(float &f, float *nabla, float *hessian, Matrix4f approxInvPose)
{
    // 3D points and normals raycast from the model
    Vector4f *pointsMap = sceneHierarchyLevel_Depth->pointsMap->GetData(MEMORYDEVICE_CPU);
    Vector4f *normalsMap = sceneHierarchyLevel_Depth->normalsMap->GetData(MEMORYDEVICE_CPU);
    // intrinsics
    Vector4f sceneIntrinsics = sceneHierarchyLevel_Depth->intrinsics;
    Vector2i sceneImageSize = sceneHierarchyLevel_Depth->pointsMap->noDims;

    // the current frame's depth image
    float *depth = viewHierarchyLevel_Depth->depth->GetData(MEMORYDEVICE_CPU);
    Vector4f viewIntrinsics = viewHierarchyLevel_Depth->intrinsics;
    Vector2i viewImageSize = viewHierarchyLevel_Depth->depth->noDims;

    if (currentIterationType == TRACKER_ITERATION_NONE) return 0;

    bool shortIteration = (currentIterationType == TRACKER_ITERATION_ROTATION)
        || (currentIterationType == TRACKER_ITERATION_TRANSLATION);

    float sumHessian[6 * 6], sumNabla[6], sumF; int noValidPoints;
    // the symmetric Hessian is accumulated as a packed triangle, filled entry by entry
    int noPara = shortIteration ? 3 : 6, noParaSQ = shortIteration ? 3 + 2 + 1 : 6 + 5 + 4 + 3 + 2 + 1;

    noValidPoints = 0; sumF = 0.0f;
    memset(sumHessian, 0, sizeof(float) * noParaSQ);
    memset(sumNabla, 0, sizeof(float) * noPara);

    for (int y = 0; y < viewImageSize.y; y++) for (int x = 0; x < viewImageSize.x; x++)
    {
        float localHessian[6 + 5 + 4 + 3 + 2 + 1], localNabla[6], localF = 0;

        for (int i = 0; i < noPara; i++) localNabla[i] = 0.0f;
        for (int i = 0; i < noParaSQ; i++) localHessian[i] = 0.0f;

        bool isValidPoint;
        float depthWeight;

        // ...
        isValidPoint = computePerPointGH_exDepth<true, true, false>(localNabla, localHessian, localF, x, y, depth[x + y * viewImageSize.x], depthWeight, viewImageSize, viewIntrinsics, sceneImageSize, sceneIntrinsics, approxInvPose, scenePose, pointsMap, normalsMap, spaceThresh[currentLevelId], viewFrustum_min, viewFrustum_max, tukeyCutOff, framesToSkip, framesToWeight);

        if (isValidPoint)
        {
            noValidPoints++;
            sumF += localF;
            for (int i = 0; i < noPara; i++) sumNabla[i] += localNabla[i];
            for (int i = 0; i < noParaSQ; i++) sumHessian[i] += localHessian[i];
        }
    }

    // unpack the packed triangle into the full Hessian
    // Copy the lower triangular part of the matrix.
    for (int r = 0, counter = 0; r < noPara; r++)
        for (int c = 0; c <= r; c++, counter++)
            hessian[r + c * 6] = sumHessian[counter];

    // Transpose to fill the upper triangle.
    for (int r = 0; r < noPara; ++r)
        for (int c = r + 1; c < noPara; c++)
            hessian[r + c * 6] = hessian[c + r * 6];

    // fill in the gradient (nabla)
    memcpy(nabla, sumNabla, noPara * sizeof(float));
    f = sumF; // final error value, used to check whether we have converged

    return noValidPoints;
}
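A quick note on the packed storage: because the approximate Hessian JᵀJ is symmetric, each point only accumulates its lower triangle, i.e. noParaSQ = 6+5+4+3+2+1 = 21 floats in the full 6-parameter case. Following the unpacking loops above, the packed index k of row r, column c (with c ≤ r) is

k = \frac{r(r+1)}{2} + c.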
Digging in, you will find that it walks over the non-missing parts of the depth image and computes, for every depth point, its residual together with its Jacobian and Hessian contributions. For the full derivation, see the Lie-algebra differentiation sections of State Estimation for Robotics (《机器人学中的状态估计》): each iteration amounts to applying a perturbation on one side of the current pose; differentiating with respect to the perturbation and setting the derivative to zero yields the optimal perturbation.
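Concretely, for the per-point residual used below — in my notation r(ξ) = nᵀ(q − exp(ξ^)·T·p) — linearising around ξ = 0 gives, up to the sign convention, the 1×6 Jacobian

\frac{\partial r}{\partial \boldsymbol\xi} = \big[ \, (\mathbf{n} \times \mathbf{p})^\top \quad \mathbf{n}^\top \, \big],

whose two blocks are exactly the A[0..2] (cross product) and A[3..5] (plain normal) entries filled in at the end of computePerPointGH_exDepth_Ab.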
template<bool shortIteration, bool rotationOnly, bool useWeights>
_CPU_AND_GPU_CODE_ inline bool computePerPointGH_exDepth(THREADPTR(float) *localNabla, THREADPTR(float) *localHessian, THREADPTR(float) &localF,
    const THREADPTR(int) & x, const THREADPTR(int) & y, const CONSTPTR(float) &depth, THREADPTR(float) &depthWeight, CONSTPTR(Vector2i) & viewImageSize, const CONSTPTR(Vector4f) & viewIntrinsics,
    const CONSTPTR(Vector2i) & sceneImageSize, const CONSTPTR(Vector4f) & sceneIntrinsics, const CONSTPTR(Matrix4f) & approxInvPose, const CONSTPTR(Matrix4f) & scenePose,
    const CONSTPTR(Vector4f) *pointsMap, const CONSTPTR(Vector4f) *normalsMap, float spaceThresh, float viewFrustum_min, float viewFrustum_max, float tukeyCutOff, int framesToSkip, int framesToWeight)
{
    const int noPara = shortIteration ? 3 : 6;
    float A[noPara];
    float b;

    bool ret = computePerPointGH_exDepth_Ab<shortIteration, rotationOnly, useWeights>(A, b, x, y, depth, depthWeight, viewImageSize, viewIntrinsics, sceneImageSize, sceneIntrinsics,
        approxInvPose, scenePose, pointsMap, normalsMap, spaceThresh, viewFrustum_min, viewFrustum_max, tukeyCutOff, framesToSkip, framesToWeight);

    if (!ret) return false;

    // each point's robustified error accumulates into the frame's error function
    localF = rho(b, spaceThresh) * depthWeight;

#if (defined(__CUDACC__) && defined(__CUDA_ARCH__)) || (defined(__METALC__))
#pragma unroll
#endif
    // localNabla is the Jacobian J reweighted by the robust rho kernel; localHessian is the
    // Gauss-Newton approximation of the Hessian, J^T J
    for (int r = 0, counter = 0; r < noPara; r++)
    {
        localNabla[r] = rho_deriv(b, spaceThresh) * depthWeight * A[r];

#if (defined(__CUDACC__) && defined(__CUDA_ARCH__)) || (defined(__METALC__))
#pragma unroll
#endif
        for (int c = 0; c <= r; c++, counter++) localHessian[counter] = rho_deriv2(b, spaceThresh) * depthWeight * A[r] * A[c];
    }

    return true;
}
template<bool shortIteration, bool rotationOnly, bool useWeights>
_CPU_AND_GPU_CODE_ inline bool computePerPointGH_exDepth_Ab(THREADPTR(float) *A, THREADPTR(float) &b,
    const THREADPTR(int) & x, const THREADPTR(int) & y, const CONSTPTR(float) &depth, THREADPTR(float) &depthWeight,
    const CONSTPTR(Vector2i) & viewImageSize, const CONSTPTR(Vector4f) & viewIntrinsics, const CONSTPTR(Vector2i) & sceneImageSize,
    const CONSTPTR(Vector4f) & sceneIntrinsics, const CONSTPTR(Matrix4f) & approxInvPose, const CONSTPTR(Matrix4f) & scenePose, const CONSTPTR(Vector4f) *pointsMap,
    const CONSTPTR(Vector4f) *normalsMap, float spaceThresh, float viewFrustum_min, float viewFrustum_max, float tukeyCutOff, int framesToSkip, int framesToWeight)
{
    depthWeight = 0;

    // missing pixels in a depth image are normally stored as depth 0
    if (depth <= 1e-8f) return false; //check if valid -- != 0.0f

    Vector4f tmp3Dpoint, tmp3Dpoint_reproj; Vector3f ptDiff;
    Vector4f curr3Dpoint, corr3Dnormal; Vector2f tmp2Dpoint;

    // back-project the depth pixel through the intrinsics into a 3D point in the camera frame
    tmp3Dpoint.x = depth * ((float(x) - viewIntrinsics.z) / viewIntrinsics.x);
    tmp3Dpoint.y = depth * ((float(y) - viewIntrinsics.w) / viewIntrinsics.y);
    tmp3Dpoint.z = depth;
    tmp3Dpoint.w = 1.0f;

    // using the pose from the previous iteration, move the point out of the current camera frame
    // transform to previous frame coordinates
    tmp3Dpoint = approxInvPose * tmp3Dpoint;
    tmp3Dpoint.w = 1.0f;

    // project into previous rendered image
    // only points overlapping the model contribute, so overlap should be maximised (move slowly)
    tmp3Dpoint_reproj = scenePose * tmp3Dpoint;
    if (tmp3Dpoint_reproj.z <= 0.0f) return false;

    // project into pixel coordinates
    tmp2Dpoint.x = sceneIntrinsics.x * tmp3Dpoint_reproj.x / tmp3Dpoint_reproj.z + sceneIntrinsics.z;
    tmp2Dpoint.y = sceneIntrinsics.y * tmp3Dpoint_reproj.y / tmp3Dpoint_reproj.z + sceneIntrinsics.w;

    // stay within the image bounds
    if (!((tmp2Dpoint.x >= 0.0f) && (tmp2Dpoint.x <= sceneImageSize.x - 2) && (tmp2Dpoint.y >= 0.0f) && (tmp2Dpoint.y <= sceneImageSize.y - 2)))
        return false;

    // bilinear interpolation (hole-aware)
    curr3Dpoint = interpolateBilinear_withHoles(pointsMap, tmp2Dpoint, sceneImageSize);
    if (curr3Dpoint.w < 0.0f) return false;

    // difference vector from the transformed point to its corresponding model point
    ptDiff.x = curr3Dpoint.x - tmp3Dpoint.x;
    ptDiff.y = curr3Dpoint.y - tmp3Dpoint.y;
    ptDiff.z = curr3Dpoint.z - tmp3Dpoint.z;
    float dist = ptDiff.x * ptDiff.x + ptDiff.y * ptDiff.y + ptDiff.z * ptDiff.z;

    // filter out points whose depth is too far off the model
    if (dist > tukeyCutOff * spaceThresh) return false;

    // the corresponding point's normal, interpolated from the model
    corr3Dnormal = interpolateBilinear_withHoles(normalsMap, tmp2Dpoint, sceneImageSize);
    //if (corr3Dnormal.w < 0.0f) return false;

    depthWeight = MAX(0.0f, 1.0f - (depth - viewFrustum_min) / (viewFrustum_max - viewFrustum_min));
    depthWeight *= depthWeight;

    if (useWeights)
    {
        if (curr3Dpoint.w < framesToSkip) return false;
        depthWeight *= (curr3Dpoint.w - framesToSkip) / framesToWeight;
    }

    // residual: the point-to-plane distance, i.e. the difference vector projected onto the model normal
    b = corr3Dnormal.x * ptDiff.x + corr3Dnormal.y * ptDiff.y + corr3Dnormal.z * ptDiff.z;

    // TODO check whether normal matches normal from image, done in the original paper, but does not seem to be required
    // ...

    // by the chain rule, the rotational part of the residual's derivative with respect to the
    // Lie-algebra perturbation is the cross product n x p
    {
        A[0] = +tmp3Dpoint.z * corr3Dnormal.y - tmp3Dpoint.y * corr3Dnormal.z;
        A[1] = -tmp3Dpoint.z * corr3Dnormal.x + tmp3Dpoint.x * corr3Dnormal.z;
        A[2] = +tmp3Dpoint.y * corr3Dnormal.x - tmp3Dpoint.x * corr3Dnormal.y;
        // and the translational part of the derivative is simply the model normal itself!
        A[!shortIteration ? 3 : 0] = corr3Dnormal.x; A[!shortIteration ? 4 : 1] = corr3Dnormal.y; A[!shortIteration ? 5 : 2] = corr3Dnormal.z;
    }

    return true;
}
2.2 Computing the Iteration Step and the Error
The step computation lives in Cholesky.h: it solves the linear system HΔx = g.
GenericCholesky(const F *mat, int size)
{
    this->size = size;
    this->cholesky.resize(size * size);

    for (int i = 0; i < size * size; i++) cholesky[i] = mat[i];

    // in-place, square-root-free (LDL^T-style) factorisation of the symmetric matrix
    for (int c = 0; c < size; c++)
    {
        F inv_diag = 1;
        for (int r = c; r < size; r++)
        {
            F val = cholesky[c + r * size];
            for (int c2 = 0; c2 < c; c2++)
                val -= cholesky[c + c2 * size] * cholesky[c2 + r * size];

            if (r == c)
            {
                cholesky[c + r * size] = val;
                if (val == 0) { rank = r; }
                inv_diag = 1.0f / val;
            }
            else
            {
                cholesky[r + c * size] = val;
                cholesky[c + r * size] = val * inv_diag;
            }
        }
    }

    rank = size;
}

void Backsub(F *result, const F *v) const
{
    // forward substitution
    std::vector<F> y(size);
    for (int i = 0; i < size; i++)
    {
        F val = v[i];
        for (int j = 0; j < i; j++) val -= cholesky[j + i * size] * y[j];
        y[i] = val;
    }

    // divide by the diagonal
    for (int i = 0; i < size; i++) y[i] /= cholesky[i + i * size];

    // back substitution
    for (int i = size - 1; i >= 0; i--)
    {
        F val = y[i];
        for (int j = i + 1; j < size; j++) val -= cholesky[i + j * size] * result[j];
        result[i] = val;
    }
}
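For context, ComputeDelta presumably glues the two pieces above together roughly as follows; this is a paraphrase based on the interface shown, not the verbatim InfiniTAM source:

// Solve A * step = nabla for the 6-DoF (or 3-DoF) update.
void ComputeDelta(float *step, const float *nabla, const float *hessian, bool shortIteration)
{
    for (int i = 0; i < 6; i++) step[i] = 0.0f;

    if (shortIteration)
    {
        // rotation-only / translation-only: extract the relevant 3x3 block,
        // since the full matrix is stored with a stride of 6
        float smallHessian[3 * 3];
        for (int r = 0; r < 3; r++)
            for (int c = 0; c < 3; c++)
                smallHessian[r + c * 3] = hessian[r + c * 6];

        ORUtils::Cholesky cholA(smallHessian, 3); // assuming the float typedef of GenericCholesky above
        cholA.Backsub(step, nabla);
    }
    else
    {
        ORUtils::Cholesky cholA(hessian, 6);
        cholA.Backsub(step, nabla);
    }
}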
The computed step is a 6-element array representing a perturbation along the six dimensions of the Lie algebra — three rotation components and three translation components. The translation can be written directly into the rightmost column of the transformation matrix, while the rotation perturbation has to be turned into a skew-symmetric matrix. This completes one iteration, and the process repeats until the step (and with it the error) is small enough.
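The incremental transform assembled below is the first-order (small-angle) approximation of the exponential map: with ω the rotation part of the step and t the translation part,

T_{\mathrm{inc}} \approx \begin{bmatrix} I + [\boldsymbol\omega]_\times & \mathbf{t} \\ \mathbf{0}^\top & 1 \end{bmatrix}, \qquad [\boldsymbol\omega]_\times = \begin{bmatrix} 0 & -\omega_3 & \omega_2 \\ \omega_3 & 0 & -\omega_1 \\ -\omega_2 & \omega_1 & 0 \end{bmatrix};

the subsequent call to pose_d->Coerce() in TrackCamera then projects the result back onto a proper SE(3) transform.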
void ITMExtendedTracker::ApplyDelta(const Matrix4f & para_old, const float *delta, Matrix4f & para_new) const
{
    float step[6];

    switch (currentIterationType)
    {
    case TRACKER_ITERATION_ROTATION:
        step[0] = (float)(delta[0]); step[1] = (float)(delta[1]); step[2] = (float)(delta[2]);
        step[3] = 0.0f; step[4] = 0.0f; step[5] = 0.0f;
        break;
    case TRACKER_ITERATION_TRANSLATION:
        step[0] = 0.0f; step[1] = 0.0f; step[2] = 0.0f;
        step[3] = (float)(delta[0]); step[4] = (float)(delta[1]); step[5] = (float)(delta[2]);
        break;
    default:
    case TRACKER_ITERATION_BOTH:
        step[0] = (float)(delta[0]); step[1] = (float)(delta[1]); step[2] = (float)(delta[2]);
        step[3] = (float)(delta[3]); step[4] = (float)(delta[4]); step[5] = (float)(delta[5]);
        break;
    }

    Matrix4f Tinc;

    Tinc.m00 = 1.0f;     Tinc.m10 = step[2];  Tinc.m20 = -step[1]; Tinc.m30 = step[3];
    Tinc.m01 = -step[2]; Tinc.m11 = 1.0f;     Tinc.m21 = step[0];  Tinc.m31 = step[4];
    Tinc.m02 = step[1];  Tinc.m12 = -step[0]; Tinc.m22 = 1.0f;     Tinc.m32 = step[5];
    Tinc.m03 = 0.0f;     Tinc.m13 = 0.0f;     Tinc.m23 = 0.0f;     Tinc.m33 = 1.0f;

    para_new = Tinc * para_old;
}
As for the mapping part, it amounts to repeatedly updating the TSDF model, which I won't cover here — there are plenty of other blogs on the Fusion family worth reading. All in all, InfiniTAM performs quite well and the code is well organised, making it a good entry point into 3D reconstruction.