VINS-Mono源码分析3— vins_estimator2
本文的分析对象是vins_estimator包下estimator.cpp文件中的processImage()函数代码。话不多说,下面开始分析,
ROS_DEBUG("new image coming ------------------------------------------");
ROS_DEBUG("Adding feature points %lu", image.size());
if (f_manager.addFeatureCheckParallax(frame_count, image, td))
marginalization_flag = MARGIN_OLD;
else
marginalization_flag = MARGIN_SECOND_NEW;
这段代码实现的功能就是,如果frame_count-1帧与frame_count-2帧有足够的视差,则将最老帧(frame_count=0) 设为边缘化帧,否则将次新帧(frame_count-1帧) 设为边缘化帧。这段代码的核心函数就是addFeatureCheckParallax(),它的具体代码实现如下,
bool FeatureManager::addFeatureCheckParallax(int frame_count, const map<int, vector<pair<int, Eigen::Matrix<double, 7, 1>>>> &image, double td)
{
ROS_DEBUG("input feature: %d", (int)image.size());
ROS_DEBUG("num of feature: %d", getFeatureCount());
double parallax_sum = 0;
int parallax_num = 0;
last_track_num = 0;
for (auto &id_pts : image)
{
//把frame_count帧的特征点依次存储在f_per_fra中
FeaturePerFrame f_per_fra(id_pts.second[0].second, td);
//特征点的feature_id
int feature_id = id_pts.first;
//在feature中查找ID值为feature_id的特征点
auto it = find_if(feature.begin(), feature.end(), [feature_id](const FeaturePerId &it)
{
return it.feature_id == feature_id;
});
//如果在feature中没有找到ID值为feature_id的特征点
if (it == feature.end())
{
//将feature_id和frame_count打包成FeaturePerId类数据,存储到feature中
feature.push_back(FeaturePerId(feature_id, frame_count));
//在feature的最后一个元素中的feature_per_frame中添加f_per_fra
//表示feature_id特征点被frame_count帧观测到
feature.back().feature_per_frame.push_back(f_per_fra);
}
//如果在feature中找到ID值为feature_id的特征点
else if (it->feature_id == feature_id)
{
//在feature的其中一个元素(it对应的)中的feature_per_frame中添加f_per_fra
//表示feature_id特征点被frame_count帧观测到
it->feature_per_frame.push_back(f_per_fra);
//frame_count帧中被追踪到的特征点总数加1
last_track_num++;
}
}
//如果frame_count帧小于2或该帧被追踪到的特征点总数小于20,那就直接返回。
if (frame_count < 2 || last_track_num < 20)
return true;
for (auto &it_per_id : feature)
{
//这个特征点的起始帧小于frame_count-2,并且观测到这个特征点的帧数得要足够多
if (it_per_id.start_frame <= frame_count - 2 &&
it_per_id.start_frame + int(it_per_id.feature_per_frame.size()) - 1 >= frame_count - 1)
{
//compensatedParallax2()计算视差的函数,得到总视差parallax_sum
parallax_sum += compensatedParallax2(it_per_id, frame_count);
//参与计算总视差的特征点数加1
parallax_num++;
}
}
//没有特征点参与计算总视差
if (parallax_num == 0)
{
return true;
}
else
{
ROS_DEBUG("parallax_sum: %lf, parallax_num: %d", parallax_sum, parallax_num);
ROS_DEBUG("current parallax: %lf", parallax_sum / parallax_num * FOCAL_LENGTH);
//平均视差要大于等于一个阈值MIN_PARALLAX
return parallax_sum / parallax_num >= MIN_PARALLAX;
}
}
这部分代码的解析全都写在代码注释里了,这里还是要提一下feature,它是一个特征点管理器,里面存储着滑窗中所有的特征点信息。接下来分析compensatedParallax2() 函数,
double FeatureManager::compensatedParallax2(const FeaturePerId &it_per_id, int frame_count)
{
//check the second last frame is keyframe or not
//parallax betwwen seconde last frame and third last frame
const FeaturePerFrame &frame_i = it_per_id.feature_per_frame[frame_count - 2 - it_per_id.start_frame];
const FeaturePerFrame &frame_j = it_per_id.feature_per_frame[frame_count - 1 - it_per_id.start_frame];
double ans = 0;
Vector3d p_j = frame_j.point;
double u_j = p_j(0);
double v_j = p_j(1);
Vector3d p_i = frame_i.point;
Vector3d p_i_comp;
p_i_comp = p_i;
double dep_i = p_i(2);
double u_i = p_i(0) / dep_i;
double v_i = p_i(1) / dep_i;
double du = u_i - u_j, dv = v_i - v_j;
double dep_i_comp = p_i_comp(2);
double u_i_comp = p_i_comp(0) / dep_i_comp;
double v_i_comp = p_i_comp(1) / dep_i_comp;
double du_comp = u_i_comp - u_j, dv_comp = v_i_comp - v_j;
ans = max(ans, sqrt(min(du * du + dv * dv, du_comp * du_comp + dv_comp * dv_comp)));
return ans;
}
这个函数实现计算单个特征点视差的具体描述为,找到it_per_id特征点在frame_count-1帧与frame_count-2帧下的相机系归一化坐标p_j 与p_i,求p_j 与p_i坐标对应位置的差的平方和,然后开根号,并将之做为视差的描述。
返回到processImage()函数,继续往下阅读代码,
Headers[frame_count] = header;
ImageFrame imageframe(image, header.stamp.toSec());
imageframe.pre_integration = tmp_pre_integration;
all_image_frame.insert(make_pair(header.stamp.toSec(), imageframe));
tmp_pre_integration = new IntegrationBase{
acc_0, gyr_0, Bas[frame_count], Bgs[frame_count]};
Headers[frame_count]也是一个重要的变量,经常作为各种条件判断的条件使用。
利用frame_count帧的图像特征点信息构造ImageFrame类实例imageframe,将临时预积分信息tmp_pre_integration添加到imageframe中,然后再把imageframe添加到all_image_frame中。最后重新定义tmp_pre_integration。
if(ESTIMATE_EXTRINSIC == 2)
{
ROS_INFO("calibrating extrinsic param, rotation movement is needed");
if (frame_count != 0)
{
vector<pair<Vector3d, Vector3d>> corres = f_manager.getCorresponding(frame_count - 1, frame_count);
Matrix3d calib_ric;
if (initial_ex_rotation.CalibrationExRotation(corres, pre_integrations[frame_count]->delta_q, calib_ric))
{
ROS_WARN("initial extrinsic rotation calib success");
ROS_WARN_STREAM("initial extrinsic rotation: " << endl << calib_ric);
ric[0] = calib_ric;
RIC[0] = calib_ric;
ESTIMATE_EXTRINSIC = 1;
}
}
}
如果不需要标定外参,则将ESTIMATE_EXTRINSIC置0,默认情况下它的值为0。
if (solver_flag == INITIAL)
{
......
}
初始化是整个VINS-Mono的重中之重,接下来阅读初始化部分的代码,
result = initialStructure();
bool Estimator::initialStructure()
{
......
}
先看下面的代码
map<double, ImageFrame>::iterator frame_it;
Vector3d sum_g;
for (frame_it = all_image_frame.begin(), frame_it++; frame_it != all_image_frame.end(); frame_it++)
{
double dt = frame_it->second.pre_integration->sum_dt;
Vector3d tmp_g = frame_it->second.pre_integration->delta_v / dt;
sum_g += tmp_g;
}
Vector3d aver_g;
aver_g = sum_g * 1.0 / ((int)all_image_frame.size() - 1);
double var = 0;
for (frame_it = all_image_frame.begin(), frame_it++; frame_it != all_image_frame.end(); frame_it++)
{
double dt = frame_it->second.pre_integration->sum_dt;
Vector3d tmp_g = frame_it->second.pre_integration->delta_v / dt;
var += (tmp_g - aver_g).transpose() * (tmp_g - aver_g);
//cout << "frame g " << tmp_g.transpose() << endl;
}
var = sqrt(var / ((int)all_image_frame.size() - 1));
//ROS_WARN("IMU variation %f!", var);
if(var < 0.25)
{
ROS_INFO("IMU excitation not enouth!");
//return false;
}
第一次for循环,求出滑窗内总的线加速度sum_g,然后求出平均线加速度aver_g,第二for次循环,求出滑窗内的线加速度的标准差var,用标准差var判断IMU是否有足够的运动激励。
vector<SFMFeature> sfm_f;
for (auto &it_per_id : f_manager.feature)
{
int imu_j = it_per_id.start_frame - 1;
SFMFeature tmp_feature;
tmp_feature.state = false;
tmp_feature.id = it_per_id.feature_id;
for (auto &it_per_frame : it_per_id.feature_per_frame)
{
imu_j++;
Vector3d pts_j = it_per_frame.point;
tmp_feature.observation.push_back(make_pair(imu_j, Eigen::Vector2d{
pts_j.x(), pts_j.y()}));
}
sfm_f.push_back(tmp_feature);
}
将特征点管理器f_manager中的特征点信息,全部重新存储到一个新的数据结构sfm_f中,这个sfm_f将在纯视觉SFM操作中被用到。
Matrix3d relative_R;
Vector3d relative_T;
int l;
if (!relativePose(relative_R, relative_T, l))
{
ROS_INFO("Not enough features or parallax; Move device around");
return false;
}
找到滑动窗口中与frame_count帧有足够视差的第l帧(0 ⩽ \leqslant ⩽ l ⩽ \leqslant ⩽ 9),并且求出这两帧间的相对旋转relative_R和相对平移relative_T,相当于frame_count帧在l帧下的旋转和平移,也可以理解为把frame_count帧下的点坐标转换到l帧下的旋转和平移。具体实现代码在relativePose() 函数中,
bool Estimator::relativePose(Matrix3d &relative_R, Vector3d &relative_T, int &l)
{
// find previous frame which contians enough correspondance and parallex with newest frame
for (int i = 0; i < WINDOW_SIZE; i++)
{
vector<pair<Vector3d, Vector3d>> corres;
//找到第i帧和frame_count帧的共同特征点分别在两帧下的归一化相机坐标,存储在corres中
corres = f_manager.getCorresponding(i, WINDOW_SIZE);
//两帧的共同特征点得要多于20个
if (corres.size() > 20)
{
double sum_parallax = 0;
double average_parallax;
for (int j = 0; j < int(corres.size()); j++)
{
//在两帧下的归一化坐标的(x,y)分别赋给pts_0和pts_1
Vector2d pts_0(corres[j].first(0), corres[j].first(1));
Vector2d pts_1(corres[j].second(0), corres[j].second(1));
//差值的模,然后累加得到总视差sum_parallax
double parallax = (pts_0 - pts_1).norm();
sum_parallax = sum_parallax + parallax;
}
//平均视差
average_parallax = 1.0 * sum_parallax / int(corres.size());
//平均视差的460倍大于30,并且两帧间的相对旋转和平移能够计算出来
if(average_parallax * 460 > 30 && m_estimator.solveRelativeRT(corres, relative_R, relative_T))
{
//将第i帧设为第l帧
l = i;
ROS_DEBUG("average_parallax %f choose l %d and newest frame to triangulate the whole structure", average_parallax * 460, l);
return true;
}
}
}
return false;
}
这部分代码的getCorresponding() 函数和solveRelativeRT() 函数的具体代码实现如下,
vector<pair<Vector3d, Vector3d>> FeatureManager::getCorresponding(int frame_count_l, int frame_count_r)
{
vector<pair<Vector3d, Vector3d>> corres;
for (auto &it : feature)
{
//确保it特征点能被rame_count_l帧和rame_count_r帧观测到
if (it.start_frame <= frame_count_l && it.endFrame() >= frame_count_r)
{
Vector3d a = Vector3d::Zero(), b = Vector3d::Zero();
//it特征点对frame_count_l帧和frame_count_r帧的观测索引
int idx_l = frame_count_l - it.start_frame;
int idx_r = frame_count_r - it.start_frame;
//it特征点在frame_count_l帧的相机归一化坐标
a = it.feature_per_frame[idx_l].point;
//it特征点在frame_count_r帧的相机归一化坐标
b = it.feature_per_frame[idx_r].point;
//it特征点在frame_count_l帧和frame_count_r帧的相机归一化坐标成对存储
corres.push_back(make_pair(a, b));
}
}
return corres;
}
getCorresponding() 函数的解析全都写在代码注释里了,接下来分析solveRelativeRT() 函数,
bool MotionEstimator::solveRelativeRT(const vector<pair<Vector3d, Vector3d>> &corres, Matrix3d &Rotation, Vector3d &Translation)
{
if (corres.size() >= 15)
{
vector<cv::Point2f> ll, rr;
for (int i = 0; i < int(corres.size()); i++)
{
ll.push_back(cv::Point2f(corres[i].first(0), corres[i].first(1)));
rr.push_back(cv::Point2f(corres[i].second(0), corres[i].second(1)));
}
cv::Mat mask;
cv::Mat E = cv::findFundamentalMat(ll, rr, cv::FM_RANSAC, 0.3 / 460, 0.99, mask);
cv::Mat cameraMatrix = (cv::Mat_<double>(3, 3) << 1, 0, 0, 0, 1, 0, 0, 0, 1);
cv::Mat rot, trans;
int inlier_cnt = cv::recoverPose(E, ll, rr, cameraMatrix, rot, trans, mask);
//cout << "inlier_cnt " << inlier_cnt << endl;
Eigen::Matrix3d R;
Eigen::Vector3d T;
for (int i = 0; i < 3; i++)
{
T(i) = trans.at<double>(i, 0);
for (int j = 0; j < 3; j++)
R(i, j) = rot.at<double>(i, j);
}
Rotation = R.transpose();
Translation = -R.transpose() * T;
//内点数大于12,成功
if(inlier_cnt > 12)
return true;
else
return false;
}
return false;
}
这部分代码在initial文件夹下的solve_5pts.cpp文件中。ll和rr都是相机归一化坐标,所以通过OpenCV相应库函数解出来的是本质矩阵E,然后再通过相应库函数SVD分解出旋转R和平移T,求逆,得到Rotation和Translation,即relative_R和relative_T。这部分理论知识可以参考高翔的《视觉SLAM十四件》的7.3的内容。
接下来返回到函数initialStructure() 中,继续阅读下面的代码,
GlobalSFM sfm;
if(!sfm.construct(frame_count + 1, Q, T, l,
relative_R, relative_T,
sfm_f, sfm_tracked_points))
{
ROS_DEBUG("global SFM failed!");
marginalization_flag = MARGIN_OLD;
return false;
}
根据上面算出来的l、relative_R、 relative_T求出Q[1~11] 、T[1~11]、sfm_f和sfm_tracked_points,其中Q[1~11] 和T[1~11] 的参考坐标系是l帧。核心代码是construct()函数,它在initial文件夹下的initial_sfm.cpp文件中,这个函数中的代码大意可以用下图表示,
q[l].w() = 1;
q[l].x() = 0;
q[l].y() = 0;
q[l].z() = 0;
T[l].setZero();
//frame_num - 1=10
q[frame_num - 1] = q[l] * Quaterniond(relative_R);
T[frame_num - 1] = relative_T;
q[l]、T[l] 和 q[10]、T[10] 可以看作是l帧和10帧在以l帧为参考坐标系下的旋转和平移。
Matrix3d c_Rotation[frame_num];
Vector3d c_Translation[frame_num];
Quaterniond c_Quat[frame_num];
double c_rotation[frame_num][4];
double c_translation[frame_num][3];
Eigen::Matrix<double, 3, 4> Pose[frame_num];
定义一些在construct() 函数中临时使用的中间变量。
c_Quat[l] = q[l].inverse();
c_Rotation[l] = c_Quat[l].toRotationMatrix();
c_Translation[l] = -1 * (c_Rotation[l] * T[l]);
Pose[l].block<3, 3>(0, 0) = c_Rotation[l];
Pose[l].block<3, 1>(0, 3) = c_Translation[l];
c_Quat[frame_num - 1] = q[frame_num - 1].inverse();
c_Rotation[frame_num - 1] = c_Quat[frame_num - 1].toRotationMatrix();
c_Translation[frame_num - 1] = -1 * (c_Rotation[frame_num - 1] * T[frame_num - 1]);
Pose[frame_num - 1].block<3, 3>(0, 0) = c_Rotation[frame_num - 1];
Pose[frame_num - 1].block<3, 1>(0, 3) = c_Translation[frame_num - 1];
求q[l]、T[l] 和 q[10]、T[10] 的逆,并存储在Pose[l] 和 Pose[10] 中。
for (int