总体思路
- 输入数据:rgb图及时间戳,深度图及时间戳
- 初始化帧
- 参考帧和当前帧都设置为输入帧
- 插入关键帧(含frame及其id)
- 提取特征点
- 计算描述子
- 求参考帧中特征点的3D点坐标(setRef3DPoints,参考帧像素的3D坐标,由相机内参和depth获得)
- 其他帧
- 当前帧更新为输入帧
- 提取关键点
- 计算描述子
- 特征匹配
- PnP进行位姿估计solvePnPRansac
- 带优化的版本,在这之后加入了BA优化
- 顶点:T(相机位姿)
- 边:3D-2D的投影,即参考帧的3D坐标,经过相机位姿变换,然后由内参投影到第二幅图中,误差为投影坐标与真实提取的像素坐标的差(重投影误差)。
- 带优化的版本,在这之后加入了BA优化
- 检查估计的位姿
- 如果位姿好
- 当前帧的位姿为参考帧位姿经过估计的位姿变换得到:curr_->T_c_w_ = T_c_r_estimated_ * ref_->T_c_w_; // T_c_w = T_c_r*T_r_w
- 参考帧设置为当前帧
- 求参考帧中特征点的3D点坐标(setRef3DPoints)
- 检查是否是关键帧(分别看旋转向量和平移向量的范数是否超出阈值,任一超过阈值才作为关键帧)
- 如果是关键帧,将当前帧插入地图的关键帧中
- 如果位姿不好
- 丢失的特征数量计数加一
- 如果丢帧数量大于丢帧设定的阈值,将state_变量设置为LOST
- 如果位姿好,则将丢失计数清零(num_lost_ = 0)
参考代码(仅贴出主要函数,详细工程参考高翔博士的gitHub)
run_vo.cpp
// -------------- test the visual odometry -------------
#include <fstream>
#include <boost/timer.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/viz.hpp>
#include "myslam/config.h"
#include "myslam/visual_odometry.h"
int main ( int argc, char** argv )
{
    if ( argc != 2 )
    {
        cout<<"usage: run_vo parameter_file"<<endl;
        return 1;
    }

    myslam::Config::setParameterFile ( argv[1] );
    myslam::VisualOdometry::Ptr vo ( new myslam::VisualOdometry );

    string dataset_dir = myslam::Config::get<string> ( "dataset_dir" );
    cout<<"dataset: "<<dataset_dir<<endl;
    ifstream fin ( dataset_dir+"/associate.txt" );
    if ( !fin )
    {
        cout<<"please generate the associate file called associate.txt!"<<endl;
        return 1;
    }

    // Each associate.txt line is: rgb_time rgb_file depth_time depth_file.
    // Test the extraction itself instead of fin.eof(): the eof()-driven loop
    // pushed one garbage entry when the last read failed.
    vector<string> rgb_files, depth_files;
    vector<double> rgb_times, depth_times;
    string rgb_time, rgb_file, depth_time, depth_file;
    while ( fin>>rgb_time>>rgb_file>>depth_time>>depth_file )
    {
        rgb_times.push_back ( atof ( rgb_time.c_str() ) );
        depth_times.push_back ( atof ( depth_time.c_str() ) );
        rgb_files.push_back ( dataset_dir+"/"+rgb_file );
        depth_files.push_back ( dataset_dir+"/"+depth_file );
    }

    myslam::Camera::Ptr camera ( new myslam::Camera );

    // visualization: a world frame, a camera frame, and an initial viewer pose
    cv::viz::Viz3d vis("Visual Odometry");
    cv::viz::WCoordinateSystem world_coor(1.0), camera_coor(0.5);
    cv::Point3d cam_pos( 0, -1.0, -1.0 ), cam_focal_point(0,0,0), cam_y_dir(0,1,0);
    cv::Affine3d cam_pose = cv::viz::makeCameraPose( cam_pos, cam_focal_point, cam_y_dir );
    vis.setViewerPose( cam_pose );
    world_coor.setRenderingProperty(cv::viz::LINE_WIDTH, 2.0);
    camera_coor.setRenderingProperty(cv::viz::LINE_WIDTH, 1.0);
    vis.showWidget( "World", world_coor );
    vis.showWidget( "Camera", camera_coor );

    cout<<"read total "<<rgb_files.size() <<" entries"<<endl;
    for ( size_t i=0; i<rgb_files.size(); i++ )
    {
        Mat color = cv::imread ( rgb_files[i] );
        Mat depth = cv::imread ( depth_files[i], -1 );   // -1: load depth unchanged (16-bit)
        if ( color.data==nullptr || depth.data==nullptr )
            break;
        myslam::Frame::Ptr pFrame = myslam::Frame::createFrame();
        pFrame->camera_ = camera;
        pFrame->color_ = color;
        pFrame->depth_ = depth;
        pFrame->time_stamp_ = rgb_times[i];

        boost::timer timer;
        vo->addFrame ( pFrame );
        cout<<"VO costs time: "<<timer.elapsed()<<endl;

        if ( vo->state_ == myslam::VisualOdometry::LOST )
            break;

        // T_c_w_ maps world -> camera; its inverse is the camera pose
        // expressed in the world frame (Twc), which is what the viewer draws.
        SE3 Twc = pFrame->T_c_w_.inverse();

        // show the map and the camera pose
        cv::Affine3d M(
            cv::Affine3d::Mat3(
                Twc.rotation_matrix()(0,0), Twc.rotation_matrix()(0,1), Twc.rotation_matrix()(0,2),
                Twc.rotation_matrix()(1,0), Twc.rotation_matrix()(1,1), Twc.rotation_matrix()(1,2),
                Twc.rotation_matrix()(2,0), Twc.rotation_matrix()(2,1), Twc.rotation_matrix()(2,2)
            ),
            cv::Affine3d::Vec3(
                Twc.translation()(0,0), Twc.translation()(1,0), Twc.translation()(2,0)
            )
        );

        cv::imshow("image", color );
        cv::waitKey(1);
        vis.setWidgetPose( "Camera", M);
        vis.spinOnce(1, false);
    }
    return 0;
}
不带优化的visual_odometry.cpp
/*
* <one line to give the program's name and a brief idea of what it does.>
* Copyright (C) 2016 <copyright holder> <email>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <algorithm>
#include <boost/timer.hpp>
#include "myslam/config.h"
#include "myslam/visual_odometry.h"
namespace myslam
{
VisualOdometry::VisualOdometry() :
state_ ( INITIALIZING ), ref_ ( nullptr ), curr_ ( nullptr ), map_ ( new Map ), num_lost_ ( 0 ), num_inliers_ ( 0 )
{
num_of_features_ = Config::get<int> ( "number_of_features" );
scale_factor_ = Config::get<double> ( "scale_factor" );
level_pyramid_ = Config::get<int> ( "level_pyramid" );
match_ratio_ = Config::get<float> ( "match_ratio" );
max_num_lost_ = Config::get<float> ( "max_num_lost" );
min_inliers_ = Config::get<int> ( "min_inliers" );
key_frame_min_rot = Config::get<double> ( "keyframe_rotation" );
key_frame_min_trans = Config::get<double> ( "keyframe_translation" );
orb_ = cv::ORB::create ( num_of_features_, scale_factor_, level_pyramid_ );
}
VisualOdometry::~VisualOdometry()
{
}
bool VisualOdometry::addFrame ( Frame::Ptr frame )
{
switch ( state_ )
{
case INITIALIZING:
{
state_ = OK;
curr_ = ref_ = frame;
map_->insertKeyFrame ( frame );
// extract features from first frame
extractKeyPoints();
computeDescriptors();
// compute the 3d position of features in ref frame
setRef3DPoints();
break;
}
case OK:
{
curr_ = frame;
extractKeyPoints();
computeDescriptors();
featureMatching();
poseEstimationPnP();
if ( checkEstimatedPose() == true ) // a good estimation
{
curr_->T_c_w_ = T_c_r_estimated_ * ref_->T_c_w_; // T_c_w = T_c_r*T_r_w
ref_ = curr_;
setRef3DPoints();
num_lost_ = 0;
if ( checkKeyFrame() == true ) // is a key-frame
{
addKeyFrame();
}
}
else // bad estimation due to various reasons
{
num_lost_++;
if ( num_lost_ > max_num_lost_ )
{
state_ = LOST;
}
return false;
}
break;
}
case LOST:
{
cout<<"vo has lost."<<endl;
break;
}
}
return true;
}
void VisualOdometry::extractKeyPoints()
{
orb_->detect ( curr_->color_, keypoints_curr_ );
}
void VisualOdometry::computeDescriptors()
{
orb_->compute ( curr_->color_, keypoints_curr_, descriptors_curr_ );
}
void VisualOdometry::featureMatching()
{
// match desp_ref and desp_curr, use OpenCV's brute force match
vector<cv::DMatch> matches;
cv::BFMatcher matcher ( cv::NORM_HAMMING );
matcher.match ( descriptors_ref_, descriptors_curr_, matches );
// select the best matches
float min_dis = std::min_element (
matches.begin(), matches.end(),
[] ( const cv::DMatch& m1, const cv::DMatch& m2 )
{
return m1.distance < m2.distance;
} )->distance;
feature_matches_.clear();
for ( cv::DMatch& m : matches )
{
if ( m.distance < max<float> ( min_dis*match_ratio_, 30.0 ) )
{
feature_matches_.push_back(m);
}
}
cout<<"good matches: "<<feature_matches_.size()<<endl;
}
void VisualOdometry::setRef3DPoints()
{
// select the features with depth measurements
pts_3d_ref_.clear();
descriptors_ref_ = Mat();
for ( size_t i=0; i<keypoints_curr_.size(); i++ )
{
double d = ref_->findDepth(keypoints_curr_[i]);
if ( d > 0)
{
Vector3d p_cam = ref_->camera_->pixel2camera(
Vector2d(keypoints_curr_[i].pt.x, keypoints_curr_[i].pt.y), d
);
pts_3d_ref_.push_back( cv::Point3f( p_cam(0,0), p_cam(1,0), p_cam(2,0) ));
descriptors_ref_.push_back(descriptors_curr_.row(i));
}
}
}
void VisualOdometry::poseEstimationPnP()
{
// construct the 3d 2d observations
vector<cv::Point3f> pts3d;
vector<cv::Point2f> pts2d;
for ( cv::DMatch m:feature_matches_ )
{
pts3d.push_back( pts_3d_ref_[m.queryIdx] );
pts2d.push_back( keypoints_curr_[m.trainIdx].pt );
}
Mat K = ( cv::Mat_<double>(3,3)<<
ref_->camera_->fx_, 0, ref_->camera_->cx_,
0, ref_->camera_->fy_, ref_->camera_->cy_,
0,0,1
);
Mat rvec, tvec, inliers;
cv::solvePnPRansac( pts3d, pts2d, K, Mat(), rvec, tvec, false, 100, 4.0, 0.99, inliers );
num_inliers_ = inliers.rows;
cout<<"pnp inliers: "<<num_inliers_<<endl;
T_c_r_estimated_ = SE3(
SO3(rvec.at<double>(0,0), rvec.at<double>(1,0), rvec.at<double>(2,0)),
Vector3d( tvec.at<double>(0,0), tvec.at<double>(1,0), tvec.at<double>(2,0))
);
}
bool VisualOdometry::checkEstimatedPose()
{
// check if the estimated pose is good
if ( num_inliers_ < min_inliers_ )
{
cout<<"reject because inlier is too small: "<<num_inliers_<<endl;
return false;
}
// if the motion is too large, it is probably wrong
Sophus::Vector6d d = T_c_r_estimated_.log();
if ( d.norm() > 5.0 )
{
cout<<"reject because motion is too large: "<<d.norm()<<endl;
return false;
}
return true;
}
bool VisualOdometry::checkKeyFrame()
{
Sophus::Vector6d d = T_c_r_estimated_.log();
Vector3d trans = d.head<3>();
Vector3d rot = d.tail<3>();
if ( rot.norm() >key_frame_min_rot || trans.norm() >key_frame_min_trans )
return true;
return false;
}
void VisualOdometry::addKeyFrame()
{
cout<<"adding a key-frame"<<endl;
map_->insertKeyFrame ( curr_ );
}
}
带BA优化的visual_odometry.cpp
/*
* <one line to give the program's name and a brief idea of what it does.>
* Copyright (C) 2016 <copyright holder> <email>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <algorithm>
#include <boost/timer.hpp>
#include "myslam/config.h"
#include "myslam/visual_odometry.h"
#include "myslam/g2o_types.h"
namespace myslam
{
VisualOdometry::VisualOdometry() :
state_ ( INITIALIZING ), ref_ ( nullptr ), curr_ ( nullptr ), map_ ( new Map ), num_lost_ ( 0 ), num_inliers_ ( 0 ), matcher_flann_( new cv::flann::LshIndexParams(5,10,2) )
{
num_of_features_ = Config::get<int> ( "number_of_features" );
scale_factor_ = Config::get<double> ( "scale_factor" );
level_pyramid_ = Config::get<int> ( "level_pyramid" );
match_ratio_ = Config::get<float> ( "match_ratio" );
max_num_lost_ = Config::get<float> ( "max_num_lost" );
min_inliers_ = Config::get<int> ( "min_inliers" );
key_frame_min_rot = Config::get<double> ( "keyframe_rotation" );
key_frame_min_trans = Config::get<double> ( "keyframe_translation" );
map_point_erase_ratio_ = Config::get<double> ("map_point_erase_ratio");
orb_ = cv::ORB::create ( num_of_features_, scale_factor_, level_pyramid_ );
}
VisualOdometry::~VisualOdometry()
{
}
bool VisualOdometry::addFrame ( Frame::Ptr frame )
{
switch ( state_ )
{
case INITIALIZING:
{
state_ = OK;
curr_ = ref_ = frame;
map_->insertKeyFrame ( frame );
// extract features from first frame and add them into map
extractKeyPoints();
computeDescriptors();
setRef3DPoints();
break;
}
case OK:
{
curr_ = frame;
extractKeyPoints();
computeDescriptors();
featureMatching();
poseEstimationPnP();
if ( checkEstimatedPose() == true ) // a good estimation
{
curr_->T_c_w_ = T_c_r_estimated_ * ref_->T_c_w_; // T_c_w = T_c_r*T_r_w
ref_ = curr_;
setRef3DPoints();
num_lost_ = 0;
if ( checkKeyFrame() == true ) // is a key-frame
{
addKeyFrame();
}
}
else // bad estimation due to various reasons
{
num_lost_++;
if ( num_lost_ > max_num_lost_ )
{
state_ = LOST;
}
return false;
}
break;
}
case LOST:
{
cout<<"vo has lost."<<endl;
break;
}
}
return true;
}
void VisualOdometry::extractKeyPoints()
{
boost::timer timer;
orb_->detect ( curr_->color_, keypoints_curr_ );
cout<<"extract keypoints cost time: "<<timer.elapsed()<<endl;
}
void VisualOdometry::computeDescriptors()
{
boost::timer timer;
orb_->compute ( curr_->color_, keypoints_curr_, descriptors_curr_ );
cout<<"descriptor computation cost time: "<<timer.elapsed()<<endl;
}
void VisualOdometry::featureMatching()
{
boost::timer timer;
vector<cv::DMatch> matches;
matcher_flann_.match( descriptors_ref_, descriptors_curr_, matches );
// select the best matches
float min_dis = std::min_element (
matches.begin(), matches.end(),
[] ( const cv::DMatch& m1, const cv::DMatch& m2 )
{
return m1.distance < m2.distance;
} )->distance;
feature_matches_.clear();
for ( cv::DMatch& m : matches )
{
if ( m.distance < max<float> ( min_dis*match_ratio_, 30.0 ) )
{
feature_matches_.push_back(m);
}
}
cout<<"good matches: "<<feature_matches_.size()<<endl;
cout<<"match cost time: "<<timer.elapsed()<<endl;
}
void VisualOdometry::setRef3DPoints()
{
// select the features with depth measurements
pts_3d_ref_.clear();
descriptors_ref_ = Mat();
for ( size_t i=0; i<keypoints_curr_.size(); i++ )
{
double d = ref_->findDepth(keypoints_curr_[i]);
if ( d > 0)
{
Vector3d p_cam = ref_->camera_->pixel2camera(
Vector2d(keypoints_curr_[i].pt.x, keypoints_curr_[i].pt.y), d
);
pts_3d_ref_.push_back( cv::Point3f( p_cam(0,0), p_cam(1,0), p_cam(2,0) ));
descriptors_ref_.push_back(descriptors_curr_.row(i));
}
}
}
void VisualOdometry::poseEstimationPnP()
{
// construct the 3d 2d observations
vector<cv::Point3f> pts3d;
vector<cv::Point2f> pts2d;
for ( cv::DMatch m:feature_matches_ )
{
pts3d.push_back( pts_3d_ref_[m.queryIdx] );
pts2d.push_back( keypoints_curr_[m.trainIdx].pt );
}
Mat K = ( cv::Mat_<double>(3,3)<<
ref_->camera_->fx_, 0, ref_->camera_->cx_,
0, ref_->camera_->fy_, ref_->camera_->cy_,
0,0,1
);
Mat rvec, tvec, inliers;
cv::solvePnPRansac( pts3d, pts2d, K, Mat(), rvec, tvec, false, 100, 4.0, 0.99, inliers );
num_inliers_ = inliers.rows;
cout<<"pnp inliers: "<<num_inliers_<<endl;
T_c_r_estimated_ = SE3(
SO3(rvec.at<double>(0,0), rvec.at<double>(1,0), rvec.at<double>(2,0)),
Vector3d( tvec.at<double>(0,0), tvec.at<double>(1,0), tvec.at<double>(2,0))
);
// using bundle adjustment to optimize the pose
typedef g2o::BlockSolver<g2o::BlockSolverTraits<6,2>> Block;
Block::LinearSolverType* linearSolver = new g2o::LinearSolverDense<Block::PoseMatrixType>();
Block* solver_ptr = new Block( linearSolver );
g2o::OptimizationAlgorithmLevenberg* solver = new g2o::OptimizationAlgorithmLevenberg ( solver_ptr );
g2o::SparseOptimizer optimizer;
optimizer.setAlgorithm ( solver );
g2o::VertexSE3Expmap* pose = new g2o::VertexSE3Expmap();
pose->setId ( 0 );
pose->setEstimate ( g2o::SE3Quat (
T_c_r_estimated_.rotation_matrix(),
T_c_r_estimated_.translation()
) );
optimizer.addVertex ( pose );
// edges
for ( int i=0; i<inliers.rows; i++ )
{
int index = inliers.at<int>(i,0);
// 3D -> 2D projection
EdgeProjectXYZ2UVPoseOnly* edge = new EdgeProjectXYZ2UVPoseOnly();
edge->setId(i);
edge->setVertex(0, pose);
edge->camera_ = curr_->camera_.get();
edge->point_ = Vector3d( pts3d[index].x, pts3d[index].y, pts3d[index].z );
edge->setMeasurement( Vector2d(pts2d[index].x, pts2d[index].y) );
edge->setInformation( Eigen::Matrix2d::Identity() );
optimizer.addEdge( edge );
}
optimizer.initializeOptimization();
optimizer.optimize(10);
T_c_r_estimated_ = SE3 (
pose->estimate().rotation(),
pose->estimate().translation()
);
}
bool VisualOdometry::checkEstimatedPose()
{
// check if the estimated pose is good
if ( num_inliers_ < min_inliers_ )
{
cout<<"reject because inlier is too small: "<<num_inliers_<<endl;
return false;
}
// if the motion is too large, it is probably wrong
Sophus::Vector6d d = T_c_r_estimated_.log();
if ( d.norm() > 5.0 )
{
cout<<"reject because motion is too large: "<<d.norm()<<endl;
return false;
}
return true;
}
bool VisualOdometry::checkKeyFrame()
{
Sophus::Vector6d d = T_c_r_estimated_.log();
Vector3d trans = d.head<3>();
Vector3d rot = d.tail<3>();
if ( rot.norm() >key_frame_min_rot || trans.norm() >key_frame_min_trans )
return true;
return false;
}
void VisualOdometry::addKeyFrame()
{
cout<<"adding a key-frame"<<endl;
map_->insertKeyFrame ( curr_ );
}
}
引入迭代优化后,估计结果的质量比纯粹的RANSAC PnP有明显的提高。尽管依然仅用两两帧间的信息,得到的结果却更加准确、平稳。
局限:仍受两两帧间匹配的局限性影响。一旦视频序列当中某个帧丢失,会导致后续的帧也无法和上一帧匹配。
参考文献
- 高翔《视觉SLAM十四讲:从理论到实践》
参考代码
https://github.com/gaoxiang12/slambook2
https://github.com/gaoxiang12/slambook