输入:两张彩图和两张对应的深度图
输出:相机的位姿变化量
Δ
T
\Delta T
ΔT
需要使用的第三方库:
opencv Eigen3 Sophus
解决问题:根据两幅RGB图像和对应的深度图像,通过特征点匹配得到共视点,然后通过Bundle Adjustment来优化位姿。
程序部分说明
关于相机位姿变换的雅克比矩阵为:
∂
e
∂
δ
ξ
=
−
[
f
x
Z
′
0
−
f
x
X
′
Z
′
2
−
f
x
X
′
Y
′
Z
′
2
f
x
(
1
+
X
′
2
Z
′
2
)
−
f
x
Y
′
Z
′
0
f
y
Z
′
−
f
y
Y
′
Z
′
2
−
f
y
−
f
y
Y
′
2
Z
′
2
f
y
X
′
Y
′
Z
′
2
f
y
X
′
Z
′
]
\frac{\partial e}{\partial \delta\xi}=-\begin{bmatrix}\frac{f_x}{Z^{'} } & 0 & -\frac{f_xX^{'}}{Z^{'2}} & -\frac{f_xX^{'}Y^{'}}{Z^{'2}}& f_x(1+\frac{X^{'2}}{Z^{'2}}) & -\frac{f_xY^{'}}{Z^{'}} \\ 0& \frac{f_y}{Z^{'}} & -\frac{f_yY_{'}}{Z^{'2}}& -f_y-\frac{f_yY^{'2}}{Z^{'2}} & \frac{f_yX^{'}Y^{'}}{Z^{'2}}& \frac{f_yX^{'}}{Z^{'}}\end{bmatrix}
∂δξ∂e=−⎣⎡Z′fx00Z′fy−Z′2fxX′−Z′2fyY′−Z′2fxX′Y′−fy−Z′2fyY′2fx(1+Z′2X′2)Z′2fyX′Y′−Z′fxY′Z′fyX′⎦⎤
对应的H矩阵为6x6,
error为2维向量,对应的物理含义为图像的x坐标的偏差和y坐标的偏差,
dx为6维的向量,对应的物理含义为相机的3维的旋转向量和3维的位移向量。
对应的代码表示为:
//从世界坐标系转到相机坐标系
Vector3d P = T_esti * p3;
double x = P[0];
double y = P[1];
double z = P[2];
//从相机坐标系投影到图像坐标系
Vector2d p2_ = {fx * ( x/z ) + cx, fy * ( y/z ) + cy};
//计算重投影误差
Vector2d e = p2 - p2_;
//计算 代价(误差的平方和)
cost += (e[0]*e[0]+e[1]*e[1]);
// 计算相机的雅克比矩阵
Matrix<double, 2, 6> J;
J(0,0) = -(fx/z);
J(0,1) = 0;
J(0,2) = (fx*x/(z*z));
J(0,3) = (fx*x*y/(z*z));
J(0,4) = -(fx*x*x/(z*z)+fx);
J(0,5) = (fx*y/z);
J(1,0) = 0;
J(1,1) = -(fy/z);
J(1,2) = (fy*y/(z*z));
J(1,3) = (fy*y*y/(z*z)+fy);
J(1,4) = -(fy*x*y/(z*z));
J(1,5) = -(fy*x/z);
计算H矩阵和b向量。
H
=
∑
i
J
i
T
J
i
H=\sum_{i}J_{i}^TJ_{i}
H=i∑JiTJi
b
=
∑
i
J
i
e
i
b=\sum_{i}J_{i} e_i
b=i∑Jiei
H += J.transpose() * J;
b += -J.transpose() * e;
求解迭代的优化值:
H
Δ
x
=
b
H \Delta x = b
HΔx=b
// solve dx
Vector6d dx;
dx = H.ldlt().solve(b);
运行结果:
iteration 0 cost=6610550.956991
iteration 1 cost=121634.67318447
iteration 2 cost=12521.908838332
iteration 3 cost=12521.880097547
iteration 4 cost=12521.880096746
iteration 5 cost=12521.880096746
cost: 521.880096746, last cost: 521.880096746
estimated pose:
0.997949728497 -0.0552131348392 0.0323704979055 -0.127344711404
0.0497227865044 0.98727431105 0.151053233145 0.0196207402915
-0.0402986835476 -0.149133981649 0.98799548158 0.0514250323419
0 0 0 1
下面是基于高斯牛顿法的BA求解:
//
// Created by wpr on 18-12-19.
//
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/features2d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <Eigen/Core>
#include <Eigen/Dense>
#include "sophus/se3.h"
using namespace cv;
using namespace std;
using namespace Eigen;
typedef vector<Vector3d, Eigen::aligned_allocator<Vector3d>> VecVector3d;
typedef vector<Vector2d, Eigen::aligned_allocator<Vector3d>> VecVector2d;
typedef Matrix<double, 6, 1> Vector6d;
int main()
{
cout << "Hello BA~" << endl;
VecVector2d p2d;
VecVector3d p3d;
double fx = 520.9, fy = 521.0, cx = 325.1, cy = 249.7;
Mat img1 = imread("./data/1.png",CV_LOAD_IMAGE_COLOR);
Mat img2 = imread("./data/2.png",CV_LOAD_IMAGE_COLOR);
vector<KeyPoint> keypoints1;
vector<KeyPoint> keypoints2;
vector<DMatch> matches;
Mat descriptions1,descriptions2;
Ptr<FeatureDetector> detector = ORB::create();
Ptr<DescriptorExtractor> descriptor = ORB::create();
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
detector->detect(img1,keypoints1);
detector->detect(img2,keypoints2);
descriptor-> compute(img1, keypoints1, descriptions1);
descriptor-> compute(img2, keypoints2, descriptions2);
vector<DMatch> match;
matcher->match(descriptions1,descriptions2,match);
//匹配点对筛选
double min_dist=10000, max_dist=0;
for(int i = 0; i < descriptions1.rows; i++)
{
double dist = match[i].distance;
if(dist < min_dist)min_dist = dist;
if(dist > max_dist)max_dist = dist;
}
cout << "-- Min dist :" << min_dist << endl;
cout << "-- Max dist :" << max_dist << endl;
for(int i = 0; i < descriptions1.rows; i++)
{
if ( match[i].distance <= max ( 2*min_dist, 30.0 ) )
matches.push_back(match[i]);
}
cout << "一共找到了" << matches.size() << "组匹配点" << endl;
Mat depthimg = imread("../data/1_depth.png", CV_LOAD_IMAGE_COLOR);
for(auto m:matches)
{
ushort d = depthimg.ptr<unsigned short >(int( keypoints1[m.queryIdx].pt.y))[int (keypoints1[m.queryIdx].pt.x)];
if (d == 0)
continue;
float z = d/5000.0;
float y = ( keypoints1[m.queryIdx].pt.x - cx) * z /fx;
float x = ( keypoints1[m.queryIdx].pt.y - cy) * z /fy;
p2d.push_back(Vector2d(keypoints1[m.queryIdx].pt.x, keypoints1[m.queryIdx].pt.y));
p3d.push_back(Vector3d(x, y, z));
}
int iterator_time = 100;
double cost = 0, lastcost = 0;
int n_points = p3d.size();
cout << "路标点的数量为" << n_points << endl;
Sophus::SE3 T_esti; //camera pose
for (auto iter = 0; iter < iterator_time; iter++)
{
Matrix<double, 6, 6> H = Matrix<double, 6, 6>::Zero();
Vector6d b = Vector6d::Zero();
cost = 0;
//compute cost
for ( int i = 0; i < n_points; i++)
{
Vector2d p2 = p2d[i];
Vector3d p3 = p3d[i];
Vector3d P = T_esti * p3;
double x = P[0];
double y = P[1];
double z = P[2];
Vector2d p2_ = {fx * ( x/z ) + cx, fy * ( y/z ) + cy};
Vector2d e = p2 - p2_;
cost += (e[0]*e[0]+e[1]*e[1]);
// compute jacobian
Matrix<double, 2, 6> J;
J(0,0) = -(fx/z);
J(0,1) = 0;
J(0,2) = (fx*x/(z*z));
J(0,3) = (fx*x*y/(z*z));
J(0,4) = -(fx*x*x/(z*z)+fx);
J(0,5) = (fx*y/z);
J(1,0) = 0;
J(1,1) = -(fy/z);
J(1,2) = (fy*y/(z*z));
J(1,3) = (fy*y*y/(z*z)+fy);
J(1,4) = -(fy*x*y/(z*z));
J(1,5) = -(fy*x/z);
H += J.transpose() * J;
b += -J.transpose() * e;
}
// solve dx
Vector6d dx;
dx = H.ldlt().solve(b);
if (isnan(dx[0])) {
cout << "result is nan!" << endl;
break;
}
if (iter > 0 && cost >= lastcost) {
// 误差增长了,说明近似的不够好
cout << "cost: " << cost << ", last cost: " << lastcost << endl;
break;
}
// 更新估计位姿
T_esti = Sophus::SE3::exp(dx)*T_esti;
lastcost = cost;
cout << "iteration " << iter << " cost=" << cout.precision(12) << cost << endl;
}
return 0;
}