2.LK光流
2.1文献综述
- forwards additive algorithm(i.e. Lucas-Kanade)
forwards compositional algorithm
inverse additive algorithm
inverse compositional algorithm - The incremental update to the warp
W
(
x
;
Δ
p
)
\bm W(\bm x; \Delta \bm p)
W(x;Δp) must be composed with the current estimate of the warp
W
(
x
;
p
)
\bm W(\bm x; \bm p)
W(x;p) in the compositional algorithm.
(讲解:warp即考虑图像块在不同相机中发生了仿射变换。带 warp 之后对旋转更加鲁棒) - As a number of authors have pointed out, there is a huge computational cost in re-evaluating the Hessian in every iteration of the Lucas-Kanade algorithm. If the Hessian were constant it could be precomputed and then re-used.
forward:
∑ x [ I ( W ( W ( x ; Δ p ) ; p ) ) − T ( x ) ] 2 \sum_{\mathrm{x}}[I(\mathbf{W}(\mathbf{W}(\mathbf{x} ; \Delta \mathbf{p}) ; \mathbf{p}))-T(\mathbf{x})]^{2} x∑[I(W(W(x;Δp);p))−T(x)]2
W ( x ; p ) ← W ( x ; p ) ∘ W ( x ; Δ p ) \mathbf{W}(\mathbf{x} ; \mathbf{p}) \leftarrow \mathbf{W}(\mathbf{x} ; \mathbf{p}) \circ \mathbf{W}(\mathbf{x} ; \Delta \mathbf{p}) W(x;p)←W(x;p)∘W(x;Δp)
inverse:
∑ x [ T ( W ( x ; Δ p ) ) − I ( W ( x ; p ) ) ] 2 \sum_{\mathrm{x}}[T(\mathbf{W}(\mathbf{x} ; \Delta \mathbf{p}))-I(\mathbf{W}(\mathbf{x} ; \mathbf{p}))]^{2} x∑[T(W(x;Δp))−I(W(x;p))]2
W ( x ; p ) ← W ( x ; p ) ∘ W ( x ; Δ p ) − 1 \mathbf{W}(\mathbf{x} ; \mathbf{p}) \leftarrow \mathbf{W}(\mathbf{x} ; \mathbf{p}) \circ \mathbf{W}(\mathbf{x} ; \Delta \mathbf{p})^{-1} W(x;p)←W(x;p)∘W(x;Δp)−1
2.2 正向光流法实现
- 每个像素的误差为第一张图像在特征点(x, y) 处的像素值减去第二张图像在(x+dx, y+dy) 处的像素值
- 记自变量为
[
Δ
x
,
Δ
y
]
T
[\Delta x, \Delta y]^T
[Δx,Δy]T,用中值查分表示图像梯度,导数为
[ − d I 2 d Δ x − d I 2 d Δ y ] = [ − I 2 ( x + Δ x + 1 , y + Δ y ) − I 2 ( x + Δ x − 1 , y + Δ y ) 2 − I 2 ( x + Δ x , y + Δ y + 1 ) − I 2 ( x + Δ x , y + Δ y − 1 ) 2 ] \begin{bmatrix} -\dfrac{d I_2}{d\Delta x} \\ \\ -\dfrac{d I_2}{d\Delta y} \end{bmatrix} = \begin{bmatrix} -\dfrac{I_2(x+\Delta x +1, y+\Delta y) - I_2(x+\Delta x -1, y+\Delta y)}{2} \\ \\-\dfrac{I_2(x+\Delta x, y+\Delta y +1) - I_2(x+\Delta x, y+\Delta y -1)}{2} \end{bmatrix} ⎣⎢⎢⎢⎡−dΔxdI2−dΔydI2⎦⎥⎥⎥⎤=⎣⎢⎢⎢⎡−2I2(x+Δx+1,y+Δy)−I2(x+Δx−1,y+Δy)−2I2(x+Δx,y+Δy+1)−I2(x+Δx,y+Δy−1)⎦⎥⎥⎥⎤
2.3 反向光流法实现
void OpticalFlowSingleLevel(
const Mat &img1,
const Mat &img2,
const vector<KeyPoint> &kp1,
vector<KeyPoint> &kp2,
vector<bool> &success,
bool inverse
) {
// parameters
int half_patch_size = 4;
int iterations = 10;
bool have_initial = !kp2.empty();// 最开始空 have_initial = false
for (size_t i = 0; i < kp1.size(); i++) {
auto kp = kp1[i];
double dx = 0, dy = 0; // dx,dy need to be estimated
if (have_initial) {
dx = kp2[i].pt.x - kp.pt.x;
dy = kp2[i].pt.y - kp.pt.y;
}
double cost = 0, lastCost = 0;
bool succ = true; // indicate if this point succeeded
// Gauss-Newton iterations
// 通过最小化二者灰度差的平方计算 dx, dy
for (int iter = 0; iter < iterations; iter++) {
Eigen::Matrix2d H = Eigen::Matrix2d::Zero();
Eigen::Vector2d b = Eigen::Vector2d::Zero();
Eigen::Vector2d J; // Jacobian
if (inverse == false) {
H = Eigen::Matrix2d::Zero();
b = Eigen::Vector2d::Zero();
} else {
// only reset b
b = Eigen::Vector2d::Zero();
}
cost = 0;
if (kp.pt.x + dx <= half_patch_size || kp.pt.x + dx >= img1.cols - half_patch_size ||
kp.pt.y + dy <= half_patch_size || kp.pt.y + dy >= img1.rows - half_patch_size) { // go outside
succ = false;
break;
}
// compute cost and jacobian
for (int x = -half_patch_size; x < half_patch_size; x++)
for (int y = -half_patch_size; y < half_patch_size; y++) {
// TODO START YOUR CODE HERE (~8 lines)
double error = GetPixelValue(img1, kp.pt.x + x, kp.pt.y + y) -
GetPixelValue(img2, kp.pt.x + x + dx, kp.pt.y + y + dy);
if (inverse == false) {
// Forward Jacobian 中值梯度
J = -1.0 * Eigen::Vector2d(
0.5 * (GetPixelValue(img2, kp.pt.x + dx + x + 1, kp.pt.y + dy + y) -
GetPixelValue(img2, kp.pt.x + dx + x - 1, kp.pt.y + dy + y)),
0.5 * (GetPixelValue(img2, kp.pt.x + dx + x, kp.pt.y + dy + y + 1) -
GetPixelValue(img2, kp.pt.x + dx + x, kp.pt.y + dy + y - 1))
);
} else if (iter == 0) {
// Inverse Jacobian
// NOTE this J does not change when dx, dy is updated, so we can store it and only compute error
J = -1.0 * Eigen::Vector2d(
0.5 * (GetPixelValue(img1, kp.pt.x + x + 1, kp.pt.y + y) -
GetPixelValue(img1, kp.pt.x + x - 1, kp.pt.y + y)),
0.5 * (GetPixelValue(img1, kp.pt.x + x, kp.pt.y + y + 1) -
GetPixelValue(img1, kp.pt.x + x, kp.pt.y + y - 1))
);
}
// compute H, b and set cost;
// TODO END YOUR CODE HERE
b += -error * J;
cost += error * error;
if (inverse == false || iter == 0) {
// also update H
H += J * J.transpose();
}
}
// compute update
// TODO START YOUR CODE HERE (~1 lines)
Eigen::Vector2d update = H.ldlt().solve(b);
// TODO END YOUR CODE HERE
if (isnan(update[0])) {
// sometimes occurred when we have a black or white patch and H is irreversible
cout << "update is nan" << endl;
succ = false;
break;
}
if (iter > 0 && cost > lastCost) {
cout << "cost increased: " << cost << ", " << lastCost << endl;
break;
}
// update dx, dy
dx += update[0];
dy += update[1];
lastCost = cost;
succ = true;
}
success.push_back(succ);
// set kp2 非空直接覆盖 空则pushback
if (have_initial) {
kp2[i].pt = kp.pt + Point2f(dx, dy);
} else {
KeyPoint tracked = kp;
tracked.pt += cv::Point2f(dx, dy);
kp2.push_back(tracked);
}
}
}
2.4光流金字塔
- 光流法中通过对图像缩放不同倍率,计算光流时先从最顶层图像开始计算,然后把上一层计算结果作为下一层计算的初值,即是一个由粗到精的过程。该方法使图像实际运动太大时,图像缩小后运动的像素比较小,也能较好追踪到
- 特征点法金字塔用于在图片缩放不同尺度下对比特征点,保证特征点的尺度不变性,即相机向前向后运动后同一特征点也能在像素平面内匹配上
void OpticalFlowMultiLevel(
const Mat &img1,
const Mat &img2,
const vector<KeyPoint> &kp1,
vector<KeyPoint> &kp2,
vector<bool> &success,
bool inverse) {
// parameters
int pyramids = 4;
double pyramid_scale = 0.5;
double scales[] = {1.0, 0.5, 0.25, 0.125};
//最顶层
vector<KeyPoint> kp1_pyr, kp2_pyr;
for (auto &kp:kp1) {
auto kp_top = kp;
kp_top.pt *= scales[pyramids - 1];
kp1_pyr.push_back(kp_top);
kp2_pyr.push_back(kp_top);
}
// TODO START YOUR CODE HERE (~8 lines)
for (int i = 0; i < pyramids; i++) {
Mat img1temp, img2temp;
resize(img1, img1temp, Size(img1.size().width*scales[pyramids-1-i], img1.size().height*scales[pyramids-1-i]));//Size_(_Tp _width, _Tp _height);
resize(img2, img2temp, Size(img2.size().width*scales[pyramids-1-i], img2.size().height*scales[pyramids-1-i]));//Size_(_Tp _width, _Tp _height);
OpticalFlowSingleLevel(img1temp, img2temp, kp1_pyr, kp2_pyr, success, inverse);
if (i < pyramids-1) {
for (auto &kp: kp1_pyr)
kp.pt /= pyramid_scale;
for (auto &kp: kp2_pyr)
kp.pt /= pyramid_scale;
}
}
// TODO END YOUR CODE HERE
// don't forget to set the results into kp2
for (auto &kp: kp2_pyr)
kp2.push_back(kp);
}
运行结果:
2.5讨论
- LK光流法有三个假设条件:
(1) 亮度恒定:一个像素点随着时间的变化,其亮度值(像素灰度值)是恒定不变的。
(2) 小运动: 时间的变化不会引起位置的剧烈变化。这样才能利用相邻帧之间的位置变化引起的灰度值变化,去求取灰度对位置的偏导数。
(3) 空间(局部)一致:即前一帧中相邻像素点在后一帧中也是相邻的。这是LK光流法独有的假定。为了求取x,y方向的速度,需要建立多个方程联立求解。而空间一致假设就可以利用邻域n个像素点来建立n个方程。
在满足以上条件的情况下是合理的,当灰度值变化较大、图像运动较快、特征点局部邻域内灰度值不一致时不合理
保证相机曝光恒定(或加入曝光时间作为变量)、对图像块减去均值计算相对亮度信息、图像金字塔法 - 在该作业中图像块大效果好一些,当窗口较大时,受异常点影响减小光流计算更鲁棒,当窗口较小时,光流计算更精确
- 金字塔层数一般越多效果越好,但层数太高后顶层图像太小,特征点太过紧密容易出现错误追踪;
放大倍率小,金字塔的层数相对更多,计算效果更好,但增加计算量
3.直接法
3.1
1.光度误差:
e
=
I
r
e
f
(
π
(
p
i
)
)
−
I
c
u
r
(
π
(
T
c
u
r
,
r
e
f
p
i
)
)
\bm e = I_{\mathrm{ref}}\left(\pi\left(\mathbf{p}_{i}\right)\right)-I_{\mathrm{cur}}\left(\pi\left(\mathbf{T}_{\mathrm{cur}, \mathrm{ref}} \mathbf{p}_{i}\right)\right)
e=Iref(π(pi))−Icur(π(Tcur,refpi))
2.维度:6×1
∂
u
∂
δ
ξ
=
[
f
x
Z
0
−
f
x
X
Z
2
−
f
x
X
Y
Z
2
f
x
+
f
x
X
2
Z
2
−
f
x
Y
Z
0
f
y
Z
−
f
y
Y
Z
2
−
f
y
−
f
y
Y
2
Z
2
f
y
X
Y
Z
2
f
y
X
Z
]
\frac{\partial \boldsymbol{u}}{\partial \delta \boldsymbol{\xi}}=\left[\begin{array}{ccccc} \frac{f_{x}}{Z} & 0 & -\frac{f_{x} X}{Z^{2}} & -\frac{f_{x} X Y}{Z^{2}} & f_{x}+\frac{f_{x} X^{2}}{Z^{2}} & -\frac{f_{x} Y}{Z} \\ 0 & \frac{f_{y}}{Z} & -\frac{f_{y} Y}{Z^{2}} & -f_{y}-\frac{f_{y} Y^{2}}{Z^{2}} & \frac{f_{y} X Y}{Z^{2}} & \frac{f_{y} X}{Z} \end{array}\right]
∂δξ∂u=[Zfx00Zfy−Z2fxX−Z2fyY−Z2fxXY−fy−Z2fyY2fx+Z2fxX2Z2fyXY−ZfxYZfyX]
J
T
=
−
∂
I
2
∂
u
∂
u
∂
δ
ξ
J^T=-\frac{\partial \boldsymbol{I}_{2}}{\partial \boldsymbol{u}} \frac{\partial \boldsymbol{u}}{\partial \delta \boldsymbol{\xi}}
JT=−∂u∂I2∂δξ∂u
3.可以取4*4等合适大小,可以取单个点但会降低鲁棒性
void DirectPoseEstimationSingleLayer(
const cv::Mat &img1,
const cv::Mat &img2,
const VecVector2d &px_ref,
const vector<double> depth_ref,
Sophus::SE3d &T21
) {
// parameters
int half_patch_size = 4;
int iterations = 100;
double cost = 0, lastCost = 0;
int nGood = 0; // good projections
VecVector2d goodProjection;
///用于画图对应
VecVector2d goodRef;
for (int iter = 0; iter < iterations; iter++) {
nGood = 0;
goodProjection.clear();
goodRef.clear();
// Define Hessian and bias
Matrix6d H = Matrix6d::Zero(); // 6x6 Hessian
Vector6d b = Vector6d::Zero(); // 6x1 bias
for (size_t i = 0; i < px_ref.size(); i++) {
// compute the projection in the second image
// TODO START YOUR CODE HERE
double tempXw = (px_ref[i][0] - cx)/fx*depth_ref[i];
double tempYw = (px_ref[i][1] - cy)/fy*depth_ref[i];
Eigen::Matrix<double, 3, 1> pw(tempXw,tempYw,depth_ref[i]);
Eigen::Matrix<double, 3, 1> pw2 = T21*pw;
if (pw2[2] < 0) /// depth invalid
continue;
double u = fx*pw2[0]/pw2[2]+cx;
double v = fy*pw2[1]/pw2[2]+cy;
if (u < half_patch_size || u > img2.cols - half_patch_size || v < half_patch_size ||
v > img2.rows - half_patch_size)
continue;
nGood++;
goodProjection.push_back(Eigen::Vector2d(u, v));
goodRef.push_back(Eigen::Vector2d(px_ref[i][0], px_ref[i][1]));
double X = pw2[0], Y = pw2[1], Z = pw2[2], Z_inv = 1.0 / Z, Z2_inv = Z_inv * Z_inv;
// and compute error and jacobian
for (int x = -half_patch_size; x < half_patch_size; x++)
for (int y = -half_patch_size; y < half_patch_size; y++) {
double error = GetPixelValue(img1, px_ref[i][0] + x, px_ref[i][1] + y) -
GetPixelValue(img2, u + x, v + y);
Matrix26d J_pixel_xi; // pixel to \xi in Lie algebra
Eigen::Vector2d J_img_pixel; // image gradients
J_pixel_xi(0, 0) = fx * Z_inv;
J_pixel_xi(0, 1) = 0;
J_pixel_xi(0, 2) = -fx * X * Z2_inv;
J_pixel_xi(0, 3) = -fx * X * Y * Z2_inv;
J_pixel_xi(0, 4) = fx + fx * X * X * Z2_inv;
J_pixel_xi(0, 5) = -fx * Y * Z_inv;
J_pixel_xi(1, 0) = 0;
J_pixel_xi(1, 1) = fy * Z_inv;
J_pixel_xi(1, 2) = -fy * Y * Z2_inv;
J_pixel_xi(1, 3) = -fy - fy * Y * Y * Z2_inv;
J_pixel_xi(1, 4) = fy * X * Y * Z2_inv;
J_pixel_xi(1, 5) = fy * X * Z_inv;
J_img_pixel = Eigen::Vector2d(
0.5 * (GetPixelValue(img2, u + 1 + x, v + y) - GetPixelValue(img2, u - 1 + x, v + y)),
0.5 * (GetPixelValue(img2, u + x, v + 1 + y) - GetPixelValue(img2, u + x, v - 1 + y))
);
// total jacobian
Vector6d J = -1.0 * (J_img_pixel.transpose() * J_pixel_xi).transpose();
H += J * J.transpose();
b += -error * J;
cost += error * error;
}
// END YOUR CODE HERE
}
// solve update and put it into estimation
// TODO START YOUR CODE HERE
Vector6d update = H.ldlt().solve(b);;
T21 = Sophus::SE3d::exp(update) * T21;
// END YOUR CODE HERE
cost /= nGood;
if (isnan(update[0])) {
// sometimes occurred when we have a black or white patch and H is irreversible
cout << "update is nan" << endl;
break;
}
if (iter > 0 && cost > lastCost) {
cout << "cost increased: " << cost << ", " << lastCost << endl;
break;
}
lastCost = cost;
cout << "cost = " << cost << ", good = " << nGood << endl;
}
cout << "good projection: " << nGood << endl;
cout << "T21 = \n" << T21.matrix() << endl;
// in order to help you debug, we plot the projected pixels here
cv::Mat img1_show, img2_show;
cv::cvtColor(img1, img1_show, CV_GRAY2BGR);
cv::cvtColor(img2, img2_show, CV_GRAY2BGR);
for (auto &px: px_ref) {
cv::rectangle(img1_show, cv::Point2f(px[0] - 2, px[1] - 2), cv::Point2f(px[0] + 2, px[1] + 2),
cv::Scalar(0, 250, 0));
}
// for (auto &px: goodProjection) {
// cv::rectangle(img2_show, cv::Point2f(px[0] - 2, px[1] - 2), cv::Point2f(px[0] + 2, px[1] + 2),
// cv::Scalar(0, 250, 0));
// }
for (size_t i = 0; i < goodProjection.size(); ++i) {
auto p_ref = goodRef[i];
auto p_cur = goodProjection[i];
if (p_cur[0] > 0 && p_cur[1] > 0) {
cv::circle(img2_show, cv::Point2f(p_cur[0], p_cur[1]), 1, cv::Scalar(0, 250, 0), 2);
cv::line(img2_show, cv::Point2f(p_ref[0], p_ref[1]), cv::Point2f(p_cur[0], p_cur[1]),
cv::Scalar(0, 255, 0));
}
}
cv::imwrite("reference.bmp", img1_show);
cv::imwrite("current.bmp", img2_show);
cv::imshow("reference", img1_show);
cv::imshow("current", img2_show);
cv::waitKey();
}
位姿估计结果:
T21 =
0.999991 0.00242132 0.00337215 -0.00184408
-0.00242871 0.999995 0.00218895 0.0026733
-0.00336684 -0.00219713 0.999992 -0.725126
0 0 0 1
T21 =
0.999972 0.00137286 0.00728921 0.00740741
-0.0014012 0.999991 0.00388447 -0.00131842
-0.00728382 -0.00389458 0.999966 -1.4707
0 0 0 1
T21 =
0.999913 0.000538359 0.0131511 -0.21474
-0.000610832 0.999985 0.00550738 -0.00418048
-0.013148 -0.00551494 0.999898 -1.89428
0 0 0 1
T21 =
0.999857 0.00286649 0.0166533 -0.282287
-0.00295021 0.999983 0.00500466 0.0267631
-0.0166387 -0.00505308 0.999849 -2.07205
0 0 0 1
T21 =
0.999734 0.00159018 0.0230094 -0.40731
-0.00169507 0.999988 0.00453986 0.0633486
-0.023002 -0.00457766 0.999725 -2.96869
0 0 0 1
运行结果:
3.2
1.
f
x
,
f
y
,
c
x
,
c
y
f_x , f_y , c_x , c_y
fx,fy,cx,cy均缩小一倍
void DirectPoseEstimationMultiLayer(
const cv::Mat &img1,
const cv::Mat &img2,
const VecVector2d &px_ref,
const vector<double> depth_ref,
Sophus::SE3d &T21
) {
// parameters
int pyramids = 4;
double pyramid_scale = 0.5;
double scales[] = {1.0, 0.5, 0.25, 0.125};
// create pyramids
vector<cv::Mat> pyr1, pyr2; // image pyramids
// TODO START YOUR CODE HERE
for (int i = 0; i < pyramids; i++) {
if (i == 0) {
pyr1.push_back(img1);
pyr2.push_back(img2);
} else {
cv::Mat img1_pyr, img2_pyr;
cv::resize(pyr1[i - 1], img1_pyr,
cv::Size(pyr1[i - 1].cols * pyramid_scale, pyr1[i - 1].rows * pyramid_scale));
cv::resize(pyr2[i - 1], img2_pyr,
cv::Size(pyr2[i - 1].cols * pyramid_scale, pyr2[i - 1].rows * pyramid_scale));
pyr1.push_back(img1_pyr);
pyr2.push_back(img2_pyr);
}
}
// END YOUR CODE HERE
double fxG = fx, fyG = fy, cxG = cx, cyG = cy; // backup the old values
for (int level = pyramids - 1; level >= 0; level--) {
VecVector2d px_ref_pyr; // set the keypoints in this pyramid level
for (auto &px: px_ref) {
px_ref_pyr.push_back(scales[level] * px);
}
// TODO START YOUR CODE HERE
// scale fx, fy, cx, cy in different pyramid levels
fx = fxG * scales[level];
fy = fyG * scales[level];
cx = cxG * scales[level];
cy = cyG * scales[level];
// END YOUR CODE HERE
DirectPoseEstimationSingleLayer(pyr1[level], pyr2[level], px_ref_pyr, depth_ref, T21);
}
}
3.3
1.可以。光流法中优化的是dx、dy,inverse使用参考图片在(x, y)处的梯度代替可以减少计算量
通过增量的表示方式来区分方法. 迭代更新运动参数的时候,如果迭代的结果是在原始的值(6个运动参数)上增加一个微小量,那么称之为Additive,如果在仿射矩阵上乘以一个矩阵(增量运动参数形成的增量仿射矩阵),这方法称之为Compositional。(the compositional approach iteratively solves for an incremental warp
W
(
x
;
Δ
p
)
\bm W(\bm x; \Delta \bm p)
W(x;Δp) rather than an additive update to the parameters
Δ
p
\Delta \bm p
Δp.)(讲解:Compositional即同时估计仿射变换参数)
实际意义不大
2.减小图像块大小,inverse法免去梯度重复计算
3.对应点处patch灰度值、深度信息不变
4.因为是直接法对灰度值优化,不是角点也没有关系
5.同书
4.光流计算视差
// calculate disparity error
float sum_dis_error = 0;
float sum_dis_ref = 0;
for (int i = 0; i < kp2_multi.size(); i++) {
if (success_multi[i]) {
float disparity_ref = imgdisparity.at<uchar>(kp1[i].pt.y, kp1[i].pt.x);
sum_dis_ref += disparity_ref;
float disparity_est = abs(kp1[i].pt.x - kp2_multi[i].pt.x);
float disparity_error = abs(disparity_ref - disparity_est);
sum_dis_error += disparity_error;
cout<<"The "<<i<<" point"<<" real disparity:"<<disparity_ref<<" LK disparity:"<<disparity_est
<<" disparity_error:"<<disparity_error<<endl;
}
}
cout << "Sum relative disparity error:" << sum_dis_error/sum_dis_ref << endl;
运行结果:
误差较大效果不好