1. Principle of the LM Algorithm

Levenberg-Marquardt (LM) is a damped variant of Gauss-Newton for nonlinear least-squares problems. Instead of the pure Gauss-Newton system it solves $(H + \lambda I)\,\Delta x = b$ (with $H = J^{T}J$ and $b = -J^{T}f$, derived in the next section). The damping factor $\lambda$ is adapted every iteration: a step that reduces the squared error is accepted and $\lambda$ is decreased, while a step that increases it is rejected and $\lambda$ is increased. A large $\lambda$ makes the step behave like gradient descent; a small $\lambda$ recovers Gauss-Newton.
2. The Gauss-Newton Algorithm
Let $f(x)$ be the vector of residuals. A first-order Taylor expansion of $f$ around the current estimate $x$ gives:

$$f(x + \Delta x) \approx f(x) + J(x)\,\Delta x$$

where $J(x)$ is the Jacobian of $f$ at $x$. The objective function can then be transformed into

$$\min_{\Delta x}\ \frac{1}{2}\left\| f(x) + J(x)\,\Delta x \right\|^2$$

Taking the derivative of the above with respect to $\Delta x$ and setting it to zero gives:

$$J(x)^{T} J(x)\,\Delta x = -J(x)^{T} f(x)$$

which simplifies to $H\,\Delta x = b$, with $H = J^{T}J$ and $b = -J^{T}f$. This is the update equation of the Gauss-Newton algorithm.
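As a minimal sketch of one such step in code (Eigen-based, matching the style of the implementation below; J and r are assumed names for the stacked Jacobian and the residual vector):

#include <eigen3/Eigen/Dense>

// One Gauss-Newton step: solve (J^T J) * delta = -J^T r.
Eigen::VectorXd gaussNewtonStep(const Eigen::MatrixXd& J, const Eigen::VectorXd& r) {
    Eigen::MatrixXd H = J.transpose() * J;   // Gauss-Newton approximation of the Hessian
    Eigen::VectorXd b = -J.transpose() * r;  // right-hand side b = -J^T r
    return H.ldlt().solve(b);                // well defined only if H is invertible
}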
3. Differences Between GN and LM
LM rejects any update that would increase the sum of squared residuals, which makes it more robust but potentially slower; Gauss-Newton applies every step directly and requires $H = J^{T}J$ to be invertible.
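The mechanical difference is confined to the linear solve. A sketch of the damped step, paired with the gaussNewtonStep sketch above (hypothetical helper; H and b as defined in section 2):

// Levenberg-Marquardt step: (H + lambda*I) is positive definite for any
// lambda > 0, so the system is solvable even when J is rank-deficient.
Eigen::VectorXd lmStep(const Eigen::MatrixXd& H, const Eigen::VectorXd& b, double lambda) {
    int n = H.rows();
    return (H + lambda * Eigen::MatrixXd::Identity(n, n)).ldlt().solve(b);
}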
4. A Simple C++ Implementation of LM
#include <bits/stdc++.h>
#include <eigen3/Eigen/Dense>
using namespace std;
using namespace Eigen;

vector<Vector2d> obs_;            // observations (x_i, y_i)
int max_iterations_ = 100;        // outer LM iterations
int max_inner_iterations_ = 10;   // attempts to find an acceptable step per iteration
double lambda_;                   // damping factor
VectorXd errors_;                 // residuals r_i = a*x_i^2 + b*x_i + c - y_i
Matrix3d hessian_;                // H = J^T J
Vector3d hb_;                     // b = -J^T r
MatrixXd jacobian_;
double tau_ = 1e-5;               // scale for the initial lambda
double epsilon_ = 1e-5;           // convergence threshold on the step norm
double current_squared_error;
double a_, b_, c_;                // parameters being estimated

void computeJacobian();
void computeHb();
void initLambda();
int main() {
    // Ground-truth parameters and noisy samples of y = a*x^2 + b*x + c.
    double a = 1, b = 2, c = 3;
    a_ = 0, b_ = 0, c_ = 0;
    std::default_random_engine generator;
    std::normal_distribution<double> noise(0, 0.1);
    for (int i = 0; i < 100; i++) {
        Vector2d ob;
        ob(0) = i;
        ob(1) = a * i * i + b * i + c + noise(generator);
        obs_.push_back(ob);
    }
    computeJacobian();
    computeHb();
    initLambda();
    int iter = 0;
    double delta_norm; // norm of the update step in each iteration
    double v = 2.0;
    while (iter++ < max_iterations_) {
        current_squared_error = errors_.squaredNorm();
        int inner_iter = 0;
        while (inner_iter++ < max_inner_iterations_) {
            // Solve the damped normal equations (H + lambda*I) * delta = b.
            Matrix3d damper = lambda_ * Matrix3d::Identity();
            Vector3d delta = (hessian_ + damper).inverse() * hb_;
            double aa = a_ + delta(0);
            double bb = b_ + delta(1);
            double cc = c_ + delta(2);
            delta_norm = delta.norm();
            // Evaluate the squared error at the candidate parameters.
            double new_squared_error = 0.0;
            for (size_t i = 0; i < obs_.size(); i++) {
                double xi = obs_[i](0);
                double yi = obs_[i](1);
                double yy = aa * xi * xi + bb * xi + cc;
                new_squared_error += pow(yy - yi, 2);
            }
            // Gain ratio: actual reduction of the squared error over the
            // reduction predicted by the linearized model, delta^T (lambda*delta + b).
            double rho = (current_squared_error - new_squared_error) /
                         (delta.dot(lambda_ * delta + hb_) + 1e-3);
            if (rho > 0) {
                // Step accepted: keep the parameters and shrink lambda (Nielsen's rule).
                a_ = aa, b_ = bb, c_ = cc;
                lambda_ = lambda_ * max(1.0 / 3.0, 1 - pow((2 * rho - 1), 3));
                v = 2;
                break;
            } else {
                // Step rejected: increase the damping and retry.
                lambda_ = lambda_ * v;
                v *= 2;
            }
        }
        if (delta_norm < epsilon_) {
            break;
        }
        computeJacobian();
        computeHb();
    }
    cout << a_ << " " << b_ << " " << c_ << endl;
    return 0;
}
void initLambda() {
    // Initial lambda proportional to the largest diagonal entry of H.
    double max_diagonal = 0.;
    for (int i = 0; i < 3; i++) {
        max_diagonal = max(fabs(hessian_(i, i)), max_diagonal);
    }
    lambda_ = tau_ * max_diagonal;
}
void computeJacobian() {
    jacobian_.resize(obs_.size(), 3);
    errors_.resize(obs_.size());
    for (size_t i = 0; i < obs_.size(); i++) {
        double xi = obs_[i][0];
        double yi = obs_[i][1];
        // Residual r_i = a*x_i^2 + b*x_i + c - y_i,
        // so dr/da = x^2, dr/db = x, dr/dc = 1.
        Matrix<double, 1, 3> jac;
        double yy = a_ * xi * xi + b_ * xi + c_;
        jac(0, 0) = xi * xi;
        jac(0, 1) = xi;
        jac(0, 2) = 1;
        jacobian_.row(i) = jac;
        errors_(i) = yy - yi;
    }
}

void computeHb() {
    hessian_ = jacobian_.transpose() * jacobian_;
    hb_ = -jacobian_.transpose() * errors_;
}
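Run against the 100 noisy samples, the program should print values close to the ground truth a = 1, b = 2, c = 3; the exact digits depend on the realization of the noise.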
5. Ceres Examples

Automatic differentiation form:
#include <bits/stdc++.h>
#include <eigen3/Eigen/Dense>
#include <ceres/ceres.h>
using namespace std;
using namespace Eigen;

// Residual functor for y = a*x^2 + b*x + c, differentiated automatically by Ceres.
struct costFunc {
    costFunc(double x, double y) : x_(x), y_(y) {}
    template <typename T>
    bool operator()(const T* const a, const T* const b, const T* const c, T* residual) const {
        residual[0] = T(y_) - (a[0] * x_ * x_ + b[0] * x_ + c[0]);
        return true;
    }
    const double x_;
    const double y_;
};

int main() {
    vector<Vector2d> obs;
    std::default_random_engine generator;
    std::normal_distribution<double> noise(0, 0.1);
    double a = 1., b = 2., c = 3.;
    double a0 = 0, b0 = 0, c0 = 0;
    cout << "original: " << a0 << " " << b0 << " " << c0 << endl;
    for (auto x = 1.0; x < 100; x += 1.) {
        double y = a * x * x + b * x + c + noise(generator);
        obs.push_back(Vector2d(x, y));
    }
    ceres::Problem problem;
    for (size_t i = 0; i < obs.size(); i++) {
        // One residual block per observation; Ceres takes ownership of the cost function.
        ceres::CostFunction* pCostFunction =
            new ceres::AutoDiffCostFunction<costFunc, 1, 1, 1, 1>(
                new costFunc(obs[i][0], obs[i][1]));
        problem.AddResidualBlock(pCostFunction, nullptr, &a0, &b0, &c0);
    }
    ceres::Solver::Options options;
    options.linear_solver_type = ceres::DENSE_QR;
    options.minimizer_progress_to_stdout = true;
    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);
    cout << summary.BriefReport() << endl;
    cout << "optimized: " << a0 << " " << b0 << " " << c0 << endl;
    return 0;
}
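In AutoDiffCostFunction<costFunc, 1, 1, 1, 1>, the first numeric template argument is the residual dimension and the remaining three are the sizes of the parameter blocks a, b, and c (each one-dimensional here).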
Analytic derivative form:
#include <bits/stdc++.h>
#include <eigen3/Eigen/Dense>
#include <ceres/ceres.h>
using namespace std;
using namespace Eigen;

// Residual with a hand-written Jacobian: 1 residual, one parameter block of size 3.
class CostFunc : public ceres::SizedCostFunction<1, 3> {
public:
    CostFunc(double x, double y) : x_(x), y_(y) {}
    virtual ~CostFunc() {}
    virtual bool Evaluate(double const* const* parameters, double* residuals,
                          double** jacobians) const {
        const double a = parameters[0][0];
        const double b = parameters[0][1];
        const double c = parameters[0][2];
        residuals[0] = a * x_ * x_ + b * x_ + c - y_;
        if (!jacobians) return true;
        double* jac = jacobians[0];
        if (!jac) return true;
        // d(residual)/da = x^2, d(residual)/db = x, d(residual)/dc = 1.
        jac[0] = x_ * x_;
        jac[1] = x_;
        jac[2] = 1;
        return true;
    }
private:
    const double x_;
    const double y_;
};

int main() {
    vector<Vector2d> obs;
    std::default_random_engine generator;
    std::normal_distribution<double> noise(0, 0.1);
    double a = 1., b = 2., c = 3.;
    double a0 = 0, b0 = 0, c0 = 0;
    cout << "original: " << a0 << " " << b0 << " " << c0 << endl;
    for (auto x = 1.0; x < 100; x += 1.) {
        double y = a * x * x + b * x + c + noise(generator);
        obs.push_back(Vector2d(x, y));
    }
    double param[3] = {a0, b0, c0};
    ceres::Problem problem;
    for (size_t i = 0; i < obs.size(); i++) {
        ceres::CostFunction* pCostFunction = new CostFunc(obs[i][0], obs[i][1]);
        problem.AddResidualBlock(pCostFunction, nullptr, param);
    }
    ceres::Solver::Options options;
    options.linear_solver_type = ceres::DENSE_QR;
    options.minimizer_progress_to_stdout = true;
    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);
    cout << summary.BriefReport() << endl;
    cout << "optimized: " << param[0] << " " << param[1] << " " << param[2] << endl;
    return 0;
}
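For a model this simple, the analytic and automatic-differentiation versions should converge to essentially the same fit; writing the Jacobian by hand mainly avoids the overhead of Ceres's Jet arithmetic.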