1. Principle of the LM Algorithm

Levenberg-Marquardt (LM) is a damped variant of Gauss-Newton for nonlinear least-squares problems. Instead of the pure Gauss-Newton system it solves $(H + \lambda I)\,\Delta x = b$ (with $H = J^{T}J$ and $b = -J^{T}f$, derived in the next section). The damping factor $\lambda$ is adapted every iteration: a step that reduces the squared error is accepted and $\lambda$ is decreased, while a step that increases it is rejected and $\lambda$ is increased. A large $\lambda$ makes the step behave like gradient descent; a small $\lambda$ recovers Gauss-Newton.
2. The Gauss-Newton Algorithm
Let $f(x)$ be the vector of residuals. A first-order Taylor expansion of $f$ around the current estimate $x$ gives:

$$f(x + \Delta x) \approx f(x) + J(x)\,\Delta x$$

where $J(x)$ is the Jacobian of $f$ at $x$. The objective function can then be transformed into

$$\min_{\Delta x}\ \frac{1}{2}\left\| f(x) + J(x)\,\Delta x \right\|^2$$

Taking the derivative of the above with respect to $\Delta x$ and setting it to zero gives:

$$J(x)^{T} J(x)\,\Delta x = -J(x)^{T} f(x)$$

which simplifies to $H\,\Delta x = b$, with $H = J^{T}J$ and $b = -J^{T}f$. This is the update equation of the Gauss-Newton algorithm.
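As a minimal sketch of one such step in code (Eigen-based, matching the style of the implementation below; J and r are assumed names for the stacked Jacobian and the residual vector):

#include <eigen3/Eigen/Dense>

// One Gauss-Newton step: solve (J^T J) * delta = -J^T r.
Eigen::VectorXd gaussNewtonStep(const Eigen::MatrixXd& J, const Eigen::VectorXd& r) {
    Eigen::MatrixXd H = J.transpose() * J;   // Gauss-Newton approximation of the Hessian
    Eigen::VectorXd b = -J.transpose() * r;  // right-hand side b = -J^T r
    return H.ldlt().solve(b);                // well defined only if H is invertible
}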
3. Differences Between GN and LM
LM rejects any update that would increase the sum of squared residuals, which makes it more robust but potentially slower; Gauss-Newton applies every step directly and requires $H = J^{T}J$ to be invertible.
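The mechanical difference is confined to the linear solve. A sketch of the damped step, paired with the gaussNewtonStep sketch above (hypothetical helper; H and b as defined in section 2):

// Levenberg-Marquardt step: (H + lambda*I) is positive definite for any
// lambda > 0, so the system is solvable even when J is rank-deficient.
Eigen::VectorXd lmStep(const Eigen::MatrixXd& H, const Eigen::VectorXd& b, double lambda) {
    int n = H.rows();
    return (H + lambda * Eigen::MatrixXd::Identity(n, n)).ldlt().solve(b);
}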
4. A Simple C++ Implementation of LM
#include <bits/stdc++.h>
#include <eigen3/Eigen/Dense>
using namespace std;
using namespace Eigen;

vector<Vector2d> obs_;            // observations (x_i, y_i)
int max_iterations_ = 100;        // outer LM iterations
int max_inner_iterations_ = 10;   // attempts to find an acceptable step per iteration
double lambda_;                   // damping factor
VectorXd errors_;                 // residuals r_i = a*x_i^2 + b*x_i + c - y_i
Matrix3d hessian_;                // H = J^T J
Vector3d hb_;                     // b = -J^T r
MatrixXd jacobian_;
double tau_ = 1e-5;               // scale for the initial lambda
double epsilon_ = 1e-5;           // convergence threshold on the step norm
double current_squared_error;
double a_, b_, c_;                // parameters being estimated

void computeJacobian();
void computeHb();
void initLambda();
int main() {
    // Ground-truth parameters and noisy samples of y = a*x^2 + b*x + c.
    double a = 1, b = 2, c = 3;
    a_ = 0, b_ = 0, c_ = 0;
    std::default_random_engine generator;
    std::normal_distribution<double> noise(0, 0.1);
    for (int i = 0; i < 100; i++) {
        Vector2d ob;
        ob(0) = i;
        ob(1) = a * i * i + b * i + c + noise(generator);
        obs_.push_back(ob);
    }
    computeJacobian();
    computeHb();
    initLambda();
    int iter = 0;
    double delta_norm; // norm of the update step in each iteration
    double v = 2.0;
    while (iter++ < max_iterations_) {
        current_squared_error = errors_.squaredNorm();
        int inner_iter = 0;
        while (inner_iter++ < max_inner_iterations_) {
            // Solve the damped normal equations (H + lambda*I) * delta = b.
            Matrix3d damper = lambda_ * Matrix3d::Identity();
            Vector3d delta = (hessian_ + damper).inverse() * hb_;
            double aa = a_ + delta(0);
            double bb = b_ + delta(1);
            double cc = c_ + delta(2);
            delta_norm = delta.norm();
            // Evaluate the squared error at the candidate parameters.
            double new_squared_error = 0.0;
            for (size_t i = 0; i < obs_.size(); i++) {
                double xi = obs_[i](0);
                double yi = obs_[i](1);
                double yy = aa * xi * xi + bb * xi + cc;
                new_squared_error += pow(yy - yi, 2);
            }
            // Gain ratio: actual reduction of the squared error over the
            // reduction predicted by the linearized model, delta^T (lambda*delta + b).
            double rho = (current_squared_error - new_squared_error) /
                         (delta.dot(lambda_ * delta + hb_) + 1e-3);
            if (rho > 0) {
                // Step accepted: keep the parameters and shrink lambda (Nielsen's rule).
                a_ = aa, b_ = bb, c_ = cc;
                lambda_ = lambda_ * max(1.0 / 3.0, 1 - pow((2 * rho - 1), 3));
                v = 2;
                break;
            } else {
                // Step rejected: increase the damping and retry.
                lambda_ = lambda_ * v;
                v *= 2;
            }
        }
        if (delta_norm < epsilon_) {
            break;
        }
        computeJacobian();
        computeHb();
    }
    cout << a_ << " " << b_ << " " << c_ << endl;
    return 0;
}
void initLambda() {
    // Initial lambda proportional to the largest diagonal entry of H.
    double max_diagonal = 0.;
    for (int i = 0; i < 3; i++) {
        max_diagonal = max(fabs(hessian_(i, i)), max_diagonal);
    }
    lambda_ = tau_ * max_diagonal;
}
void computeJacobian() {
    jacobian_.resize(obs_.size(), 3);
    errors_.resize(obs_.size());
    for (size_t i = 0; i < obs_.size(); i++) {
        double xi = obs_[i][0];
        double yi = obs_[i][1];
        // Residual r_i = a*x_i^2 + b*x_i + c - y_i,
        // so dr/da = x^2, dr/db = x, dr/dc = 1.
        Matrix<double, 1, 3> jac;
        double yy = a_ * xi * xi + b_ * xi + c_;
        jac(0, 0) = xi * xi;
        jac(0, 1) = xi;
        jac(0, 2) = 1;
        jacobian_.row(i) = jac;
        errors_(i) = yy - yi;
    }
}

void computeHb() {
    hessian_ = jacobian_.transpose() * jacobian_;
    hb_ = -jacobian_.transpose() * errors_;
}
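Run against the 100 noisy samples, the program should print values close to the ground truth a = 1, b = 2, c = 3; the exact digits depend on the realization of the noise.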
5. Ceres Examples

Automatic differentiation form:
#include <bits/stdc++.h>
#include <eigen3/Eigen/Dense>
#include <ceres/ceres.h>
using namespace std;
using namespace Eigen;

// Residual functor for y = a*x^2 + b*x + c, differentiated automatically by Ceres.
struct costFunc {
    costFunc(double x, double y) : x_(x), y_(y) {}
    template <typename T>
    bool operator()(const T* const a, const T* const b, const T* const c, T* residual) const {
        residual[0] = T(y_) - (a[0] * x_ * x_ + b[0] * x_ + c[0]);
        return true;
    }
    const double x_;
    const double y_;
};

int main() {
    vector<Vector2d> obs;
    std::default_random_engine generator;
    std::normal_distribution<double> noise(0, 0.1);
    double a = 1., b = 2., c = 3.;
    double a0 = 0, b0 = 0, c0 = 0;
    cout << "original: " << a0 << " " << b0 << " " << c0 << endl;
    for (auto x = 1.0; x < 100; x += 1.) {
        double y = a * x * x + b * x + c + noise(generator);
        obs.push_back(Vector2d(x, y));
    }
    ceres::Problem problem;
    for (size_t i = 0; i < obs.size(); i++) {
        // One residual block per observation; Ceres takes ownership of the cost function.
        ceres::CostFunction* pCostFunction =
            new ceres::AutoDiffCostFunction<costFunc, 1, 1, 1, 1>(
                new costFunc(obs[i][0], obs[i][1]));
        problem.AddResidualBlock(pCostFunction, nullptr, &a0, &b0, &c0);
    }
    ceres::Solver::Options options;
    options.linear_solver_type = ceres::DENSE_QR;
    options.minimizer_progress_to_stdout = true;
    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);
    cout << summary.BriefReport() << endl;
    cout << "optimized: " << a0 << " " << b0 << " " << c0 << endl;
    return 0;
}
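In AutoDiffCostFunction<costFunc, 1, 1, 1, 1>, the first numeric template argument is the residual dimension and the remaining three are the sizes of the parameter blocks a, b, and c (each one-dimensional here).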
Analytic derivative form:
#include <bits/stdc++.h>
#include <eigen3/Eigen/Dense>
#include <ceres/ceres.h>
using namespace std;
using namespace Eigen;

// Residual with a hand-written Jacobian: 1 residual, one parameter block of size 3.
class CostFunc : public ceres::SizedCostFunction<1, 3> {
public:
    CostFunc(double x, double y) : x_(x), y_(y) {}
    virtual ~CostFunc() {}
    virtual bool Evaluate(double const* const* parameters, double* residuals,
                          double** jacobians) const {
        const double a = parameters[0][0];
        const double b = parameters[0][1];
        const double c = parameters[0][2];
        residuals[0] = a * x_ * x_ + b * x_ + c - y_;
        if (!jacobians) return true;
        double* jac = jacobians[0];
        if (!jac) return true;
        // d(residual)/da = x^2, d(residual)/db = x, d(residual)/dc = 1.
        jac[0] = x_ * x_;
        jac[1] = x_;
        jac[2] = 1;
        return true;
    }
private:
    const double x_;
    const double y_;
};

int main() {
    vector<Vector2d> obs;
    std::default_random_engine generator;
    std::normal_distribution<double> noise(0, 0.1);
    double a = 1., b = 2., c = 3.;
    double a0 = 0, b0 = 0, c0 = 0;
    cout << "original: " << a0 << " " << b0 << " " << c0 << endl;
    for (auto x = 1.0; x < 100; x += 1.) {
        double y = a * x * x + b * x + c + noise(generator);
        obs.push_back(Vector2d(x, y));
    }
    double param[3] = {a0, b0, c0};
    ceres::Problem problem;
    for (size_t i = 0; i < obs.size(); i++) {
        ceres::CostFunction* pCostFunction = new CostFunc(obs[i][0], obs[i][1]);
        problem.AddResidualBlock(pCostFunction, nullptr, param);
    }
    ceres::Solver::Options options;
    options.linear_solver_type = ceres::DENSE_QR;
    options.minimizer_progress_to_stdout = true;
    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);
    cout << summary.BriefReport() << endl;
    cout << "optimized: " << param[0] << " " << param[1] << " " << param[2] << endl;
    return 0;
}
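For a model this simple, the analytic and automatic-differentiation versions should converge to essentially the same fit; writing the Jacobian by hand mainly avoids the overhead of Ceres's Jet arithmetic.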