DSO 数据结构之 Accumulator9

Accumulator9在CoarseInitializer::calcResAndGS函数中使用,首先不管这个数据结构在CoarseInitializer::calcResAndGS中怎么使用,我们先简单测试下这个数据结构的作用和使用方法:

Accumulator9 acc9;
acc9.initialize();
float j0 = 10.0;
float j1 = 11.0;
float j2 = 12.0;
float j3 = 13.0;
float j4 = 14.0;
float j5 = 15.0;
float j6 = 16.0;
float j7 = 17.0;
float j8 = 18.0;
acc9.updateSingle(j0,j1,j2,j3,j4,j5,j6,j7,j8);
acc9.finish();
std::cout<<acc9.H<<std::endl;

打印结果:
这里写图片描述
我们初步可以看出它的作用是计算Hessian矩阵, 其具体数据结构如下:

class Accumulator9
{
public:
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW;

  Mat99f H; //[Hessian 矩阵 9*9]
  Vec9f b;
  size_t num;

  inline void initialize()
  {
    H.setZero();
    b.setZero();
    memset(SSEData,0, sizeof(float)*4*45);   // SSE指令加速计算
    memset(SSEData1k,0, sizeof(float)*4*45);
    memset(SSEData1m,0, sizeof(float)*4*45);
    num = numIn1 = numIn1k = numIn1m = 0;
  }

  inline void finish()
  {
    H.setZero();
    shiftUp(true);
    assert(numIn1==0);
    assert(numIn1k==0);

    int idx=0;
    for(int r=0;r<9;r++)
        for(int c=r;c<9;c++)
        {
           float d = SSEData1m[idx+0] + SSEData1m[idx+1] + SSEData1m[idx+2] +  SSEData1m[idx+3];
          H(r,c) = H(c,r) = d; // Hessian矩阵是对称的
            idx+=4;
        }
      assert(idx==4*45);
  }


  inline void updateSSE(
          const __m128 J0,const __m128 J1,
          const __m128 J2,const __m128 J3,
          const __m128 J4,const __m128 J5,
          const __m128 J6,const __m128 J7,
          const __m128 J8)
  {
      float* pt=SSEData;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J0))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J1))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J2))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J1))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J2))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J2))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J6,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J6,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J6,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J7,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J7,J8))); pt+=4;

      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J8,J8))); pt+=4;

      num+=4;
      numIn1++;
      shiftUp(false);
  }





  inline void updateSSE_eighted(
          const __m128 J0,const __m128 J1,
          const __m128 J2,const __m128 J3,
          const __m128 J4,const __m128 J5,
          const __m128 J6,const __m128 J7,
          const __m128 J8, const __m128 w)
  {
      float* pt=SSEData;

      __m128 J0w = _mm_mul_ps(J0,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J0))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J1))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J2))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J0w,J8))); pt+=4;

      __m128 J1w = _mm_mul_ps(J1,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J1))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J2))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J1w,J8))); pt+=4;

      __m128 J2w = _mm_mul_ps(J2,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J2))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J2w,J8))); pt+=4;

      __m128 J3w = _mm_mul_ps(J3,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3w,J3))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3w,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3w,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J3w,J8))); pt+=4;

      __m128 J4w = _mm_mul_ps(J4,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4w,J4))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4w,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J4w,J8))); pt+=4;

      __m128 J5w = _mm_mul_ps(J5,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5w,J5))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J5w,J8))); pt+=4;

      __m128 J6w = _mm_mul_ps(J6,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J6w,J6))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J6w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J6w,J8))); pt+=4;

      __m128 J7w = _mm_mul_ps(J7,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J7w,J7))); pt+=4;
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J7w,J8))); pt+=4;

      __m128 J8w = _mm_mul_ps(J8,w);
      _mm_store_ps(pt, _mm_add_ps(_mm_load_ps(pt),_mm_mul_ps(J8w,J8))); pt+=4;

      num+=4;
      numIn1++;
      shiftUp(false);
  }


  inline void updateSingle(
          const float J0,const float J1,
          const float J2,const float J3,
          const float J4,const float J5,
          const float J6,const float J7,
          const float J8, int off=0)
  {
      float* pt=SSEData+off;
      *pt += J0*J0; pt+=4;
      *pt += J1*J0; pt+=4;
      *pt += J2*J0; pt+=4;
      *pt += J3*J0; pt+=4;
      *pt += J4*J0; pt+=4;
      *pt += J5*J0; pt+=4;
      *pt += J6*J0; pt+=4;
      *pt += J7*J0; pt+=4;
      *pt += J8*J0; pt+=4;


      *pt += J1*J1; pt+=4;
      *pt += J2*J1; pt+=4;
      *pt += J3*J1; pt+=4;
      *pt += J4*J1; pt+=4;
      *pt += J5*J1; pt+=4;
      *pt += J6*J1; pt+=4;
      *pt += J7*J1; pt+=4;
      *pt += J8*J1; pt+=4;


      *pt += J2*J2; pt+=4;
      *pt += J3*J2; pt+=4;
      *pt += J4*J2; pt+=4;
      *pt += J5*J2; pt+=4;
      *pt += J6*J2; pt+=4;
      *pt += J7*J2; pt+=4;
      *pt += J8*J2; pt+=4;


      *pt += J3*J3; pt+=4;
      *pt += J4*J3; pt+=4;
      *pt += J5*J3; pt+=4;
      *pt += J6*J3; pt+=4;
      *pt += J7*J3; pt+=4;
      *pt += J8*J3; pt+=4;


      *pt += J4*J4; pt+=4;
      *pt += J5*J4; pt+=4;
      *pt += J6*J4; pt+=4;
      *pt += J7*J4; pt+=4;
      *pt += J8*J4; pt+=4;

      *pt += J5*J5; pt+=4;
      *pt += J6*J5; pt+=4;
      *pt += J7*J5; pt+=4;
      *pt += J8*J5; pt+=4;


      *pt += J6*J6; pt+=4;
      *pt += J7*J6; pt+=4;
      *pt += J8*J6; pt+=4;


      *pt += J7*J7; pt+=4;
      *pt += J8*J7; pt+=4;

      *pt += J8*J8; pt+=4;

      num++;
      numIn1++;
      shiftUp(false);
  }

  inline void updateSingleWeighted(
          float J0, float J1,
          float J2, float J3,
          float J4, float J5,
          float J6, float J7,
          float J8, float w,
          int off=0)
  {
    // 这里9个
      float* pt=SSEData+off;
      *pt += J0*J0*w; pt+=4; J0*=w;
      *pt += J1*J0; pt+=4;
      *pt += J2*J0; pt+=4;
      *pt += J3*J0; pt+=4;
      *pt += J4*J0; pt+=4;
      *pt += J5*J0; pt+=4;
      *pt += J6*J0; pt+=4;
      *pt += J7*J0; pt+=4;
      *pt += J8*J0; pt+=4;

    //这里8个,前面的J1*J0×已经计算过
      *pt += J1*J1*w; pt+=4; J1*=w;
      *pt += J2*J1; pt+=4;
      *pt += J3*J1; pt+=4;
      *pt += J4*J1; pt+=4;
      *pt += J5*J1; pt+=4;
      *pt += J6*J1; pt+=4;
      *pt += J7*J1; pt+=4;
      *pt += J8*J1; pt+=4;

        //这里7个
      *pt += J2*J2*w; pt+=4; J2*=w;
      *pt += J3*J2; pt+=4;
      *pt += J4*J2; pt+=4;
      *pt += J5*J2; pt+=4;
      *pt += J6*J2; pt+=4;
      *pt += J7*J2; pt+=4;
      *pt += J8*J2; pt+=4;

    //6
      *pt += J3*J3*w; pt+=4; J3*=w;
      *pt += J4*J3; pt+=4;
      *pt += J5*J3; pt+=4;
      *pt += J6*J3; pt+=4;
      *pt += J7*J3; pt+=4;
      *pt += J8*J3; pt+=4;

      //5
      *pt += J4*J4*w; pt+=4; J4*=w;
      *pt += J5*J4; pt+=4;
      *pt += J6*J4; pt+=4;
      *pt += J7*J4; pt+=4;
      *pt += J8*J4; pt+=4;

        //4
      *pt += J5*J5*w; pt+=4; J5*=w;
      *pt += J6*J5; pt+=4;
      *pt += J7*J5; pt+=4;
      *pt += J8*J5; pt+=4;

        //3
      *pt += J6*J6*w; pt+=4; J6*=w;
      *pt += J7*J6; pt+=4;
      *pt += J8*J6; pt+=4;

        //2
      *pt += J7*J7*w; pt+=4; J7*=w;
      *pt += J8*J7; pt+=4;
       //1
      *pt += J8*J8*w; pt+=4;

      num++;
      numIn1++;
      shiftUp(false);
  }


private:
  EIGEN_ALIGN16 float SSEData[4*45];  // 这里45是 1+2+3+4+5+..+9 = 45这是由于Hessian是对称阵。
  EIGEN_ALIGN16 float SSEData1k[4*45];
  EIGEN_ALIGN16 float SSEData1m[4*45];
  float numIn1, numIn1k, numIn1m;

//
  void shiftUp(bool force)
  {
      if(numIn1 > 1000 || force)
      {
          for(int i=0;i<45;i++)
                _mm_store_ps(SSEData1k+4*i,        _mm_add_ps(_mm_load_ps(SSEData+4*i),

        _mm_load_ps(SSEData1k+4*i)));//_mm_store_ps(output, i)
          numIn1k+=numIn1;
          numIn1=0;
          memset(SSEData,0, sizeof(float)*4*45);
      }

      if(numIn1k > 1000 || force)
      {
          for(int i=0;i<45;i++)
              _mm_store_ps(SSEData1m+4*i, _mm_add_ps(_mm_load_ps(SSEData1k+4*i),_mm_load_ps(SSEData1m+4*i)));
          numIn1m+=numIn1k;
          numIn1k=0;
          memset(SSEData1k,0, sizeof(float)*4*45);
      }
  }
};// 9

这里写图片描述
其中 9 是这么来的
JIJgeo6Jphoto2r1
这样计算出9*9的H矩阵之后
我们需要的
H=JTWJb=JTWr99H

H_out = acc9.H.topLeftCorner<8,8>();// / acc9.num;  H = JWJ
b_out = acc9.H.topRightCorner<8,1>();// / acc9.num; b = JWr
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值