OpenCV StereoBM深度提取算法自己的一些注释

最新推荐文章于 2023-02-22 10:12:43 发布

tianqishi

最新推荐文章于 2023-02-22 10:12:43 发布

阅读量579

点赞数 3

本文链接：https://blog.csdn.net/tianqishi/article/details/118729604

版权

参考了，网名叫伊卡洛斯，https://zhuanlan.zhihu.com/p/50801189

他的另一篇SGBM的注释也是写得最好的

先贴上整个过程函数调用关系

和sgbm一样的，x方向的sobel

[ -1,0,1]

[ -2,0,2]

[-1,0,1]

求梯度，也可能不用梯度，而是用prefilterNorm，


static void

prefilterXSobel( const Mat& src, Mat& dst, int ftzero )

{

    int x, y;

    const int OFS = 256*4, TABSZ = OFS*2 + 256;

    uchar tab[TABSZ] = { 0 };

    Size size = src.size();



    for( x = 0; x < TABSZ; x++ )

        tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);



//ftzero默认31，-31   ->0   ,    31  -> 62全变正数





    uchar val0 = tab[0 + OFS];



#if CV_SIMD128

    bool useSIMD = hasSIMD128();

#endif



    for( y = 0; y < size.height-1; y += 2 )

    {

        const uchar* srow1 = src.ptr<uchar>(y);

        const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1; // 这是BORDER_REPLICATE_101类型的扩展

        const uchar* srow2 = y < size.height-1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;

        const uchar* srow3 = y < size.height-2 ? srow1 + src.step*2 : srow1;



//4行，可以算2行的sobel



        uchar* dptr0 = dst.ptr<uchar>(y);

        uchar* dptr1 = dptr0 + dst.step;



        dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0;    //左右两边的sobel=1f, BORDER_REPLICATE_101类型的扩展的结果就是1f

        x = 1;



        for( ; x < size.width-1; x++ )

        {

            int d0 = srow0[x+1] - srow0[x-1], d1 = srow1[x+1] - srow1[x-1],

            d2 = srow2[x+1] - srow2[x-1], d3 = srow3[x+1] - srow3[x-1];

            int v0 = tab[d0 + d1*2 + d2 + OFS];

            int v1 = tab[d1 + d2*2 + d3 + OFS];

            dptr0[x] = (uchar)v0;

            dptr1[x] = (uchar)v1;

        }

    }



    for( ; y < size.height; y++ )    //height是偶数行，不会进来，奇数行，这行sobel全设1f，31

    {

        uchar* dptr = dst.ptr<uchar>(y);

        x = 0;

        for(; x < size.width; x++ )

            dptr[x] = val0;

    }

}


template <typename mType>
static void
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
                           Mat& disp, Mat& cost, const StereoBMParams& state,
                           uchar* buf, int _dy0, int _dy1, const int disp_shift )
{

    const int ALIGN = 16;
    int x, y, d;
    int wsz = state.SADWindowSize, wsz2 = wsz/2;
    int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
    int ndisp = state.numDisparities;
    int mindisp = state.minDisparity;


//如果mindisp=12,深度ndisp=128的话，深度的范围是12~139,lofs=139左图的起始点是x=139，
//左图x=139之前的点都不需要在右图搜索匹配点，对应的深度值都是无效值
//左图x=139的点， 在右图x=0到x=127的这些点搜索匹配点, 
int lofs = MAX(ndisp - 1 + mindisp, 0);   

 //这个rofs一般是0，难不成ndisp+mindisp会是负数
    int rofs = -MIN(ndisp - 1 + mindisp, 0); 

    int width = left.cols, height = left.rows;
    int width1 = width - rofs - ndisp + 1;
    int ftzero = state.preFilterCap;
    int textureThreshold = state.textureThreshold;
    int uniquenessRatio = state.uniquenessRatio;
    mType FILTERED = (mType)((mindisp - 1) << disp_shift);



    int *sad, *hsad0, *hsad, *hsad_sub, *htext;
    uchar *cbuf0, *cbuf;
    const uchar* lptr0 = left.ptr() + lofs;
    const uchar* rptr0 = right.ptr() + rofs;
    const uchar *lptr, *lptr_sub, *rptr;
    mType* dptr = disp.ptr<mType>();
    int sstep = (int)left.step;
    int dstep = (int)(disp.step/sizeof(dptr[0]));
    int cstep = (height+dy0+dy1)*ndisp;
    int costbuf = 0;
    int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
    const int TABSZ = 256;
    uchar tab[TABSZ];

    sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
    hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
    htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
    cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);

    //prefilterXSobel得到的梯度是加过ftzero了，范围是0~2*ftzero-1, 这里减去ftzero,  范围变成-ftzero~ftzero-1, 绝对值越大，图像变化越大，纹理越大
    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)std::abs(x - ftzero);        

    // initialize buffers
    memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
    memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );


//winsize=5, 那图像上下左右都要扩展2行，像SGBM那样，第一行复制两行？
//实际调试发现并不是这样，而是实际有效行从第2行才开始算，0，1行算无效行了，
//大家可以去看一下外层函数compute里的validDisparityRect，有效disparity区域，
//图像宽1280，高960，winsize=5,视差范围是12~139，SGBM里有效区域是x 140~1279,y 0~959
//BM里又缩小了winsize/2，有效区域x 142~1277,y 2~957
//这个函数传进来的参数left,right,图像起始数据已经偏向第二行了，图像的高也变成956
    for( x = -wsz2-1; x < wsz2; x++ )
    {
        hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
        lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
        rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-ndisp) - dy0*sstep;
        for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
        {
            int lval = lptr[0];
            d = 0;

            for( ; d < ndisp; d++ )
            {
                int diff = std::abs(lval - rptr[d]);
//所谓的cbuf，就是cost buf，sgbm里所谓的代价，bm里就是简化成最简单的2个点的梯度值的差
                cbuf[d] = (uchar)diff;     
//hsad, 就是horizontal sad, 窗口大小是5，就是水平方向的5个点之和                     
                hsad[d] = (int)(hsad[d] + diff);      
            }
//htext，也是水平方向的5个纹理相加
            htext[y] += tab[lval];                 
//假如窗口大小是5，wsz2=2, x从-3循环到1，-3,-2,-1,0,1,  为什么从-3开始，
//正确的窗口是[-2,-1,0,1,2], -3不包括在内，我引用的文章里有解释，是为了后面代码的统一，
//代码保持滑动窗口的减去最老的点，加上最新的点，
//假如这里从hsad从-2算到1，就是[-2,-1,0,1]算4点，
//那下面mark1处的代码就会变成
//    if (x==0)  
//        hsad[d] = hsad[d] + diff;
//    else
//        hsad[d] = hsad[d] + diff-cbuf_sub[d];
// 这样就增加了一个if，如果深度范围d取值很大的话，一个if在fpga里就会增加上千个lut
       }
    }


    // initialize the left and right borders of the disparity map
//顾名思义，边界的深度值都是无效值，FILTERED就是上面初始化的最小的深度-1，也就是无效值
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < lofs; x++ )
            dptr[y*dstep + x] = FILTERED;
        for( x = lofs + width1; x < width; x++ )
            dptr[y*dstep + x] = FILTERED;
    }
    dptr += lofs;


//x在外层循环，也就是是一列一列来处理的，不是一行一行
//这里width1，在mindisp不为0时调试下来是有bug的，dptr会写超，超出一行宽度
    for( x = 0; x < width1; x++, dptr++ )
    {
//这里又是一个bug，外面创建cost是CV_16S, 这里却按int指针来存，应该改成short
        int* costptr = cost.data ? cost.ptr<int>() + lofs + x : &costbuf;
        int x0 = x - wsz2 - 1, x1 = x + wsz2;
        const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
        cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
        hsad = hsad0 - dy0*ndisp;
        lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
        lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
        rptr = rptr0 + MIN(MAX(x1, -rofs), width-ndisp-rofs) - dy0*sstep;

        for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
            hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
        {
            int lval = lptr[0];
            d = 0;

            for( ; d < ndisp; d++ )
            {
                int diff = std::abs(lval - rptr[d]);
                cbuf[d] = (uchar)diff;
                hsad[d] = hsad[d] + diff - cbuf_sub[d];             //Mark1
            }
            htext[y] += tab[lval] - tab[lptr_sub[0]];
        }
//边界，调试下来是上下各扩展一行，winsize=5，这里的dy0=dy1=wsz2=2
//winsize=5，前面tsum预加从htext -3行，-2行，-1行，-0行，1行，扩展的一行是-3行复制-2行，其实无论复制什么都无所谓，-3行是要作为无效行减掉的
//height+2行扩展了没用到，下面finally, start the real processing这个大循环里没有用到height+1行，
        // fill borders
        for( y = dy1; y <= wsz2; y++ )
            htext[height+y] = htext[height+dy1-1];
        for( y = -wsz2-1; y < -dy0; y++ )
            htext[y] = htext[-dy0];

 //这里wsz2+2-dy0=2，就是-dy0，也就是-2行乘以2，和前面hsad扩展复制一行的效果一样，复制一行，也是-2行乘以2
//下面减的时候， hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;  减-3行就是减-2行，MAX(-3,-2)=-2
        // initialize sums
        for( d = 0; d < ndisp; d++ )
            sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));

        hsad = hsad0 + (1 - dy0)*ndisp;
        for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
        {
            d = 0;

            for( ; d < ndisp; d++ )
                sad[d] = (int)(sad[d] + hsad[d]);
        }
        int tsum = 0;
        for( y = -wsz2-1; y < wsz2; y++ )
            tsum += htext[y];


//Mark2
//   a a a a a|
//   x x x x x|
//   x x x x x|
//   x x x x x|
//   x x x x x|
//   ---------
// b c d e f g

//还是拿这个图来解释，原理非常简单，但是简单的原理用代码来实现就是这么一堆复杂的代码，

//假如滑动窗口5x5
//hsad,水平5个相加,hsad放到hsad0这个buffer里的, 
//当前一行的hsad，在f位置hsad=b+c+d+e+f,在g位置就是hsad=c+d+e+f+g,所以窗口横向推进1格，hsad=hsad+g-b, 就是加上最新的值，减去最老的值，

//上面Mark1处的这个循环把一列里的hsad全都算好了，也就是一列hsad全部做了加上最新值，减去最老值的动作，然后保存到buffer里，
//hsad[d] + diff - cbuf_sub[d]， diff就是最新值，cbuf_sub[d]就是最老值

//这个循环处理y方向的窗口推进，就是加上最新的行,c,d,e,f,g这行，减去最老的行，就是a,a,a,a,a这行




        // finally, start the real processing
        for( y = 0; y < height; y++ )
        {
            int minsad = INT_MAX, mind = -1;
            hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
            hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
            d = 0;

            for( ; d < ndisp; d++ )
            {
                int currsad = sad[d] + hsad[d] - hsad_sub[d];         // hsad[d]就是最新行，hsad_sub[d]就是最老行
                sad[d] = currsad;
                if( currsad < minsad )          //同时求最佳视差，也就是加窗之后的代价最小的这个视差
                {
                    minsad = currsad;
                    mind = d;
                }
            }

            tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
            if( tsum < textureThreshold )
            {
                dptr[y*dstep] = FILTERED;
                continue;
            }

//唯一性，和SGBM一样，
//唯一性，什么叫唯一性，理想的情况下，比如说最大深度是128，最佳视差是50，最佳视差对应的代价最小，其余的视差都是错的，对应的代价都很大，如果有两个视差的代价都很小，那就不唯一，

//比如mind，最佳视差是50，它对应的sad=100，其他视差，他们对应的sad也是100，那就不唯一了，其他视差对应的sad必须大于小于100一定程度

            if( uniquenessRatio > 0 ) 
            {
                int thresh = minsad + (minsad * uniquenessRatio/100);
                for( d = 0; d < ndisp; d++ )
                {
                    if( (d < mind-1 || d > mind+1) && sad[d] <= thresh)
                        break;
                }
                if( d < ndisp )
                {
                    dptr[y*dstep] = FILTERED;
                    continue;
                }
            }

            {
                sad[-1] = sad[1];
                sad[ndisp] = sad[ndisp-2];
                int p = sad[mind+1], n = sad[mind-1];
                d = p + n - 2*sad[mind] + std::abs(p - n);
                dptr[y*dstep] = (mType)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15)
                                 >> (DISPARITY_SHIFT_32S - disp_shift));
//为什么是ndisp-1+mindisp - mind,反一下，下面画个图方便大家理解
                costptr[y*coststep] = sad[mind];


            }
        }
    }
}