参考了,网名叫伊卡洛斯,https://zhuanlan.zhihu.com/p/50801189
他的另一篇SGBM的注释也是写得最好的
先贴上整个过程函数调用关系
和sgbm一样的,x方向的sobel
[ -1,0,1]
[ -2,0,2]
[-1,0,1]
求梯度,也可能不用梯度,而是用prefilterNorm,
static void
prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
{
int x, y;
const int OFS = 256*4, TABSZ = OFS*2 + 256;
uchar tab[TABSZ] = { 0 };
Size size = src.size();
for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);
//ftzero默认31,-31 ->0 , 31 -> 62全变正数
uchar val0 = tab[0 + OFS];
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif
for( y = 0; y < size.height-1; y += 2 )
{
const uchar* srow1 = src.ptr<uchar>(y);
const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1; // 这是BORDER_REPLICATE_101类型的扩展
const uchar* srow2 = y < size.height-1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;
const uchar* srow3 = y < size.height-2 ? srow1 + src.step*2 : srow1;
//4行,可以算2行的sobel
uchar* dptr0 = dst.ptr<uchar>(y);
uchar* dptr1 = dptr0 + dst.step;
dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0; //左右两边的sobel=1f, BORDER_REPLICATE_101类型的扩展的结果就是1f
x = 1;
for( ; x < size.width-1; x++ )
{
int d0 = srow0[x+1] - srow0[x-1], d1 = srow1[x+1] - srow1[x-1],
d2 = srow2[x+1] - srow2[x-1], d3 = srow3[x+1] - srow3[x-1];
int v0 = tab[d0 + d1*2 + d2 + OFS];
int v1 = tab[d1 + d2*2 + d3 + OFS];
dptr0[x] = (uchar)v0;
dptr1[x] = (uchar)v1;
}
}
for( ; y < size.height; y++ ) //height是偶数行,不会进来,奇数行,这行sobel全设1f,31
{
uchar* dptr = dst.ptr<uchar>(y);
x = 0;
for(; x < size.width; x++ )
dptr[x] = val0;
}
}
template <typename mType>
static void
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, const StereoBMParams& state,
uchar* buf, int _dy0, int _dy1, const int disp_shift )
{
const int ALIGN = 16;
int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
int ndisp = state.numDisparities;
int mindisp = state.minDisparity;
//如果mindisp=12,深度ndisp=128的话,深度的范围是12~139,lofs=139左图的起始点是x=139,
//左图x=139之前的点都不需要在右图搜索匹配点,对应的深度值都是无效值
//左图x=139的点, 在右图x=0到x=127的这些点搜索匹配点,
int lofs = MAX(ndisp - 1 + mindisp, 0);
//这个rofs一般是0,难不成ndisp+mindisp会是负数
int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio;
mType FILTERED = (mType)((mindisp - 1) << disp_shift);
int *sad, *hsad0, *hsad, *hsad_sub, *htext;
uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.ptr() + lofs;
const uchar* rptr0 = right.ptr() + rofs;
const uchar *lptr, *lptr_sub, *rptr;
mType* dptr = disp.ptr<mType>();
int sstep = (int)left.step;
int dstep = (int)(disp.step/sizeof(dptr[0]));
int cstep = (height+dy0+dy1)*ndisp;
int costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256;
uchar tab[TABSZ];
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
//prefilterXSobel得到的梯度是加过ftzero了,范围是0~2*ftzero-1, 这里减去ftzero, 范围变成-ftzero~ftzero-1, 绝对值越大,图像变化越大,纹理越大
for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)std::abs(x - ftzero);
// initialize buffers
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );
//winsize=5, 那图像上下左右都要扩展2行,像SGBM那样,第一行复制两行?
//实际调试发现并不是这样,而是实际有效行从第2行才开始算,0,1行算无效行了,
//大家可以去看一下外层函数compute里的validDisparityRect,有效disparity区域,
//图像宽1280,高960,winsize=5,视差范围是12~139,SGBM里有效区域是x 140~1279,y 0~959
//BM里又缩小了winsize/2,有效区域x 142~1277,y 2~957
//这个函数传进来的参数left,right,图像起始数据已经偏向第二行了,图像的高也变成956
for( x = -wsz2-1; x < wsz2; x++ )
{
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-ndisp) - dy0*sstep;
for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
{
int lval = lptr[0];
d = 0;
for( ; d < ndisp; d++ )
{
int diff = std::abs(lval - rptr[d]);
//所谓的cbuf,就是cost buf,sgbm里所谓的代价,bm里就是简化成最简单的2个点的梯度值的差
cbuf[d] = (uchar)diff;
//hsad, 就是horizontal sad, 窗口大小是5,就是水平方向的5个点之和
hsad[d] = (int)(hsad[d] + diff);
}
//htext,也是水平方向的5个纹理相加
htext[y] += tab[lval];
//假如窗口大小是5,wsz2=2, x从-3循环到1,-3,-2,-1,0,1, 为什么从-3开始,
//正确的窗口是[-2,-1,0,1,2], -3不包括在内,我引用的文章里有解释,是为了后面代码的统一,
//代码保持滑动窗口的减去最老的点,加上最新的点,
//假如这里从hsad从-2算到1,就是[-2,-1,0,1]算4点,
//那下面mark1处的代码就会变成
// if (x==0)
// hsad[d] = hsad[d] + diff;
// else
// hsad[d] = hsad[d] + diff-cbuf_sub[d];
// 这样就增加了一个if,如果深度范围d取值很大的话,一个if在fpga里就会增加上千个lut
}
}
// initialize the left and right borders of the disparity map
//顾名思义,边界的深度值都是无效值,FILTERED就是上面初始化的最小的深度-1,也就是无效值
for( y = 0; y < height; y++ )
{
for( x = 0; x < lofs; x++ )
dptr[y*dstep + x] = FILTERED;
for( x = lofs + width1; x < width; x++ )
dptr[y*dstep + x] = FILTERED;
}
dptr += lofs;
//x在外层循环,也就是是一列一列来处理的,不是一行一行
//这里width1,在mindisp不为0时调试下来是有bug的,dptr会写超,超出一行宽度
for( x = 0; x < width1; x++, dptr++ )
{
//这里又是一个bug,外面创建cost是CV_16S, 这里却按int指针来存,应该改成short
int* costptr = cost.data ? cost.ptr<int>() + lofs + x : &costbuf;
int x0 = x - wsz2 - 1, x1 = x + wsz2;
const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
hsad = hsad0 - dy0*ndisp;
lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
rptr = rptr0 + MIN(MAX(x1, -rofs), width-ndisp-rofs) - dy0*sstep;
for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
{
int lval = lptr[0];
d = 0;
for( ; d < ndisp; d++ )
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = hsad[d] + diff - cbuf_sub[d]; //Mark1
}
htext[y] += tab[lval] - tab[lptr_sub[0]];
}
//边界,调试下来是上下各扩展一行,winsize=5,这里的dy0=dy1=wsz2=2
//winsize=5,前面tsum预加从htext -3行,-2行,-1行,-0行,1行,扩展的一行是-3行复制-2行,其实无论复制什么都无所谓,-3行是要作为无效行减掉的
//height+2行扩展了没用到,下面finally, start the real processing这个大循环里没有用到height+1行,
// fill borders
for( y = dy1; y <= wsz2; y++ )
htext[height+y] = htext[height+dy1-1];
for( y = -wsz2-1; y < -dy0; y++ )
htext[y] = htext[-dy0];
//这里wsz2+2-dy0=2,就是-dy0,也就是-2行乘以2,和前面hsad扩展复制一行的效果一样,复制一行,也是-2行乘以2
//下面减的时候, hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; 减-3行就是减-2行,MAX(-3,-2)=-2
// initialize sums
for( d = 0; d < ndisp; d++ )
sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));
hsad = hsad0 + (1 - dy0)*ndisp;
for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
{
d = 0;
for( ; d < ndisp; d++ )
sad[d] = (int)(sad[d] + hsad[d]);
}
int tsum = 0;
for( y = -wsz2-1; y < wsz2; y++ )
tsum += htext[y];
//Mark2
// a a a a a|
// x x x x x|
// x x x x x|
// x x x x x|
// x x x x x|
// ---------
// b c d e f g
//还是拿这个图来解释,原理非常简单,但是简单的原理用代码来实现就是这么一堆复杂的代码,
//假如滑动窗口5x5
//hsad,水平5个相加,hsad放到hsad0这个buffer里的,
//当前一行的hsad,在f位置hsad=b+c+d+e+f,在g位置就是hsad=c+d+e+f+g,所以窗口横向推进1格,hsad=hsad+g-b, 就是加上最新的值,减去最老的值,
//上面Mark1处的这个循环把一列里的hsad全都算好了,也就是一列hsad全部做了加上最新值,减去最老值的动作,然后保存到buffer里,
//hsad[d] + diff - cbuf_sub[d], diff就是最新值,cbuf_sub[d]就是最老值
//这个循环处理y方向的窗口推进,就是加上最新的行,c,d,e,f,g这行,减去最老的行,就是a,a,a,a,a这行
// finally, start the real processing
for( y = 0; y < height; y++ )
{
int minsad = INT_MAX, mind = -1;
hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
d = 0;
for( ; d < ndisp; d++ )
{
int currsad = sad[d] + hsad[d] - hsad_sub[d]; // hsad[d]就是最新行,hsad_sub[d]就是最老行
sad[d] = currsad;
if( currsad < minsad ) //同时求最佳视差,也就是加窗之后的代价最小的这个视差
{
minsad = currsad;
mind = d;
}
}
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
if( tsum < textureThreshold )
{
dptr[y*dstep] = FILTERED;
continue;
}
//唯一性,和SGBM一样,
//唯一性,什么叫唯一性,理想的情况下,比如说最大深度是128,最佳视差是50,最佳视差对应的代价最小,其余的视差都是错的,对应的代价都很大,如果有两个视差的代价都很小,那就不唯一,
//比如mind,最佳视差是50,它对应的sad=100,其他视差,他们对应的sad也是100,那就不唯一了,其他视差对应的sad必须大于小于100一定程度
if( uniquenessRatio > 0 )
{
int thresh = minsad + (minsad * uniquenessRatio/100);
for( d = 0; d < ndisp; d++ )
{
if( (d < mind-1 || d > mind+1) && sad[d] <= thresh)
break;
}
if( d < ndisp )
{
dptr[y*dstep] = FILTERED;
continue;
}
}
{
sad[-1] = sad[1];
sad[ndisp] = sad[ndisp-2];
int p = sad[mind+1], n = sad[mind-1];
d = p + n - 2*sad[mind] + std::abs(p - n);
dptr[y*dstep] = (mType)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15)
>> (DISPARITY_SHIFT_32S - disp_shift));
//为什么是ndisp-1+mindisp - mind,反一下,下面画个图方便大家理解
costptr[y*coststep] = sad[mind];
}
}
}
}
假设视差范围是0~127,ndisp=128, 左图中x=200,y=100的点,在右图中寻找匹配点的搜寻范围是
x=200-0 ~ x=200-127, 即x=200,y=100到x=73,y=100之间的点,
匹配d=0的点,是右图的x=200,y=100
匹配d=1的点,是右图的x=199,y=100,
匹配d=127的点,是右图的x=73,y=100
而代码中for(d=0;d<ndisp;d++)
d=0的点对应的点是rptr[0], 即最左边的x=73,y=100的点,实际这个点的视差是127,不是0