图像分割—mean shift(OpenCV源码注解)

关于meanshitf的介绍:

mean shift 图像分割 (一)1 总体思想,2 算法步骤

mean shift 图像分割 (二)3 算法原理,4 延伸

mean shift 图像分割 (三)5 非参数密度估计


不得不说,这个OpenCV实现实在不咋地,

这次我改风格了,中英文杂合注释


main.cpp

#include "opencv2/opencv.hpp"


#include <iostream>

using namespace cv;
using namespace std;



static void help(char** argv)
{
	cout << "\nDemonstrate mean-shift based color segmentation in spatial pyramid.\n"
		<< "Call:\n   " << argv[0] << " image\n"
		<< "This program allows you to set the spatial and color radius\n"
		<< "of the mean shift window as well as the number of pyramid reduction levels explored\n"
		<< endl;
}

//This colors the segmentations
static void floodFillPostprocess(Mat& img, const Scalar& colorDiff = Scalar::all(1))
{
	CV_Assert(!img.empty());
	RNG rng = theRNG();
	Mat mask(img.rows + 2, img.cols + 2, CV_8UC1, Scalar::all(0));
	for (int y = 0; y < img.rows; y++)
	{
		for (int x = 0; x < img.cols; x++)
		{
			if (mask.at<uchar>(y + 1, x + 1) == 0)
			{
				Scalar newVal(rng(256), rng(256), rng(256));
				floodFill(img, mask, Point(x, y), newVal, 0, colorDiff, colorDiff);
			}
		}
	}

}

string winName = "meanshift";
int spatialRad, colorRad, maxPyrLevel;
Mat img, res;

static void meanShiftSegmentation(int, void*)
{
	cout << "spatialRad=" << spatialRad << "; "
		<< "colorRad=" << colorRad << "; "
		<< "maxPyrLevel=" << maxPyrLevel << endl;
	pyrMeanShiftFiltering(img, res, spatialRad, colorRad, maxPyrLevel);
	floodFillPostprocess(res, Scalar::all(2));
	imshow(winName, res);
}


int main(int argc, char** argv)
{

	//if (argc != 2)
	//{
	//	help(argv);
	//	return -1;
	//}
	string fimg = "G:/Pic/fruits.jpg";//"G:/Pic/2012060619243397.png";
	img = imread(fimg);
	if (img.empty())
		return -1;
	//640-by-480  it works well to 	set spatialRadiusequal = 2 and colorRadiusequal = 40
	// max_level, which describes how many levels of scale pyramid you want 
	//used for segmentation.A max_levelof 2 or 3 works well for a 640 - by - 480 color image
	spatialRad = 40;
	colorRad = 22;
	maxPyrLevel = 2;

	namedWindow(winName, WINDOW_AUTOSIZE);

	createTrackbar("spatialRad", winName, &spatialRad, 80, meanShiftSegmentation);
	createTrackbar("colorRad", winName, &colorRad, 60, meanShiftSegmentation);
	createTrackbar("maxPyrLevel", winName, &maxPyrLevel, 5, meanShiftSegmentation);

	meanShiftSegmentation(0, 0);
	waitKey();
	return 0;
}

segmentation.cpp

/*M///
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"



/****************************************************************************************\
*                                         Meanshift                                      *
\****************************************************************************************/

CV_IMPL void
cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
                         double sp0, double sr, int max_level,
                         CvTermCriteria termcrit )
{
    const int cn = 3;
    const int MAX_LEVELS = 8;

    if( (unsigned)max_level > (unsigned)MAX_LEVELS )
        CV_Error( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );

    std::vector<cv::Mat> src_pyramid(max_level+1);
    std::vector<cv::Mat> dst_pyramid(max_level+1);
    cv::Mat mask0;
    int i, j, level;
    //uchar* submask = 0;

    #define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \
        tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22) // color diffference  Note: it‘s >= not <

    double sr2 = sr * sr;// sr: ||x|| color window radius 
    int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);
    int tab[768]; // 256*,lookup table for fast square distance  computation 
    cv::Mat src0 = cv::cvarrToMat(srcarr);
    cv::Mat dst0 = cv::cvarrToMat(dstarr);

    if( src0.type() != CV_8UC3 )
        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );

    if( src0.type() != dst0.type() )
        CV_Error( CV_StsUnmatchedFormats, "The input and output images must have the same type" );

    if( src0.size() != dst0.size() )
        CV_Error( CV_StsUnmatchedSizes, "The input and output images must have the same size" );

    if( !(termcrit.type & CV_TERMCRIT_ITER) )
        termcrit.max_iter = 5; // if we don't have set the number of iterations ,it will iterate 5  times at most .   
    termcrit.max_iter = MAX(termcrit.max_iter,1);
    termcrit.max_iter = MIN(termcrit.max_iter,100); // max iteration is 100
    if( !(termcrit.type & CV_TERMCRIT_EPS) )
        termcrit.epsilon = 1.f; // the default epsilon 
    termcrit.epsilon = MAX(termcrit.epsilon, 0.f);

    for( i = 0; i < 768; i++ )
        tab[i] = (i - 255)*(i - 255); //tab[0]=255^2,tab[255]=0,tab[512]=255^2

    // 1. construct pyramid
    src_pyramid[0] = src0;
    dst_pyramid[0] = dst0;
    for( level = 1; level <= max_level; level++ )
    {
        src_pyramid[level].create( (src_pyramid[level-1].rows+1)/2,
                        (src_pyramid[level-1].cols+1)/2, src_pyramid[level-1].type() );// plus 1 to ensure both the clos and row are even 
        dst_pyramid[level].create( src_pyramid[level].rows,
                        src_pyramid[level].cols, src_pyramid[level].type() ); //
        cv::pyrDown( src_pyramid[level-1], src_pyramid[level], src_pyramid[level].size() );// first using Gaussian blure then  removing every even-numbered row and column
        //CV_CALL( cvResize( src_pyramid[level-1], src_pyramid[level], CV_INTER_AREA ));
    }

    mask0.create(src0.rows, src0.cols, CV_8UC1);  // memory buf for mask of every scale 
    //CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));

    // 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)
    for( level = max_level; level >= 0; level-- )
    {
        cv::Mat src = src_pyramid[level]; // the current processing layer 
        cv::Size size = src.size();
        uchar* sptr = src.data; //  
        int sstep = (int)src.step;// all bytee in a row(including the padded pixels )
        uchar* mask = 0;
        int mstep = 0;
        uchar* dptr;
        int dstep;
        float sp = (float)(sp0 / (1 << level)); // spatial window radius,keep the contents which the kernel can cover are identical     
        sp = MAX( sp, 1 );

        if( level < max_level ) //except for the top level,先跳过,其实也可以忽略
        {
            cv::Size size1 = dst_pyramid[level+1].size(); // notice that layer level+1 has been  processed 
            cv::Mat m( size.height, size.width, CV_8UC1, mask0.data );  // Note that the memory to which .data point don't have the same size as the m. Howerver,the former will alway large or equal to the later. We just use the mask0 as an big enough container that only allocate one time. 
            dstep = (int)dst_pyramid[level+1].step;//
            dptr = dst_pyramid[level+1].data + dstep + cn; //jump the first row and first cloumn(including 3 channels) 
            mstep = (int)m.step;
            mask = m.data + mstep;//jump the first  row
            //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
            cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() ); // 这一行有意义吗?完全可以去掉啊?????
			// Note:the image is first upsized with new even rows and cols filled with 0s ,thereafter the missing values is approximated with the Gaussian convolution.
            m.setTo(cv::Scalar::all(0));

            for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
			// dptr + dstep + width*3*2 : jump to the second point of the next row 
			// mstep*2  : 2 row in mask is correspondence to 1 rows in dst_pyramid[level+1]; 
                for( j = 1; j < size1.width-1; j++, dptr += cn )//jump the first and the last column,Notice that before jump to the next row,the dptr have pointed to the last column
                {
                    int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
                    mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||
                        cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);//if any of it's 8 neigbours doesn't similar with it in color sapce,it should be proceed  which labeled with 1
                }
            }

            cv::dilate( m, m, cv::Mat() );
            mask = m.data;
        }

        dptr = dst_pyramid[level].data;
        dstep = (int)dst_pyramid[level].step;

        for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,// jumpping the padding bytes in the ends of each row of src 
                                          dptr += dstep - size.width*3,// jumpping the padding bytes in the ends of each row of src
                                          mask += mstep ) // the offset of mask
        {
            for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
            {
                int x0 = j, y0 = i, x1, y1, iter;// x1,y1: the position of mode 
                int c0, c1, c2;

                if( mask && !mask[j] ) // 可以忽略,mask !=0:except for the top level, mask[j]==0: similar to all of it's 8 neighbors
                    continue;

                c0 = sptr[0], c1 = sptr[1], c2 = sptr[2]; // b,g,r or L,u,v of the central position 

                // iterate meanshift procedure,核心部分
                for( iter = 0; iter < termcrit.max_iter; iter++ )
                {
                    uchar* ptr;
                    int x, y, count = 0; // count : count the number of pixels whithin the color support in a square window  
                    int minx, miny, maxx, maxy;
                    int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;//
                    double icount;
                    int stop_flag;

                    //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
					//a square window of (2*sp+1)*(2*sp+1)
                    minx = cvRound(x0 - sp); minx = MAX(minx, 0);//ensure minx doesn't less than the first column 
                    miny = cvRound(y0 - sp); miny = MAX(miny, 0);//ensure minx doesn't less than the first row
                    maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);
                    maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);
                    ptr = sptr + (miny - i)*sstep + (minx - j)*3;// move to (minx,miny)

                    for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 ) 
                    {
                        int row_count = 0; // count the number of pixels whthin the color support in a row
                        x = minx;
                        #if CV_ENABLE_UNROLLED //展开,先跳过
                        for( ; x + 3 <= maxx; x += 4, ptr += 12 )//process 4 colums every cycle to reduce the cycle times. it will be  faster than loop every column
                        {
                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x; row_count++;
                            }
                            t0 = ptr[3], t1 = ptr[4], t2 = ptr[5];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x+1; row_count++;
                            }
                            t0 = ptr[6], t1 = ptr[7], t2 = ptr[8];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x+2; row_count++;
                            }
                            t0 = ptr[9], t1 = ptr[10], t2 = ptr[11];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x+3; row_count++;
                            }
                        }
                        #endif
                        for( ; x <= maxx; x++, ptr += 3 ) // if we have defined CV_ENABLE_UNROLLED then processing the remain (maxx+1)%4 cloumns otherwise processing all of columns
                        {
                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; //b,g,r
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )//truncate with isr2 to have finite support(color similarity ) 
                            {  // the value [-255,255] first map to [0 510],then map to the squre distance to central position (i,j)
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x; 
								row_count++;
                            }
                        }
                        count += row_count;
                        sy += y*row_count;
                    }

                    if( count == 0 )
                        break;

                    icount = 1./count;
                    x1 = cvRound(sx*icount); // x mean 
                    y1 = cvRound(sy*icount); // Y mean 
                    s0 = cvRound(s0*icount); // b mean 
                    s1 = cvRound(s1*icount); // g mean 
                    s2 = cvRound(s2*icount); // r mean

                    stop_flag = (x0 == x1 && y0 == y1) || abs(x1-x0) + abs(y1-y0) + // converge to (i,j)
                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +
                        tab[s2 - c2 + 255] <= termcrit.epsilon; //movement can be ignored

                    x0 = x1; y0 = y1;
                    c0 = s0; c1 = s1; c2 = s2;// Notice;the center color was replaced by the filtered value  

                    if( stop_flag )
                        break;
                }
				
                dptr[0] = (uchar)c0; //assign the filtered value of converging point to the starting point
                dptr[1] = (uchar)c1;
                dptr[2] = (uchar)c2;
            }
        }
    }
}

void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
                                double sp, double sr, int maxLevel,
                                TermCriteria termcrit )
{
    Mat src = _src.getMat();

    if( src.empty() )
        return;

    _dst.create( src.size(), src.type() );
    CvMat c_src = src, c_dst = _dst.getMat();
    cvPyrMeanShiftFiltering( &c_src, &c_dst, sp, sr, maxLevel, termcrit );
}


已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客 返回首页