中值滤波--sj

原创 2015年04月22日 22:17:31

// stdafx.h : 标准系统包含文件的包含文件,
// 或是经常使用但不常更改的
// 特定于项目的包含文件
//


#pragma once


#include "targetver.h"


#include <stdio.h>
#include <tchar.h>






// TODO: 在此处引用程序需要的其他头文件


#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>










void ctmf(
    unsigned char* src, unsigned char* dst,
    int width, int height,
    int src_step_row, int dst_step_row,
    int r, int channels, unsigned long memsize
    );


void Median3X3Filter(unsigned char *pSrc,unsigned char *pDst,int rect_w,int rect_h,int nWidth);














// stdafx.cpp : 只包括标准包含文件的源文件

// median3X3.pch 将作为预编译头
// stdafx.obj 将包含预编译类型信息


#include "stdafx.h"




void BubbleSort(int * bArray, int iFilterLen);
void QuickSortND(int SortData[],int len);




/* Type declarations */
#ifdef _MSC_VER
#include <basetsd.h>
typedef UINT8 uint8_t;
typedef UINT16 uint16_t;
typedef UINT32 uint32_t;
#pragma warning( disable: 4799 )
#else
#include <stdint.h>
#endif


#define __SSE2__




/* Intrinsic declarations */
#if defined(__SSE2__) || defined(__MMX__)
#if defined(__SSE2__)
#include <emmintrin.h>
#elif defined(__MMX__)
#include <mmintrin.h>
#endif
#if defined(__GNUC__)
#include <mm_malloc.h>
#elif defined(_MSC_VER)
#include <malloc.h>
#endif
#elif defined(__ALTIVEC__)
#include <altivec.h>
#endif


/* Compiler peculiarities */
#if defined(__GNUC__)
#include <stdint.h>
#define inline __inline__
#define align(x) __attribute__ ((aligned (x)))
#elif defined(_MSC_VER)
#define inline __inline
#define align(x) __declspec(align(x))
#else
#define inline
#define align(x)
#endif


#ifndef MIN
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif


#ifndef MAX
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif


/**
 * This structure represents a two-tier histogram. The first tier (known as the
 * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
 * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
 * coarse bucket designated by the 4 MSBs of the fine bucket value.
 *
 * The structure is aligned on 16 bytes, which is a prerequisite for SIMD
 * instructions. Each bucket is 16 bit wide, which means that extra care must be
 * taken to prevent overflow.
 */
typedef struct align(16)
{
    uint16_t coarse[16];
    uint16_t fine[16][16];
} Histogram;


/**
 * HOP is short for Histogram OPeration. This macro makes an operation \a op on
 * histogram \a h for pixel value \a x. It takes care of handling both levels.
 */
#define HOP(h,x,op) \
    h.coarse[x>>4] op; \
    *((uint16_t*) h.fine + x) op;


#define COP(c,j,x,op) \
    h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
    h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;


/**
 * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
 * SSE2, MMX or Altivec, if available.
 */






#if defined(__SSE2__)
static inline void histogram_add( const uint16_t x[16], uint16_t y[16] )
{
    *(__m128i*) &y[0] = _mm_add_epi16( *(__m128i*) &y[0], *(__m128i*) &x[0] );
    *(__m128i*) &y[8] = _mm_add_epi16( *(__m128i*) &y[8], *(__m128i*) &x[8] );
}
#elif defined(__MMX__)
static inline void histogram_add( const uint16_t x[16], uint16_t y[16] )
{
    *(__m64*) &y[0]  = _mm_add_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
    *(__m64*) &y[4]  = _mm_add_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
    *(__m64*) &y[8]  = _mm_add_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
    *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
}
#elif defined(__ALTIVEC__)
static inline void histogram_add( const uint16_t x[16], uint16_t y[16] )
{
    *(vector unsigned short*) &y[0] = vec_add( *(vector unsigned short*) &y[0], *(vector unsigned short*) &x[0] );
    *(vector unsigned short*) &y[8] = vec_add( *(vector unsigned short*) &y[8], *(vector unsigned short*) &x[8] );
}
#else
static inline void histogram_add( const uint16_t x[16], uint16_t y[16] )
{
    int i;
    for ( i = 0; i < 16; ++i ) {
        y[i] += x[i];
    }
}
#endif


/**
 * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
 * of SSE2, MMX or Altivec, if available.
 */
#if defined(__SSE2__)
static inline void histogram_sub( const uint16_t x[16], uint16_t y[16] )
{
    *(__m128i*) &y[0] = _mm_sub_epi16( *(__m128i*) &y[0], *(__m128i*) &x[0] );
    *(__m128i*) &y[8] = _mm_sub_epi16( *(__m128i*) &y[8], *(__m128i*) &x[8] );
}
#elif defined(__MMX__)
static inline void histogram_sub( const uint16_t x[16], uint16_t y[16] )
{
    *(__m64*) &y[0]  = _mm_sub_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
    *(__m64*) &y[4]  = _mm_sub_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
    *(__m64*) &y[8]  = _mm_sub_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
    *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
}
#elif defined(__ALTIVEC__)
static inline void histogram_sub( const uint16_t x[16], uint16_t y[16] )
{
    *(vector unsigned short*) &y[0] = vec_sub( *(vector unsigned short*) &y[0], *(vector unsigned short*) &x[0] );
    *(vector unsigned short*) &y[8] = vec_sub( *(vector unsigned short*) &y[8], *(vector unsigned short*) &x[8] );
}
#else
static inline void histogram_sub( const uint16_t x[16], uint16_t y[16] )
{
    int i;
    for ( i = 0; i < 16; ++i ) {
        y[i] -= x[i];
    }
}
#endif


static inline void histogram_muladd( const uint16_t a, const uint16_t x[16], uint16_t y[16] )
{
    int i;
    for ( i = 0; i < 16; ++i ) {
        y[i] += a * x[i];
    }
}


static void ctmf_helper(unsigned char* src, unsigned char* dst,int width,int height,int src_step,int dst_step,int r,int cn,int pad_left,  int pad_right)
{
    const int m = height, n = width;
    int i, j, k, c;
    const unsigned char *p, *q;


    Histogram H[4];
    uint16_t *h_coarse, *h_fine, luc[4][16];


    assert( src );
    assert( dst );
    assert( r >= 0 );
    assert( width >= 2*r+1 );
    assert( height >= 2*r+1 );
    assert( src_step != 0 );
    assert( dst_step != 0 );


    /* SSE2 and MMX need aligned memory, provided by _mm_malloc(). */
#if defined(__SSE2__) || defined(__MMX__)
    h_coarse = (uint16_t*) _mm_malloc(  1 * 16 * n * cn * sizeof(uint16_t), 16 );
    h_fine   = (uint16_t*) _mm_malloc( 16 * 16 * n * cn * sizeof(uint16_t), 16 );
    memset( h_coarse, 0,  1 * 16 * n * cn * sizeof(uint16_t) );
    memset( h_fine,   0, 16 * 16 * n * cn * sizeof(uint16_t) );
#else
    h_coarse = (uint16_t*) calloc(  1 * 16 * n * cn, sizeof(uint16_t) );
    h_fine   = (uint16_t*) calloc( 16 * 16 * n * cn, sizeof(uint16_t) );
#endif


    /* First row initialization */
    /*
    #define HOP(h,x,op) \
    h.coarse[x>>4] op; \
    *((uint16_t*) h.fine + x) op;


    #define COP(c,j,x,op) \
    h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
    h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
    */
    for ( j = 0; j < n; ++j ) 
    {
        for ( c = 0; c < cn; ++c )
        {
            //COP( c, j, src[cn*j+c], += r+1 );
            h_coarse[ 16*(n*c+j) + (src[cn*j+c]>>4) ] += r+1;
            h_fine[ 16 * (n*(16*c+(src[cn*j+c]>>4)) + j) + (src[cn*j+c] & 0xF) ] += r+1;
        }
    }
    for ( i = 0; i < r; ++i )
    {
        for ( j = 0; j < n; ++j ) 
        {
            for ( c = 0; c < cn; ++c )
            {
                //COP( c, j, src[src_step*i+cn*j+c], ++ );


                h_coarse[ 16*(n*c+j) + (src[src_step*i+cn*j+c]>>4) ]++;
                h_fine[ 16 * (n*(16*c+(src[src_step*i+cn*j+c]>>4)) + j) + (src[src_step*i+cn*j+c] & 0xF) ]++;


            }
        }
    }


    for ( i = 0; i < m; ++i ) 
    {


        /* Update column histograms for entire row. */
        p = src + src_step * MAX( 0, i-r-1 );
        q = p + cn * n;
        for ( j = 0; p != q; ++j ) 
        {
            for ( c = 0; c < cn; ++c, ++p )
            {
                COP( c, j, *p, -- );
            }
        }


        p = src + src_step * MIN( m-1, i+r );
        q = p + cn * n;
        for ( j = 0; p != q; ++j ) 
        {
            for ( c = 0; c < cn; ++c, ++p )
            {
                COP( c, j, *p, ++ );
            }
        }


        /* First column initialization */
        memset( H, 0, cn*sizeof(H[0]) );
        memset( luc, 0, cn*sizeof(luc[0]) );
        if ( pad_left ) 
        {
            for ( c = 0; c < cn; ++c )
            {
                histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
            }
        }
        for ( j = 0; j < (pad_left ? r : 2*r); ++j )
        {
            for ( c = 0; c < cn; ++c ) 
            {
                histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
            }
        }
        for ( c = 0; c < cn; ++c )
        {
            for ( k = 0; k < 16; ++k )
            {
                histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
            }
        }


        for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j )
        {
            for ( c = 0; c < cn; ++c ) 
            {
                const uint16_t t = 2*r*r + 2*r;
                uint16_t sum = 0, *segment;
                int b;


                histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );


                /* Find median at coarse level */
                for ( k = 0; k < 16 ; ++k ) 
                {
                    sum += H[c].coarse[k];
                    if ( sum > t ) 
                    {
                        sum -= H[c].coarse[k];
                        break;
                    }
                }
                assert( k < 16 );


                /* Update corresponding histogram segment */
                if ( luc[c][k] <= j-r ) 
                {
                    memset( &H[c].fine[k], 0, 16 * sizeof(uint16_t) );
                    for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] )
                    {
                        histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
                    }
                    if ( luc[c][k] < j+r+1 )
                    {
                        histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
                        luc[c][k] = j+r+1;
                    }
                }
                else 
                {
                    for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) 
                    {
                        histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
                        histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
                    }
                }


                histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );


                /* Find median in segment */
                segment = H[c].fine[k];
                for ( b = 0; b < 16 ; ++b )
                {
                    sum += segment[b];
                    if ( sum > t ) 
                    {
                        dst[dst_step*i+cn*j+c] = 16*k + b;
                        break;
                    }
                }
                assert( b < 16 );
            }
        }
    }


#if defined(__SSE2__) || defined(__MMX__)
    _mm_empty();
    _mm_free(h_coarse);
    _mm_free(h_fine);
#else
    free(h_coarse);
    free(h_fine);
#endif
}


/**
 * \brief Constant-time median filtering
 *
 * This function does a median filtering of an 8-bit image. The source image is
 * processed as if it was padded with zeros. The median kernel is square with
 * odd dimensions. Images of arbitrary size may be processed.
 *
 * To process multi-channel images, you must call this function multiple times,
 * changing the source and destination adresses and steps such that each channel
 * is processed as an independent single-channel image.
 *
 * Processing images of arbitrary bit depth is not supported.
 *
 * The computing time is O(1) per pixel, independent of the radius of the
 * filter. The algorithm's initialization is O(r*width), but it is negligible.
 * Memory usage is simple: it will be as big as the cache size, or smaller if
 * the image is small. For efficiency, the histograms' bins are 16-bit wide.
 * This may become too small and lead to overflow as \a r increases.
 *
 * \param src           Source image data.
 * \param dst           Destination image data. Must be preallocated.
 * \param width         Image width, in pixels.
 * \param height        Image height, in pixels.
 * \param src_step      Distance between adjacent pixels on the same column in
 *                      the source image, in bytes.
 * \param dst_step      Distance between adjacent pixels on the same column in
 *                      the destination image, in bytes.
 * \param r             Median filter radius. The kernel will be a 2*r+1 by
 *                      2*r+1 square.
 * \param cn            Number of channels. For example, a grayscale image would
 *                      have cn=1 while an RGB image would have cn=3.
 * \param memsize       Maximum amount of memory to use, in bytes. Set this to
 *                      the size of the L2 cache, then vary it slightly and
 *                      measure the processing time to find the optimal value.
 *                      For example, a 512 kB L2 cache would have
 *                      memsize=512*1024 initially.
 */
void ctmf(unsigned char* src,unsigned char* dst,int width,int height,int src_step,int dst_step,int r,int cn,long unsigned int memsize)
{
    /*
     * Processing the image in vertical stripes is an optimization made
     * necessary by the limited size of the CPU cache. Each histogram is 544
     * bytes big and therefore I can fit a limited number of them in the cache.
     * That number may sometimes be smaller than the image width, which would be
     * the number of histograms I would need without stripes.
     *
     * I need to keep histograms in the cache so that they are available
     * quickly when processing a new row. Each row needs access to the previous
     * row's histograms. If there are too many histograms to fit in the cache,
     * thrashing to RAM happens.
     *
     * To solve this problem, I figure out the maximum number of histograms
     * that can fit in cache. From this is determined the number of stripes in
     * an image. The formulas below make the stripes all the same size and use
     * as few stripes as possible.
     *
     * Note that each stripe causes an overlap on the neighboring stripes, as
     * when mowing the lawn. That overlap is proportional to r. When the overlap
     * is a significant size in comparison with the stripe size, then we are not
     * O(1) anymore, but O(r). In fact, we have been O(r) all along, but the
     * initialization term was neglected, as it has been (and rightly so) in B.
     * Weiss, "Fast Median and Bilateral Filtering", SIGGRAPH, 2006. Processing
     * by stripes only makes that initialization term bigger.
     *
     * Also, note that the leftmost and rightmost stripes don't need overlap.
     * A flag is passed to ctmf_helper() so that it treats these cases as if the
     * image was zero-padded.
     */
    int stripes = (int) ceil( (double) (width - 2*r) / (memsize / sizeof(Histogram) - 2*r) );
    int stripe_size = (int) ceil( (double) ( width + stripes*2*r - 2*r ) / stripes );


    int i;


    for ( i = 0; i < width; i += stripe_size - 2*r ) {
        int stripe = stripe_size;
        /* Make sure that the filter kernel fits into one stripe. */
        if ( i + stripe_size - 2*r >= width || width - (i + stripe_size - 2*r) < 2*r+1 ) {
            stripe = width - i;
        }


        ctmf_helper( src + cn*i, dst + cn*i, stripe, height, src_step, dst_step, r, cn,
                i == 0, stripe == width - i );


        if ( stripe == width - i ) {
            break;
        }
    }
}


void Median3X3Filter(unsigned char *pSrc,unsigned char *pDst,int rect_w,int rect_h,int nWidth)
{
    unsigned char   *lpSrc;                // 指向源图像的指针
    int aValue[200];
    int bValue[200];
    int i,j,k,l,x,y;
    int radius = 5;
    int len = (2*radius+1)*(2*radius+1);
    memcpy(pDst,pSrc,sizeof(unsigned char)*rect_w);
    memcpy(pDst+(rect_h-1)*rect_w,pSrc+(rect_h-1)*nWidth,sizeof(unsigned char)*rect_w);
    for (i = 1; i < rect_h - 1;i++)
    {
        pDst[i*rect_w] = pSrc[i*nWidth];
        pDst[i*rect_w + rect_w - 1] = pSrc[i*nWidth+rect_w-1];
    }




    for(i = radius; i < rect_h - radius; i++)
    {
        // 列(除去边缘几列)
        for(j = radius; j < rect_w - radius; j++)
        {
            lpSrc = pSrc + i*nWidth + j;
            k = 0;
            for (y = -radius; y <= radius; y++)
            {
                for (x = -radius; x <= radius; x++)
                {
                    aValue[k++] = lpSrc[y*nWidth+x];
                }
            }
            
            // 获取中值
            //BubbleSort(aValue, len);
            QuickSortND(aValue,len);


            pDst[i*rect_w+j] = aValue[len/2];
        }
    }


}


void BubbleSort(int * bArray, int iFilterLen)
{
    int i,j; // 循环变量
    unsigned char bTemp;
    
    // 用冒泡法对数组进行排序
    for (j = 0; j < iFilterLen - 1; j ++)
    {
        for (i = 0; i < iFilterLen - j - 1; i ++)
        {
            if (bArray[i] > bArray[i + 1])
            {
                // 互换
                bTemp = bArray[i];
                bArray[i] = bArray[i + 1];
                bArray[i + 1] = bTemp;
            }
        }
    }
}








void exchData(int *x0,int *x1)
{
    int tmp = *x0;
    *x0 = *x1;
    *x1 = tmp;
}
//划分子函数
int partition(int SortData[],int low,int high)
{
    int len=high-low+1;
    if(len<2) return low ;
    int i=low;               //前段搜寻标
    int j=high+1; //号,注意这里i=
    int key=SortData[low];
    for(;;)
    {
        do 
        {
            i++;
        }while(i<=high&&SortData[i]<key);
        do 
        {
            j--;
        }while(SortData[j]>key);
        if(i>=j) 
            break;


        exchData(SortData+i,SortData+j);
    }
    exchData(SortData+low,SortData+j);
    return j;
}


void QuickSortND(int SortData[],int len)
{


    int stk[64] = {0};
    int p=0;
    int l,h,m;
    if(len<2) 
        return;
    stk[p++]=0;
    stk[p++]=len-1;
    while(p!=0)
    {
        h=stk[--p];
        l=stk[--p];
        if (l<h)
        {
            m=partition(SortData,l,h);
            if (m-1>l)
            {
                stk[p++]=l;
                stk[p++]=m-1;
            }
            if (m+1<h)
            {
                stk[p++]=m+1;
                stk[p++]=h;
            }
        }
    }

}



// median3X3.cpp : 定义控制台应用程序的入口点。
//


#include "stdafx.h"


#define WW  1920
#define HH  1080


static unsigned char MeM0[WW*HH];


static unsigned char pDst0[1000*1000]; 
static unsigned char pDst1[1000*1000]; 


int _tmain(int argc, _TCHAR* argv[])
{
    
    srand((unsigned)time(NULL));


    int y,x;


    for (y = 0; y < HH; y++)
    {
        for (x = 0; x < WW; x++)
        {
            MeM0[y*WW+x] = rand()%256;
        }
    }


    int rect_w = 1000;
    int rect_h = 1000;
    /*
    void ctmf(
        const unsigned char* const src, unsigned char* const dst,
        const int width, const int height,
        const int src_step, const int dst_step,
        const int r, const int cn, const long unsigned int memsize
        );
*/


    for (int i = 0; i < 1; i++)
    {
        int t0 = clock();
        Median3X3Filter(MeM0,pDst0,rect_w,rect_h,WW);
        t0 = clock()-t0;
        printf("t0 = %d\n",t0);


        int t1 = clock();
        ctmf(MeM0,pDst1,rect_w,rect_h,WW,rect_w,5,1,160*1024);
        t1 = clock()-t1;
        printf("t1 = %d\n",t1);




    }






#if 0
    for (y = 0; y < rect_h; y++)
    {
        for (x = 0; x < rect_w; x++)
        {
            printf("%4d,",MeM0[y*WW+x]);
        }
        printf("\n");
    }
    printf("\n");printf("\n");printf("\n");printf("\n");


    for (y = 0; y < rect_h; y++)
    {
        for (x = 0; x < rect_w; x++)
        {
            printf("%4d,",pDst0[y*rect_w+x]);
        }
        printf("\n");
    }
    printf("\n");printf("\n");printf("\n");printf("\n");




    for (y = 0; y < rect_h; y++)
    {
        for (x = 0; x < rect_w; x++)
        {
            printf("%4d,",pDst1[y*rect_w+x]);
        }
        printf("\n");
    }
    printf("\n");printf("\n");printf("\n");printf("\n");




#endif


    for (y = 5; y < rect_h-5; y++)
    {
        for (x = 5; x < rect_w-5; x++)
        {
            if (pDst1[y*rect_w+x] != pDst0[y*rect_w+x])
            {
                printf("error\n");
            }
        }
    }
    printf("\n");printf("\n");printf("\n");printf("\n");






return 0;
}


相关文章推荐

图像中值滤波FPGA和MATLAB的实现

  • 2015年12月15日 10:05
  • 26.42MB
  • 下载

中值滤波matlab代码

  • 2015年05月12日 20:29
  • 2KB
  • 下载

自适应中值滤波及MATLAB实现

自适应中值滤波器是以m*n的矩形窗口Sxy定义的滤波器区域内图像的统计特性为基础的,可以处理具有更大概率的脉冲噪声如椒盐噪声,在平滑非脉冲噪声时能保留细节。 其matlab实现如下,并与中值滤波作比较...

中值滤波matlab

  • 2015年12月21日 19:32
  • 1KB
  • 下载

中值滤波代码

  • 2014年12月16日 08:46
  • 9KB
  • 下载

【opencv学习笔记1】5种图像滤波辨析:方框、均值、高斯、中值、双边

图像滤波 什么是图像滤波 图像滤波,即在尽量保留图像细节特征的条件下对目标图像的噪声进行抑制,是图像预处理中不可缺少的操作,其处理效果的好坏将直接影响到后续图像处理和分析的有效性和可靠性。...

中值滤波程序

  • 2014年05月20日 17:03
  • 558B
  • 下载

通过中值滤波,归一化对比LAMOST与SDSS的光谱

% Author:shizhixin % Email:szhixin@gmail.com % Blog:http://blog.csdn.net/shizhixin % Date:2012-03-12...

自适应中值滤波算法

  • 2014年08月30日 10:41
  • 544KB
  • 下载
内容举报
返回顶部
收藏助手
不良信息举报
您举报文章:中值滤波--sj
举报原因:
原因补充:

(最多只允许输入30个字)