盒子滤波/均值滤波NEON优化

 github链接:https://github.com/XhtZz/boxfilter

/*
 * Version 1: brute-force box filter.
 * Cost: width * height * (2*radius+1) * (2*radius+1).
 *
 * For every pixel, sums all source pixels inside the (2*radius+1)-square
 * window centred on it. The window is clipped at the image borders, so border
 * pixels sum fewer samples.
 *
 * image  : source pixels, row-major, width*height bytes; only read.
 * dst    : destination, width*height bytes; must not overlap image.
 * width  : image width in pixels.
 * height : image height in pixels.
 * radius : window half-size; negative values are treated as 0.
 *
 * The stored value is the unnormalised window sum converted to 8 bits, i.e.
 * sums above 255 wrap modulo 256. Divide by the window area before storing if
 * a mean is wanted.
 * Indexing uses int, so width*height is assumed to fit in an int.
 */
void boxfilter(unsigned char* image, unsigned char* dst, int width, int height, int radius){
    if(!image || !dst || width<=0 || height<=0){
        return;
    }
    if(radius<0){
        radius = 0;
    }
    /* A radius reaching past the last index already covers the whole axis;
       clamping it keeps the index arithmetic below free of overflow. */
    const int radius_w = (radius<width) ? radius : width-1;
    const int radius_h = (radius<height) ? radius : height-1;

    for(int h=0;h<height;++h){
        /* Inclusive row range of the window, clipped to the image. */
        const int start_h = (h>radius_h) ? h-radius_h : 0;
        const int end_h = (h<height-1-radius_h) ? h+radius_h : height-1;
        for(int w=0;w<width;++w){
            /* Inclusive column range of the window, clipped to the image. */
            const int start_w = (w>radius_w) ? w-radius_w : 0;
            const int end_w = (w<width-1-radius_w) ? w+radius_w : width-1;

            /* Unsigned so that a very large window wraps instead of overflowing. */
            unsigned int sum = 0;
            for(int i=start_h;i<=end_h;++i){
                const unsigned char* row = image+i*width;
                for(int j=start_w;j<=end_w;++j){
                    sum += row[j];
                }
            }
            dst[h*width+w] = (unsigned char)sum;
        }
    }
}

/*
 * Version 2: separable box filter (row pass, then column pass).
 * Cost: width * height * (2*radius+1) * 2.
 *
 * The row pass stores, for every pixel, the sum of its horizontal window into
 * a temporary buffer; the column pass sums that buffer over the vertical
 * window. Windows are clipped at the image borders.
 *
 * image  : source pixels, row-major, width*height bytes; only read.
 * dst    : destination, width*height bytes.
 * width  : image width in pixels.
 * height : image height in pixels.
 * radius : window half-size; negative values are treated as 0.
 *
 * The stored value is the unnormalised window sum converted to 8 bits, i.e.
 * sums above 255 wrap modulo 256.
 * If the temporary buffer cannot be allocated the function returns without
 * writing to dst.
 */
void boxfilter(unsigned char* image, unsigned char* dst, int width, int height, int radius){
    if(!image || !dst || width<=0 || height<=0){
        return;
    }
    if(radius<0){
        radius = 0;
    }
    /* A radius reaching past the last index already covers the whole axis;
       clamping it keeps the index arithmetic below free of overflow. */
    const int radius_w = (radius<width) ? radius : width-1;
    const int radius_h = (radius<height) ? radius : height-1;

    const size_t row_len = (size_t)width;
    const size_t pixel_count = row_len*(size_t)height;
    if(pixel_count > (size_t)-1/sizeof(unsigned int)){
        return; /* byte size of the buffer would overflow size_t */
    }
    unsigned int* temp = (unsigned int*)malloc(pixel_count*sizeof *temp);
    if(!temp){
        return;
    }

    /* Row pass: temp holds the horizontal window sum of every pixel. */
    for(int h=0;h<height;++h){
        const unsigned char* src_row = image+(size_t)h*row_len;
        unsigned int* sum_row = temp+(size_t)h*row_len;
        for(int w=0;w<width;++w){
            const int start_w = (w>radius_w) ? w-radius_w : 0;
            const int end_w = (w<width-1-radius_w) ? w+radius_w : width-1;

            unsigned int sum = 0;
            for(int i=start_w;i<=end_w;++i){
                sum += src_row[i];
            }
            sum_row[w] = sum;
        }
    }

    /* Column pass: add up the row sums over the vertical window. */
    for(int h=0;h<height;++h){
        unsigned char* out_row = dst+(size_t)h*row_len;
        const int start_h = (h>radius_h) ? h-radius_h : 0;
        const int end_h = (h<height-1-radius_h) ? h+radius_h : height-1;
        for(int w=0;w<width;++w){
            unsigned int sum = 0;
            for(int j=start_h;j<=end_h;++j){
                sum += temp[(size_t)j*row_len+(size_t)w];
            }
            out_row[w] = (unsigned char)sum;
        }
    }

    free(temp);
}

/*
 * Version 3: running-sum box filter whose cost does not depend on radius.
 * Cost: about width * height * (2*2).
 *
 * Moving the window by one pixel only requires adding the sample that enters
 * the window and subtracting the one that leaves it, so the sum of the samples
 * in between is reused. This is applied along rows first (into a temporary
 * buffer) and then along columns. Windows are clipped at the image borders,
 * and images smaller than the window are handled.
 *
 * image  : source pixels, row-major, width*height bytes; only read.
 * dst    : destination, width*height bytes.
 * width  : image width in pixels.
 * height : image height in pixels.
 * radius : window half-size; negative values are treated as 0.
 *
 * The stored value is the unnormalised window sum converted to 8 bits, i.e.
 * sums above 255 wrap modulo 256.
 * If a work buffer cannot be allocated the function returns without writing
 * to dst.
 */
void boxfilter(unsigned char* image, unsigned char* dst, int width, int height, int radius){
    if(!image || !dst || width<=0 || height<=0){
        return;
    }
    if(radius<0){
        radius = 0;
    }
    /* A radius reaching past the last index already covers the whole axis;
       clamping it keeps the index arithmetic below free of overflow. */
    const int radius_w = (radius<width) ? radius : width-1;
    const int radius_h = (radius<height) ? radius : height-1;

    const size_t row_len = (size_t)width;
    const size_t pixel_count = row_len*(size_t)height;
    if(pixel_count > (size_t)-1/sizeof(unsigned int)){
        return; /* byte size of the buffer would overflow size_t */
    }
    unsigned int* temp = (unsigned int*)malloc(pixel_count*sizeof *temp);
    unsigned int* col_sum = (unsigned int*)calloc(row_len, sizeof *col_sum);
    if(!temp || !col_sum){
        free(col_sum);
        free(temp);
        return;
    }

    /* Horizontal pass: temp holds the horizontal window sum of every pixel. */
    for(int h=0;h<height;++h){
        const unsigned char* src_row = image+(size_t)h*row_len;
        unsigned int* sum_row = temp+(size_t)h*row_len;

        /* Window of column 0: columns 0..radius_w. */
        unsigned int sum = 0;
        for(int i=0;i<=radius_w;++i){
            sum += src_row[i];
        }
        for(int i=0;i<width;++i){
            sum_row[i] = sum;
            /* Slide to column i+1: one sample may enter on the right,
               one may leave on the left. */
            if(i<width-1-radius_w){
                sum += src_row[i+radius_w+1];
            }
            if(i>=radius_w){
                sum -= src_row[i-radius_w];
            }
        }
    }

    /* Vertical pass: col_sum[w] is the running window sum of column w. */
    for(int h=0;h<=radius_h;++h){
        const unsigned int* sum_row = temp+(size_t)h*row_len;
        for(int w=0;w<width;++w){
            col_sum[w] += sum_row[w];
        }
    }
    for(int h=0;h<height;++h){
        unsigned char* out_row = dst+(size_t)h*row_len;
        for(int w=0;w<width;++w){
            out_row[w] = (unsigned char)col_sum[w];
        }
        /* Slide to row h+1. */
        if(h<height-1-radius_h){
            const unsigned int* entering = temp+(size_t)(h+radius_h+1)*row_len;
            for(int w=0;w<width;++w){
                col_sum[w] += entering[w];
            }
        }
        if(h>=radius_h){
            const unsigned int* leaving = temp+(size_t)(h-radius_h)*row_len;
            for(int w=0;w<width;++w){
                col_sum[w] -= leaving[w];
            }
        }
    }

    free(col_sum);
    free(temp);
}

/*
 * Version 4: running-sum box filter with the vertical pass accelerated by
 * NEON intrinsics.
 *
 * The horizontal pass is the scalar running sum. In the vertical pass a whole
 * row of horizontal sums is added to / subtracted from the per-column running
 * sums at once; those two row operations use 32-bit NEON lanes when NEON is
 * available and plain loops otherwise.
 */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#endif

/* acc[w] += row[w] for w in [0, width). */
static void boxfilter_add_row(unsigned int* acc, const unsigned int* row, int width){
    int w = 0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Four 32-bit lanes per step; the scalar loop below finishes the rest. */
    for(;w<=width-4;w+=4){
        const uint32x4_t acc_v = vld1q_u32((const uint32_t*)(acc+w));
        const uint32x4_t row_v = vld1q_u32((const uint32_t*)(row+w));
        vst1q_u32((uint32_t*)(acc+w), vaddq_u32(acc_v,row_v));
    }
#endif
    for(;w<width;++w){
        acc[w] += row[w];
    }
}

/* acc[w] -= row[w] for w in [0, width). */
static void boxfilter_sub_row(unsigned int* acc, const unsigned int* row, int width){
    int w = 0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Four 32-bit lanes per step; the scalar loop below finishes the rest. */
    for(;w<=width-4;w+=4){
        const uint32x4_t acc_v = vld1q_u32((const uint32_t*)(acc+w));
        const uint32x4_t row_v = vld1q_u32((const uint32_t*)(row+w));
        vst1q_u32((uint32_t*)(acc+w), vsubq_u32(acc_v,row_v));
    }
#endif
    for(;w<width;++w){
        acc[w] -= row[w];
    }
}

/*
 * image  : source pixels, row-major, width*height bytes; only read.
 * dst    : destination, width*height bytes.
 * width  : image width in pixels.
 * height : image height in pixels.
 * radius : window half-size; negative values are treated as 0.
 *
 * Windows are clipped at the image borders, and images smaller than the
 * window are handled.
 * The stored value is the unnormalised window sum converted to 8 bits, i.e.
 * sums above 255 wrap modulo 256.
 * If a work buffer cannot be allocated the function returns without writing
 * to dst.
 */
void boxfilter(unsigned char* image, unsigned char* dst, int width, int height, int radius){
    if(!image || !dst || width<=0 || height<=0){
        return;
    }
    if(radius<0){
        radius = 0;
    }
    /* A radius reaching past the last index already covers the whole axis;
       clamping it keeps the index arithmetic below free of overflow. */
    const int radius_w = (radius<width) ? radius : width-1;
    const int radius_h = (radius<height) ? radius : height-1;

    const size_t row_len = (size_t)width;
    const size_t pixel_count = row_len*(size_t)height;
    if(pixel_count > (size_t)-1/sizeof(unsigned int)){
        return; /* byte size of the buffer would overflow size_t */
    }
    /* Horizontal sums can reach 255*(2*radius+1), so both buffers are 32-bit. */
    unsigned int* temp = (unsigned int*)malloc(pixel_count*sizeof *temp);
    unsigned int* col_sum = (unsigned int*)calloc(row_len, sizeof *col_sum);
    if(!temp || !col_sum){
        free(col_sum);
        free(temp);
        return;
    }

    /* Horizontal pass: temp holds the horizontal window sum of every pixel. */
    for(int h=0;h<height;++h){
        const unsigned char* src_row = image+(size_t)h*row_len;
        unsigned int* sum_row = temp+(size_t)h*row_len;

        /* Window of column 0: columns 0..radius_w. */
        unsigned int sum = 0;
        for(int i=0;i<=radius_w;++i){
            sum += src_row[i];
        }
        for(int i=0;i<width;++i){
            sum_row[i] = sum;
            /* Slide to column i+1: one sample may enter on the right,
               one may leave on the left. */
            if(i<width-1-radius_w){
                sum += src_row[i+radius_w+1];
            }
            if(i>=radius_w){
                sum -= src_row[i-radius_w];
            }
        }
    }

    /* Vertical pass: col_sum[w] is the running window sum of column w.
       Start with the window of row 0: rows 0..radius_h. */
    for(int h=0;h<=radius_h;++h){
        boxfilter_add_row(col_sum, temp+(size_t)h*row_len, width);
    }
    for(int h=0;h<height;++h){
        unsigned char* out_row = dst+(size_t)h*row_len;
        for(int w=0;w<width;++w){
            out_row[w] = (unsigned char)col_sum[w];
        }
        /* Slide to row h+1: one row may enter at the bottom, one may leave
           at the top. */
        if(h<height-1-radius_h){
            boxfilter_add_row(col_sum, temp+(size_t)(h+radius_h+1)*row_len, width);
        }
        if(h>=radius_h){
            boxfilter_sub_row(col_sum, temp+(size_t)(h-radius_h)*row_len, width);
        }
    }

    free(col_sum);
    free(temp);
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值