1.图像快速遍历
遍历方法:直接获取cv::Mat对象的像素块数据指针
// Inverts the colour samples of an 8-bit image in place by walking the raw
// row pointers of the cv::Mat instead of using per-pixel accessors.
//
// src: image to modify. Matrices that are empty or whose depth is not CV_8U
//      are left untouched, because the byte arithmetic below is only
//      meaningful for 8-bit samples.
//
// For images with up to three channels every sample is inverted. For images
// with more channels only the first three samples of each pixel are inverted
// and the remaining ones (presumably alpha for a 4-channel image) are kept.
void fasterTraversePixel(cv::Mat &src)
{
    if (src.empty() || src.depth() != CV_8U)
        return;

    const int channels = src.channels();
    const int w = src.cols;
    const int h = src.rows;
    for (int row = 0; row < h; row++)
    {
        // src.step is the distance in bytes between two consecutive rows.
        uchar* uc_pixel = src.data + row * src.step;
        if (channels <= 3)
        {
            // Every byte of the row is a colour sample: invert them in one
            // flat pass (identical to the per-pixel form for 3 channels).
            const int rowBytes = w * channels;
            for (int i = 0; i < rowBytes; i++)
                uc_pixel[i] = 255 - uc_pixel[i];
        }
        else
        {
            // Step by the real pixel size so the extra channels are skipped
            // rather than being mistaken for the next pixel's samples.
            for (int col = 0; col < w; col++, uc_pixel += channels)
            {
                uc_pixel[0] = 255 - uc_pixel[0];
                uc_pixel[1] = 255 - uc_pixel[1];
                uc_pixel[2] = 255 - uc_pixel[2];
            }
        }
    }
}
测试图像信息如下,遍历这幅图像的时间为12ms左右。
2. 快速中值滤波
方法: 对于半径为1的中值滤波,每个输出像素只涉及3×3邻域内的9个像素,因此可以用固定次数的比较交换(排序网络)求出中值,并利用SSE指令一次并行处理16个字节
// 3x3 median filter for interleaved 8-bit images, processing 16 bytes per
// step with SSE2 and finishing each row with a scalar loop.
//
// Src    : first byte of the input image (only read).
// Dest   : first byte of the output image. Only interior pixels are written;
//          the first/last row and first/last column are left as they are.
//          Dest should not alias Src, otherwise later rows would read
//          already-filtered data.
// Width  : image width in pixels.
// Height : image height in rows.
// Stride : distance in bytes between consecutive rows, used for both buffers.
//
// The channel count is derived as Stride / Width, so rows are assumed to be
// tightly packed (no padding bytes at the end of a row).
// Images smaller than 3x3 have no interior pixels and are left untouched.
void fasterMedianBlur3X3_SSE(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride)
{
    // Also protects the Stride / Width division below against Width == 0.
    if (!Src || !Dest || Width < 3 || Height < 3 || Stride < Width)
        return;

    const int Channel = Stride / Width;
    const int BlockSize = 16;
    const int RowBytes = (Width - 2) * Channel;                  // interior bytes per row
    const int VectorBytes = (RowBytes / BlockSize) * BlockSize;  // part covered by full 16-byte blocks

    for (int Y = 1; Y < Height - 1; Y++)
    {
        // Row above, current row and row below, each starting at the first
        // interior pixel (offset by one pixel = Channel bytes).
        const unsigned char *LineP0 = Src + (Y - 1) * Stride + Channel;
        const unsigned char *LineP1 = LineP0 + Stride;
        const unsigned char *LineP2 = LineP1 + Stride;
        unsigned char *LinePD = Dest + Y * Stride + Channel;

        int X = 0;
        for (; X < VectorBytes; X += BlockSize)
        {
            // The nine neighbours of 16 consecutive samples; +/- Channel
            // addresses the same channel of the left/right pixel.
            __m128i P0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP0 + X - Channel));
            __m128i P1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP0 + X));
            __m128i P2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP0 + X + Channel));
            __m128i P3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP1 + X - Channel));
            __m128i P4 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP1 + X));
            __m128i P5 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP1 + X + Channel));
            __m128i P6 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP2 + X - Channel));
            __m128i P7 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP2 + X));
            __m128i P8 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP2 + X + Channel));

            // Fixed network of 19 compare-exchange steps; afterwards P4
            // holds the per-byte median of the nine inputs.
            _mm_sort_ab(P1, P2); _mm_sort_ab(P4, P5); _mm_sort_ab(P7, P8);
            _mm_sort_ab(P0, P1); _mm_sort_ab(P3, P4); _mm_sort_ab(P6, P7);
            _mm_sort_ab(P1, P2); _mm_sort_ab(P4, P5); _mm_sort_ab(P7, P8);
            _mm_sort_ab(P0, P3); _mm_sort_ab(P5, P8); _mm_sort_ab(P4, P7);
            _mm_sort_ab(P3, P6); _mm_sort_ab(P1, P4); _mm_sort_ab(P2, P5);
            _mm_sort_ab(P4, P7); _mm_sort_ab(P4, P2); _mm_sort_ab(P6, P4);
            _mm_sort_ab(P4, P2);

            _mm_storeu_si128(reinterpret_cast<__m128i *>(LinePD + X), P4);
        }

        // Scalar tail: the (at most 15) interior bytes that do not fill a
        // whole 16-byte block.
        for (; X < RowBytes; X++)
        {
            unsigned char V[9] = {
                LineP0[X - Channel], LineP0[X], LineP0[X + Channel],
                LineP1[X - Channel], LineP1[X], LineP1[X + Channel],
                LineP2[X - Channel], LineP2[X], LineP2[X + Channel]
            };
            // Partial selection sort: once the five smallest values are in
            // place, V[4] is the median of the nine.
            for (int I = 0; I <= 4; I++)
            {
                int MinIdx = I;
                for (int J = I + 1; J < 9; J++)
                {
                    if (V[J] < V[MinIdx])
                        MinIdx = J;
                }
                const unsigned char Tmp = V[I];
                V[I] = V[MinIdx];
                V[MinIdx] = Tmp;
            }
            LinePD[X] = V[4];
        }
    }
}
测试图像信息如下,中值滤波时间为0.14ms左右。
带椒盐噪声的图像
中值滤波效果
3. 完整测试代码
//图像快速遍历与快速中值滤波测试代码
//by andyoyo@swust
//2019/7/3
#include <opencv2/opencv.hpp>
#include <iostream>
//cv::Mat 图像快速遍历
// Inverts the colour samples of an 8-bit image in place, addressing each row
// through the raw data pointer of the cv::Mat.
//
// src: image to modify. Nothing happens when the matrix is empty or its
//      depth is not CV_8U, since the code below operates on single bytes.
//
// Up to three channels: every sample of every pixel is inverted.
// More than three channels: only the first three samples of each pixel are
// inverted; the others (presumably alpha for a 4-channel image) are preserved.
void fasterTraversePixel(cv::Mat &src)
{
    if (src.empty() || src.depth() != CV_8U)
        return;

    const int channels = src.channels();
    const int w = src.cols;
    const int h = src.rows;
    for (int row = 0; row < h; row++)
    {
        // src.step is the number of bytes from one row start to the next.
        uchar* uc_pixel = src.data + row * src.step;
        if (channels <= 3)
        {
            // All bytes in the row are colour samples, so a single flat loop
            // is enough (same result as the per-pixel form for 3 channels).
            const int rowBytes = w * channels;
            for (int i = 0; i < rowBytes; i++)
                uc_pixel[i] = 255 - uc_pixel[i];
        }
        else
        {
            // Advance by the actual pixel size; a fixed step of 3 would
            // drift across pixel boundaries here.
            for (int col = 0; col < w; col++, uc_pixel += channels)
            {
                uc_pixel[0] = 255 - uc_pixel[0];
                uc_pixel[1] = 255 - uc_pixel[1];
                uc_pixel[2] = 255 - uc_pixel[2];
            }
        }
    }
}
// Compare-exchange of two 16-byte vectors, treating every byte as unsigned:
// on return each lane of `a` holds the smaller and the matching lane of `b`
// the larger of the two input bytes.
inline void _mm_sort_ab(__m128i &a, __m128i &b)
{
    // Keep the minimum aside so `b` can be overwritten while `a` is still intact.
    const __m128i lower = _mm_min_epu8(a, b);
    b = _mm_max_epu8(a, b);
    a = lower;
}
//3*3快速中值滤波SSE优化
// SSE2-accelerated 3x3 median filter for interleaved 8-bit images. Full
// 16-byte blocks are handled with vector instructions; the remainder of each
// row is handled by a scalar loop.
//
// Src    : first byte of the input image (only read).
// Dest   : first byte of the output image. Only interior pixels are written;
//          border rows and columns keep whatever Dest already contains.
//          Dest should not alias Src, otherwise later rows would be computed
//          from already-filtered data.
// Width  : image width in pixels.
// Height : image height in rows.
// Stride : distance in bytes between consecutive rows, used for both buffers.
//
// The channel count is derived as Stride / Width, which assumes rows without
// padding bytes. Images smaller than 3x3 have no interior and are skipped.
void fasterMedianBlur3X3_SSE(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride)
{
    // Rejecting Width < 3 also keeps Stride / Width from dividing by zero.
    if (!Src || !Dest || Width < 3 || Height < 3 || Stride < Width)
        return;

    const int Channel = Stride / Width;
    const int BlockSize = 16;
    const int RowBytes = (Width - 2) * Channel;                  // interior bytes per row
    const int VectorBytes = (RowBytes / BlockSize) * BlockSize;  // bytes handled by the SSE loop

    for (int Y = 1; Y < Height - 1; Y++)
    {
        // Previous, current and next row, each offset by one pixel
        // (Channel bytes) so index 0 is the first interior sample.
        const unsigned char *LineP0 = Src + (Y - 1) * Stride + Channel;
        const unsigned char *LineP1 = LineP0 + Stride;
        const unsigned char *LineP2 = LineP1 + Stride;
        unsigned char *LinePD = Dest + Y * Stride + Channel;

        int X = 0;
        for (; X < VectorBytes; X += BlockSize)
        {
            // Load the 3x3 neighbourhood for 16 samples at once; -/+ Channel
            // selects the same channel in the left/right neighbouring pixel.
            __m128i P0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP0 + X - Channel));
            __m128i P1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP0 + X));
            __m128i P2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP0 + X + Channel));
            __m128i P3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP1 + X - Channel));
            __m128i P4 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP1 + X));
            __m128i P5 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP1 + X + Channel));
            __m128i P6 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP2 + X - Channel));
            __m128i P7 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP2 + X));
            __m128i P8 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(LineP2 + X + Channel));

            // Fixed sequence of 19 compare-exchange steps that leaves the
            // per-byte median of the nine vectors in P4.
            _mm_sort_ab(P1, P2); _mm_sort_ab(P4, P5); _mm_sort_ab(P7, P8);
            _mm_sort_ab(P0, P1); _mm_sort_ab(P3, P4); _mm_sort_ab(P6, P7);
            _mm_sort_ab(P1, P2); _mm_sort_ab(P4, P5); _mm_sort_ab(P7, P8);
            _mm_sort_ab(P0, P3); _mm_sort_ab(P5, P8); _mm_sort_ab(P4, P7);
            _mm_sort_ab(P3, P6); _mm_sort_ab(P1, P4); _mm_sort_ab(P2, P5);
            _mm_sort_ab(P4, P7); _mm_sort_ab(P4, P2); _mm_sort_ab(P6, P4);
            _mm_sort_ab(P4, P2);

            _mm_storeu_si128(reinterpret_cast<__m128i *>(LinePD + X), P4);
        }

        // Scalar tail for the interior bytes (fewer than 16) left over after
        // the last full block.
        for (; X < RowBytes; X++)
        {
            unsigned char V[9] = {
                LineP0[X - Channel], LineP0[X], LineP0[X + Channel],
                LineP1[X - Channel], LineP1[X], LineP1[X + Channel],
                LineP2[X - Channel], LineP2[X], LineP2[X + Channel]
            };
            // Partial selection sort: after placing the five smallest
            // values, V[4] is the median of the nine samples.
            for (int I = 0; I <= 4; I++)
            {
                int MinIdx = I;
                for (int J = I + 1; J < 9; J++)
                {
                    if (V[J] < V[MinIdx])
                        MinIdx = J;
                }
                const unsigned char Tmp = V[I];
                V[I] = V[MinIdx];
                V[MinIdx] = Tmp;
            }
            LinePD[X] = V[4];
        }
    }
}
int main()
{
cv::Mat image = cv::imread("test.png",-1);
if (image.empty())
{
std::cout << "image is empty" << std::endl;
system("pause");
return 0;
}
cv::imshow("src", image);
cv::waitKey(30);
unsigned char *Src = image.data;
cv::Mat dst;
image.copyTo(dst);
unsigned char *Dest = dst.data;
int Width, Height, Stride;
Width = image.cols;
Height = image.rows;
Stride = image.step;
int64 t1 = cv::getTickCount();
//fasterTraversePixel(image);//图像快速遍历
fasterMedianBlur3X3_SSE(Src,Dest,Width,Height,Stride);//快速中值滤波
int64 t2 = cv::getTickCount();
double t = ((t2 - t1) / cv::getTickFrequency()) * 1000;
std::cout << "Run time:" << t << "ms" << std::endl;
cv::imshow("dst", dst);
cv::waitKey(0);
//system("pause");
return 0;
}
4. 参考资料
[2] https://www.cnblogs.com/Imageshop/p/11087804.html