wiki链接:http://en.wikipedia.org/wiki/Haar_wavelet
可用SSE2指令实现HAAR小波变换,达到实时处理的速度;关于HAAR小波的介绍,可参考上面的维基链接。
参考MATLAB中dwt2与idwt2的函数原型,基于OpenCV的框架进行了汇编优化实现
HAAR小波也可用于图像的压缩:将CH、CV、CD中绝对值小于某一阈值的分量置为0,这三个矩阵便成为稀疏矩阵(Sparse Matrix);反变换后图像的质量取决于所选阈值的大小。
实际实现时,可选择使用浮点数(单精度或双精度)进行矩阵计算,使用整数计算能得到更快的速度但不能进行完整的压缩与解压缩。
附代码:
// dwt2_row: one level of the 2-D Haar transform for a single pair of 8-bit pixel rows,
// written with SSE2 instructions inside an MSVC-style __asm block.
//
// For every 2x2 pixel block with top pixels a0,a1 (from row0) and bottom pixels b0,b1
// (from row1), let s = (a + b) * h and d = a - s per column, where h is the value loaded
// from g_halfd. One value is then written to each output array:
//   ca0[k] = (s0 + s1) * h
//   ch0[k] = s0 - ca0[k]
//   cv0[k] = (d0 + d1) * h
//   cd0[k] = d0 - cv0[k]
// NOTE(review): g_halfd is defined outside this block; presumably it holds {0.5, 0.5}, in
// which case these are the average / half-difference Haar coefficients — confirm. It is
// read with movapd, so it must be 16-byte aligned.
//
// col is the number of pixels in each input row. The main loop consumes 4 pixels per
// iteration and the tail consumes one more pair; a trailing odd pixel is not processed.
// eax_ptr ... edi_ptr are defined outside this block; presumably they name the
// general-purpose registers — confirm.
- inline void dwt2_row(__out double* ca0,
- __out double* ch0,
- __out double* cv0,
- __out double* cd0,
- __in unsigned char* row0,
- __in unsigned char* row1,
- __in int col)
- {
- __asm
- {
- mov eax_ptr, ca0; // output cursors
- mov ebx_ptr, ch0;
- mov ecx_ptr, cv0;
- mov edx_ptr, cd0;
- mov esi_ptr, row0; // input cursors
- mov edi_ptr, row1;
- pxor xmm3, xmm3; // xmm3 = 0, used to zero-extend bytes and words
- movapd xmm7, g_halfd; // xmm7 = scale factor h (aligned 16-byte load)
- sub col, 4; // col now holds (remaining pixels - 4)
- jl loop_2; // fewer than 4 pixels: go straight to the tail
- loop_4: // main loop: 4 pixels per row in, 2 doubles per output array out
- movd xmm1, [esi_ptr]; // 4 bytes of row0
- movd xmm5, [edi_ptr]; // 4 bytes of row1
- punpcklbw xmm1, xmm3; // bytes -> words
- punpcklbw xmm5, xmm3;
- punpcklwd xmm1, xmm3; // words -> dwords
- punpcklwd xmm5, xmm3;
- cvtdq2pd xmm0, xmm1; // xmm0 = row0 pixels 0,1 as doubles
- cvtdq2pd xmm4, xmm5; // xmm4 = row1 pixels 0,1 as doubles
- shufpd xmm1, xmm1, 1; // swap 64-bit halves so dwords 2,3 become the low pair
- shufpd xmm5, xmm5, 1;
- cvtdq2pd xmm1, xmm1; // xmm1 = row0 pixels 2,3 as doubles
- cvtdq2pd xmm5, xmm5; // xmm5 = row1 pixels 2,3 as doubles
- addpd xmm4, xmm0; // row0 + row1
- addpd xmm5, xmm1;
- mulpd xmm4, xmm7; // xmm4 = s for columns 0,1
- mulpd xmm5, xmm7; // xmm5 = s for columns 2,3
- subpd xmm0, xmm4; // xmm0 = d = row0 - s for columns 0,1
- subpd xmm1, xmm5; // xmm1 = d for columns 2,3
- movapd xmm6, xmm4; // copies needed for the even/odd column split below
- movapd xmm2, xmm0;
- shufpd xmm4, xmm5, 0; // xmm4 = {s0, s2} (even columns)
- shufpd xmm6, xmm5, 3; // xmm6 = {s1, s3} (odd columns)
- shufpd xmm0, xmm1, 0; // xmm0 = {d0, d2}
- shufpd xmm2, xmm1, 3; // xmm2 = {d1, d3}
- addpd xmm6, xmm4; // s_even + s_odd
- addpd xmm2, xmm0; // d_even + d_odd
- mulpd xmm6, xmm7; // xmm6 = ca values
- mulpd xmm2, xmm7; // xmm2 = cv values
- subpd xmm4, xmm6; // xmm4 = s_even - ca = ch values
- subpd xmm0, xmm2; // xmm0 = d_even - cv = cd values
- movupd [eax_ptr], xmm6; // unaligned stores, 2 doubles each
- movupd [ebx_ptr], xmm4;
- movupd [ecx_ptr], xmm2;
- movupd [edx_ptr], xmm0;
- add esi_ptr, 4; // 4 input bytes consumed per row
- add edi_ptr, 4;
- add eax_ptr, 0x10; // 2 doubles written per output array
- add ebx_ptr, 0x10;
- add ecx_ptr, 0x10;
- add edx_ptr, 0x10;
- sub col, 4;
- jge loop_4; // at least 4 more pixels remain
- loop_2: // tail: at most one more 2x2 block
- cmp col, -2; // col is (remaining - 4); -2 or -1 means 2 or 3 pixels remain
- jl loop_end; // 0 or 1 pixel left: nothing more to do
- pinsrw xmm0, [esi_ptr], 0; // 2 bytes of row0 into word 0; the other words keep stale data
- pinsrw xmm4, [edi_ptr], 0; // 2 bytes of row1 into word 0
- punpcklbw xmm0, xmm3; // bytes -> words
- punpcklbw xmm4, xmm3;
- punpcklwd xmm0, xmm3; // words -> dwords
- punpcklwd xmm4, xmm3;
- cvtdq2pd xmm0, xmm0; // converts only the low two dwords, so the stale data is ignored
- cvtdq2pd xmm4, xmm4;
- addpd xmm4, xmm0; // row0 + row1
- mulpd xmm4, xmm7; // xmm4 = {s0, s1}
- subpd xmm0, xmm4; // xmm0 = {d0, d1}
- movapd xmm5, xmm4;
- shufpd xmm4, xmm0, 0; // xmm4 = {s0, d0}
- shufpd xmm5, xmm0, 3; // xmm5 = {s1, d1}
- addpd xmm5, xmm4; // {s0 + s1, d0 + d1}
- mulpd xmm5, xmm7; // xmm5 = {ca, cv}
- subpd xmm4, xmm5; // xmm4 = {ch, cd}
- movsd [eax_ptr], xmm5; // store ca
- shufpd xmm5, xmm5, 1; // bring cv into the low half
- movsd [ebx_ptr], xmm4; // store ch
- shufpd xmm4, xmm4, 1; // bring cd into the low half
- movsd [ecx_ptr], xmm5; // store cv
- movsd [edx_ptr], xmm4; // store cd
- loop_end:
- }
- }
- inline void idwt2_row(__out unsigned char* row0,
- __out unsigned char* row1,
- __in double* ca0,
- __in double* ch0,
- __in double* cv0,
- __in double* cd0,
- __in int col)
- {
- __asm
- {
- mov eax_ptr, ca0;
- mov ebx_ptr, ch0;
- mov ecx_ptr, cv0;
- mov edx_ptr, cd0;
- mov esi_ptr, row0;
- mov edi_ptr, row1;
- sub col, 4;
- jl loop_2;
- loop_4:
- movupd xmm0, [eax_ptr];
- movupd xmm1, [ebx_ptr];
- movupd xmm4, [ecx_ptr];
- movupd xmm5, [edx_ptr];
- addpd xmm1, xmm0;
- addpd xmm5, xmm4;
- addpd xmm0, xmm0;
- addpd xmm4, xmm4;
- subpd xmm0, xmm1;
- subpd xmm4, xmm5;
- movapd xmm2, xmm1;
- movapd xmm6, xmm5;
- shufpd xmm1, xmm0, 0;
- shufpd xmm2, xmm0, 3;
- shufpd xmm5, xmm4, 0;
- shufpd xmm6, xmm4, 3;
- addpd xmm5, xmm1;
- addpd xmm6, xmm2;
- addpd xmm1, xmm1;
- addpd xmm2, xmm2;
- subpd xmm1, xmm5;
- subpd xmm2, xmm6;
- cvttpd2dq xmm5, xmm5;
- cvttpd2dq xmm6, xmm6;
- cvttpd2dq xmm1, xmm1;
- cvttpd2dq xmm2, xmm2;
- shufpd xmm5, xmm6, 0;
- shufpd xmm1, xmm2, 0;
- packssdw xmm5, xmm1;
- packuswb xmm5, xmm5;
- pshufd xmm1, xmm5, 1;
- movd [esi_ptr], xmm5;
- movd [edi_ptr], xmm1;
- add esi_ptr, 4;
- add edi_ptr, 4;
- add eax_ptr, 0x10;
- add ebx_ptr, 0x10;
- add ecx_ptr, 0x10;
- add edx_ptr, 0x10;
- sub col, 4;
- jge loop_4;
- loop_2:
- cmp col, -2;
- jl loop_end;
- movsd xmm0, [eax_ptr];
- movsd xmm1, [ebx_ptr];
- movsd xmm4, [ecx_ptr];
- movsd xmm5, [edx_ptr];
- addpd xmm1, xmm0;
- addpd xmm5, xmm4;
- addpd xmm0, xmm0;
- addpd xmm4, xmm4;
- subpd xmm0, xmm1;
- subpd xmm4, xmm5;
- shufpd xmm1, xmm0, 0;
- shufpd xmm5, xmm4, 0;
- addpd xmm5, xmm1;
- addpd xmm1, xmm1;
- subpd xmm1, xmm5;
- cvttpd2dq xmm5, xmm5;
- cvttpd2dq xmm1, xmm1;
- packssdw xmm5, xmm1;
- packuswb xmm5, xmm5;
- movd eax_ptr, xmm5;
- mov [esi_ptr], ax;
- shr eax_ptr, 16;
- stosw;
- loop_end:
- }
- }
- inline void dwt2(__out cv::Mat& CA,
- __out cv::Mat& CH,
- __out cv::Mat& CV,
- __out cv::Mat& CD,
- __in cv::Mat const& I)
- {
- if(CA.type() != CV_64FC1 || CH.type() != CV_64FC1 || CV.type() != CV_64FC1 || CD.type() != CV_64FC1 || I.channels() != 1)
- return;
- double* ca = reinterpret_cast<double*>(CA.data);
- double* ch = reinterpret_cast<double*>(CH.data);
- double* cv = reinterpret_cast<double*>(CV.data);
- double* cd = reinterpret_cast<double*>(CD.data);
- unsigned char* row = reinterpret_cast<unsigned char*>(I.data);
- for(int i=0; i<I.rows; i+=2)
- {
- dwt2_row(ca, ch, cv, cd, row, row+I.cols, I.cols);
- ca += CA.cols;
- ch += CH.cols;
- cv += CV.cols;
- cd += CD.cols;
- row += I.cols*2;
- }
- }
- inline void idwt2(__out cv::Mat& I,
- __in cv::Mat const& CA,
- __in cv::Mat const& CH,
- __in cv::Mat const& CV,
- __in cv::Mat const& CD)
- {
- if(CA.type() != CV_64FC1 || CH.type() != CV_64FC1 || CV.type() != CV_64FC1 || CD.type() != CV_64FC1 || I.channels() != 1)
- return;
- double* ca = reinterpret_cast<double*>(CA.data);
- double* ch = reinterpret_cast<double*>(CH.data);
- double* cv = reinterpret_cast<double*>(CV.data);
- double* cd = reinterpret_cast<double*>(CD.data);
- unsigned char* row = reinterpret_cast<unsigned char*>(I.data);
- for(int i=0; i<I.rows; i+=2)
- {
- idwt2_row(row, row+I.cols, ca, ch, cv, cd, I.cols);
- ca += CA.cols;
- ch += CH.cols;
- cv += CV.cols;
- cd += CD.cols;
- row += I.cols*2;
- }
- }
版权归作者所有,转载请注明出处!