long BilinearInterpolateD( const SwImage & pic,double fx,double fy )
{
#define BILINEAR4_UNITRPOC(a,b,c,d) ((a<<22) +(b-a)*rx2048 + (c-a)*ry2048 + (a-b-c+d)*rxry )
long x = (long)fx;
long y = (long)fy;
unsigned char * c0 = ( unsigned char* )(pic.buffer) + ((pic.width * y + x)<<2);
unsigned char * c2 = c0 + ( pic.width <<2 );
long rx = (long)(( fx - x ) * 2048);
long ry = (long)(( fy - y ) * 2048);
long rx2048 = rx << 11;
long ry2048 = ry << 11;
long rxry = rx * ry;
long rr = BILINEAR4_UNITRPOC( *(c0+2), *(c0+6), *(c2+2), *(c2+6) );
long gg = BILINEAR4_UNITRPOC( *(c0+1), *(c0+5), *(c2+1), *(c2+5) );
long bb = BILINEAR4_UNITRPOC( *(c0), *(c0+4), *(c2), *(c2+4) );
return ((rr>>22<<16) | (gg>>22<<8) | bb>>22);
#undef BILINEAR4_UNITRPOC
}
//使用SSE汇编指令集
BOOL SwImgRotateBilinearSSE( SwImage * pPicDest, const SwImage & picSrc, float angle )
{
long srcw = picSrc.width;
long srch = picSrc.height;
long halfsrcw = srcw >> 1;
long halfsrch = srch >> 1;
long neg_halfsrcw = -halfsrcw;
long neg_halfsrch = -halfsrch;
long destw = 0;
long desth = 0;
SwColor * pDest = NULL;
long isx = 0;
long isy = 0;
__declspec(align(16)) float arc = angle * _PI_DIV_180_FS;
__declspec(align(16)) float sina = sin( arc );
__declspec(align(16)) float cosa = cos( arc );
__declspec(align(16)) float fhalfsrcw = (float)srcw * 0.5f;
__declspec(align(16)) float fhalfsrch = (float)srch * 0.5f;
SwImgCalcRotExtent( srcw, srch, sina, cosa, &destw, &desth );
if( ( destw <= 0 ) || ( desth <= 0 ) )
return FALSE;
SwImgAlloc( pPicDest, destw, desth );
pDest = pPicDest->buffer;
__declspec(align(16)) float srcx0 = -(destw * 0.5f)*cosa + (-(desth * 0.5f)*sina);
__declspec(align(16)) float srcy0 = -(destw * 0.5f)*sina - (-(desth * 0.5f)*cosa);
__declspec(align(16)) float fsx = 0;
__declspec(align(16)) float fsy = 0;
_asm movaps xmm0,srcx0
_asm movaps xmm1,srcy0
for (int y = 0; y < desth; y++)
{
_asm movaps xmm2,xmm0
_asm movaps xmm3,xmm1
for ( int x = 0; x < destw; x++)
{
__asm
{
cvtss2si eax,xmm2
// if( isx > halfsrcw || isx < neg_halfsrcw ) goto over;
cmp eax,halfsrcw
jg LOOP2_OVER
cmp eax,neg_halfsrcw
jl LOOP2_OVER
cvtss2si eax,xmm3
// if( isy > halfsrch || isy < neg_halfsrch ) goto over;
cmp eax,halfsrch
jg LOOP2_OVER
cmp eax,neg_halfsrch
jl LOOP2_OVER
movss xmm4,xmm2
movss xmm5,xmm3
addss xmm4,fhalfsrcw
subss xmm5,fhalfsrch
movss fsx,xmm4
movss fsy,xmm5
// 将 fsy 绝对值,并截取整数部分到 isy
fld dword ptr[fsy]
fabs
fst dword ptr[fsy]
fisttp dword ptr[isy]
// 因为 BinlinearInterpolate4 并不进行任何的参数检查
// 所以这里进行预先判断
cvtss2si eax,xmm4 // eax = (int)fsx
add eax,2
cmp eax,srcw
jnb LOOP2_OVER
mov eax,isy
add eax,2
cmp eax,srch
jnb LOOP2_OVER
// 将参数压放栈中,调用函数 BinlinearInterpolate4
fld dword ptr[fsy]
sub esp,8
fstp qword ptr[esp]
fld dword ptr[fsx]
sub esp,8
fstp qword ptr[esp]
mov eax,dword ptr[picSrc]
push eax
call BilinearInterpolateD
add esp,14h
// 现在将函数的返回值赋给 p[x]
mov ecx,dword ptr[x]
mov edx,dword ptr[pDest]
mov dword ptr[edx+ecx*4],eax
LOOP2_OVER:
addss xmm2,cosa
addss xmm3,sina
}
}
pDest += destw;
_asm addss xmm0,sina
_asm subss xmm1,cosa
}
return TRUE;
}