# 使用SSE指令优化的数学函数(整理)

810人阅读 评论(0)

float _SSE_cos( float x)
{
float temp;
__asm
{
movss xmm0, x
andps xmm0, xmm1
mulss xmm0, _ps_am_2_o_pi

cvttss2si ecx, xmm0
movss xmm5, _ps_am_1
mov  edx, ecx
shl  edx, (31 - 1)
cvtsi2ss xmm1, ecx
and  edx, 0x80000000
and  ecx, 0x1

subss xmm0, xmm1
minss xmm0, xmm5

movss xmm1, _ps_sincos_p3
subss xmm5, xmm0

andps xmm5, xmm6
movss xmm7, _ps_sincos_p2
andnps xmm6, xmm0
mov  temp, edx
orps xmm5, xmm6
movss xmm0, xmm5

mulss xmm5, xmm5
movss xmm4, _ps_sincos_p1
movss xmm2, xmm5
mulss xmm5, xmm1
movss xmm1, _ps_sincos_p0
mulss xmm5, xmm2
movss xmm3, temp
mulss xmm5, xmm2
orps xmm0, xmm3
mulss xmm0, xmm5

movss   x,    xmm0

}

return x;
}

float _SSE2_cos(float x)
{
__asm
{
movss xmm0, x
movss xmm2, _ps_am_pi_o_2
movss xmm3, _ps_am_2_o_pi
andps xmm0, xmm1
mulss xmm0, xmm3

pxor xmm3, xmm3
movd xmm5, _epi32_1
movss xmm4, _ps_am_1
cvttps2dq xmm2, xmm0
pand xmm5, xmm2
movd xmm1, _epi32_2
pcmpeqd xmm5, xmm3
cvtdq2ps xmm6, xmm2
pand xmm2, xmm1
pslld xmm2, (31 - 1)

subss xmm0, xmm6
movss xmm3, _ps_sincos_p3
minss xmm0, xmm4
subss xmm4, xmm0
andps xmm0, xmm5
andnps xmm5, xmm4
orps xmm0, xmm5

movaps xmm1, xmm0
movss xmm4, _ps_sincos_p2
mulss xmm0, xmm0
movss xmm5, _ps_sincos_p1
orps xmm1, xmm2
movaps xmm7, xmm0
mulss xmm0, xmm3
movss xmm6, _ps_sincos_p0
mulss xmm0, xmm7
mulss xmm0, xmm7
mulss xmm0, xmm1
movss   x,    xmm0
}

return x;
}

float _SSE_Sqrt(float x)
{

float root = 0.f;
_asm
{
sqrtss  xmm0, x
movss  root, xmm0
}

return root;
}

0
0

* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
个人资料
• 访问：89061次
• 积分：1194
• 等级：
• 排名：千里之外
• 原创：17篇
• 转载：56篇
• 译文：1篇
• 评论：32条
文章分类
评论排行
最新评论