## 西风狂诗曲

Get busy living, or get busy dying. --Andy 《The Shawshank Redemption》

# 使用SSE指令优化的数学函数(整理)

float _SSE_cos( float x)
{
float temp;
__asm
{
movss xmm0, x
andps xmm0, xmm1
mulss xmm0, _ps_am_2_o_pi

cvttss2si ecx, xmm0
movss xmm5, _ps_am_1
mov  edx, ecx
shl  edx, (31 - 1)
cvtsi2ss xmm1, ecx
and  edx, 0x80000000
and  ecx, 0x1

subss xmm0, xmm1
minss xmm0, xmm5

movss xmm1, _ps_sincos_p3
subss xmm5, xmm0

andps xmm5, xmm6
movss xmm7, _ps_sincos_p2
andnps xmm6, xmm0
mov  temp, edx
orps xmm5, xmm6
movss xmm0, xmm5

mulss xmm5, xmm5
movss xmm4, _ps_sincos_p1
movss xmm2, xmm5
mulss xmm5, xmm1
movss xmm1, _ps_sincos_p0
mulss xmm5, xmm2
movss xmm3, temp
mulss xmm5, xmm2
orps xmm0, xmm3
mulss xmm0, xmm5

movss   x,    xmm0

}

return x;
}

// Fast scalar cosine approximation using SSE2 integer instructions for the
// quadrant bookkeeping (MSVC x86 inline assembly).
//
// Method: cos(x) = sin(|x| + pi/2). The shifted argument is multiplied by
// 2/pi so that its integer part q is a quadrant index and its fractional
// part t lies in [0, 1]. In odd quadrants t is folded to (1 - t), bit 1 of q
// gives the sign of the result, and sin(t * pi/2) is evaluated as the odd
// polynomial
//     t * (p0 + t^2 * (p1 + t^2 * (p2 + t^2 * p3)))
//
// Relies on the file-level constants _ps_am_pi_o_2, _ps_am_2_o_pi, _ps_am_1,
// _epi32_1, _epi32_2 and _ps_sincos_p0.._ps_sincos_p3. The quadrant index is
// produced with cvttps2dq, so |x| must be small enough for it to fit in a
// signed 32-bit integer.
//
// x       : angle in radians.
// returns : approximate cos(x).
float _SSE2_cos(float x)
{
    // Clears the IEEE-754 sign bit, i.e. computes |x|.
    static const unsigned int absMask = 0x7FFFFFFF;

    __asm
    {
        movss       xmm0, x
        movss       xmm1, absMask
        movss       xmm2, _ps_am_pi_o_2
        movss       xmm3, _ps_am_2_o_pi
        andps       xmm0, xmm1                  // xmm0 = |x|
        addss       xmm0, xmm2                  // cos(x) = sin(|x| + pi/2)
        mulss       xmm0, xmm3                  // xmm0 = argument in quadrants

        pxor        xmm3, xmm3                  // xmm3 = 0, comparison operand
        movd        xmm5, _epi32_1
        movss       xmm4, _ps_am_1
        cvttps2dq   xmm2, xmm0                  // xmm2 = q (truncated quadrant index)
        pand        xmm5, xmm2                  // q & 1
        movd        xmm1, _epi32_2
        pcmpeqd     xmm5, xmm3                  // all-ones when q is even
        cvtdq2ps    xmm6, xmm2                  // q converted back to float
        pand        xmm2, xmm1                  // q & 2
        pslld       xmm2, (31 - 1)              // bit 1 of q -> sign bit

        subss       xmm0, xmm6                  // xmm0 = t = fractional part
        movss       xmm3, _ps_sincos_p3
        minss       xmm0, xmm4                  // clamp t to at most 1
        subss       xmm4, xmm0                  // xmm4 = 1 - t
        andps       xmm0, xmm5                  // t in even quadrants
        andnps      xmm5, xmm4                  // (1 - t) in odd quadrants
        orps        xmm0, xmm5                  // xmm0 = folded t

        movaps      xmm1, xmm0
        movss       xmm4, _ps_sincos_p2
        mulss       xmm0, xmm0                  // xmm0 = t^2
        movss       xmm5, _ps_sincos_p1
        orps        xmm1, xmm2                  // xmm1 = t with the result sign applied
        movaps      xmm7, xmm0                  // xmm7 = t^2
        mulss       xmm0, xmm3                  // p3 * t^2
        movss       xmm6, _ps_sincos_p0
        addss       xmm0, xmm4                  // + p2
        mulss       xmm0, xmm7
        addss       xmm0, xmm5                  // + p1
        mulss       xmm0, xmm7
        addss       xmm0, xmm6                  // + p0
        mulss       xmm0, xmm1                  // result = (+/-)t * poly(t^2)
        movss       x, xmm0
    }

    return x;
}

float _SSE_Sqrt(float x)
{

float root = 0.f;
_asm
{
sqrtss  xmm0, x
movss  root, xmm0
}

return root;
}

#### 使用SSE指令优化的数学函数(整理)

2004-08-15 15:56:00

#### SSE指令优化一例

2016-03-11 15:46:32

#### vc2010 sse指令优化效果明显

2015-10-16 17:07:36

#### sse 指令优化叉乘

2010-01-04 11:13:00

#### 使用SSE指令集优化memcpy

2016-03-11 15:49:13

#### SSE指令的使用学习

2015-09-24 13:39:53

#### 使用MMX/SSE汇编指令集优化视频开发

2015-05-26 17:16:52

#### SSE指令集 c,c++程序代码优化

2017-10-20 11:37:21

#### 矩阵转置的SSE汇编优化艺术以及ARM Cortex 汇编优化

2013-01-03 14:05:43

#### SSE入门

2014-12-31 23:43:12