一个超级快速的平方根倒数（1/√x）C函数
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
//#define TEST_Q
//#define TEST_MATH 1
#define TEST_SSE 1
#ifdef TEST_Q
/*
 * Fast approximate reciprocal square root, 1/sqrt(number), using the
 * integer bit trick popularised by John Carmack's Quake III code.
 *
 * number: input value; the approximation is only meaningful for
 *         positive, finite, normal floats.
 * Returns an estimate of 1/sqrt(number) after one Newton-Raphson step.
 *
 * The float's bit pattern is moved through a 32-bit unsigned integer with
 * memcpy. The earlier `*(long *)&y` cast violated strict aliasing and, on
 * platforms where long is 64 bits wide, read past the 4-byte float.
 */
float new_rsqrt( float number )
{
    const float threehalfs = 1.5F;
    const float x2 = number * 0.5F;
    float y = number;
    uint32_t bits;

    /* Reinterpret the float's bits as an integer without aliasing UB. */
    memcpy(&bits, &y, sizeof bits);

    /* Magic constant minus half the bit pattern gives the initial guess. */
    bits = 0x5f3759dfu - (bits >> 1);

    memcpy(&y, &bits, sizeof y);

    /* One Newton-Raphson iteration to refine the guess. */
    y = y * (threehalfs - (x2 * y * y));
    return y;
}
#endif
#ifdef TEST_MATH
#include <math.h>
float new_rsqrt(float f){ //这是调用标准的math.h函数库
return 1/sqrtf(f);
}
#endif
#ifdef TEST_SSE
#include <xmmintrin.h>
/*
 * Approximate reciprocal square root using the CPU's SSE rsqrt
 * instruction, which produces the estimate directly.
 *
 * f: input value.
 * Returns the hardware estimate of 1/sqrt(f). This is an approximation,
 * not a correctly rounded result (Intel documents the maximum relative
 * error as below 1.5 * 2^-12).
 *
 * Only one lane is needed, so the scalar intrinsics are used, and the
 * result is extracted with _mm_cvtss_f32 rather than by subscripting the
 * __m128 value, which is a compiler-specific vector extension.
 */
float new_rsqrt(float f){
    const __m128 input = _mm_set_ss(f);
    const __m128 estimate = _mm_rsqrt_ss(input);

    return _mm_cvtss_f32(estimate);
}
#endif
/*
 * Benchmark driver for whichever new_rsqrt variant was compiled in.
 *
 * Prints new_rsqrt(9) first so the precision can be compared against the
 * exact value 1/3, then times 0xfffffff calls and reports the CPU time
 * consumed in milliseconds.
 *
 * Returns 0 on success, 1 if the processor time is unavailable.
 */
int main(int argc, const char * argv[]) {
    enum { ITERATIONS = 0xfffffff };
    /* volatile so the optimizer cannot discard the benchmarked calls. */
    volatile float sink = 0.0f;
    clock_t start_time;
    clock_t end_time;
    int i;

    (void)argc;
    (void)argv;

    /* The exact answer is 1/3 (repeating), handy for judging precision. */
    printf("%f\n", new_rsqrt(9.0f));

    start_time = clock();
    for (i = 0; i < ITERATIONS; i++) {
        sink = new_rsqrt((float)i);
    }
    end_time = clock();
    (void)sink;

    if (start_time == (clock_t)-1 || end_time == (clock_t)-1) {
        fprintf(stderr, "clock() could not report processor time\n");
        return 1;
    }

    /* clock() counts ticks of CLOCKS_PER_SEC, not nanoseconds. */
    printf("Time cost: %.3f ms\n",
           (double)(end_time - start_time) * 1000.0 / (double)CLOCKS_PER_SEC);
    return 0;
}
LitrindeMacBook-Pro:~ litrin$ time ./test_q
0.332953
Time cost: 7746636 ns
real 0m7.773s
user 0m7.733s
sys 0m0.019s
LitrindeMacBook-Pro:~ litrin$ time ./test_sse
0.333252
Time cost: 1391658 ns
real 0m1.399s
user 0m1.391s
sys 0m0.005s
LitrindeMacBook-Pro:~ litrin$ time ./test_normal
0.333333
Time cost: 3882738 ns
real 0m3.892s
user 0m3.879s
sys 0m0.009s