看了下liangbch 兄打包的Uint32sqrt测试程序,发现进一步改进FPU式的很难,不过我还是勉强做了些细微的改动,给出四个版本加上iSqrt_FPU2_yaos,iSqrt_FPU1_lbc。 代码:
#include <stdio.h>
#include <time.h>

typedef unsigned int DWORD;

// Correction terms indexed by bit 31 of the input: 0.0 when the bit is clear,
// 2^32 when it is set. iSqrt_FPU2_yaos adds the selected entry after a signed
// integer load so that the FPU value equals the unsigned input.
double b32[] = {0.0,4294967296.0};

// Integer square root of a 32-bit unsigned value using the x87 fsqrt
// instruction.
//
// n arrives in ecx (__fastcall) and the result is returned in eax. The
// function is naked, so there is no compiler-generated prologue/epilogue and
// the body adjusts esp itself.
//
// Method: n is loaded as a signed integer, corrected to its unsigned value,
// square-rooted, stored as a 64-bit double, and the integer part is then
// extracted from the high dword of that double's bit pattern by shifting the
// mantissa right according to the exponent. n == 0 is patched up at the end.
__declspec(naked) DWORD __fastcall iSqrt_FPU2_yaos(DWORD n)
{
    __asm
    {
        push ecx                        // spill n to the stack so the FPU can load it from memory
        mov eax, ecx
        and eax, 0x80000000
        shr eax, 31                     // eax = bit 31 of n (0 or 1)
        fld qword ptr [b32 + eax * 8]   // st(0) = 0.0 or 2^32, selected by bit 31
        fild dword ptr [esp]            // st(0) = n read as a signed 32-bit integer
        faddp st(1), st                 // apply the correction: st(0) = n as an unsigned value
        fsqrt                           // st(0) = sqrt(n)
        sub esp, 8                      // reserve 8 bytes for the double result
        fstp qword ptr [esp]            // store sqrt(n) as a 64-bit double and pop the FPU stack
        mov edx, dword ptr [esp + 4]    // high dword: sign, 11-bit exponent, top 20 mantissa bits
        mov eax, edx
        and edx,0x7ff00000              // isolate the exponent field
        and eax,0xfffff                 // isolate the top 20 mantissa bits
        shr edx, 20                     // edx = biased exponent
        or eax, 0x100000                // restore the implicit leading 1 at bit 20
        xchg ecx, edx                   // ecx = biased exponent, edx = n
        sub ecx, 1043
        neg ecx                         // ecx = 1043 - biased exponent = 20 - unbiased exponent (bias 1023)
        shr eax, cl                     // drop the fractional mantissa bits: eax = integer part of sqrt(n)
        xchg edx, ecx                   // ecx = n again
        add esp, 12                     // release the 8-byte double plus the 4-byte pushed n
        or ecx, ecx                     // set ZF when n == 0
        cmove eax, ecx                  // n == 0: exponent field is 0, so the shift result is meaningless; return 0
        ret
    }
}

_declspec(naked) DWORD fast_sqrt1(DWORD x) { _asm { sub esp,4 mov dword ptr [esp+1