VS2005和VS2010 C++标准库 exp结果不一致

文章讲述了在将代码从旧编译器(VS2005)迁移到新编译器(VS2010)的过程中发现的 exp 计算结果不一致问题,怀疑是 VS2005 的标准库实现受当时指令集等因素的影响而损失了精度。性能测试显示,在相同条件下,VS2010 的 exp 明显更快,结果也更精确。
摘要由CSDN通过智能技术生成

如图,今天工作时要把老代码迁移到新的编译工具上,结果两边跑同样的案例得到的结果不一致。经排查,不一致发生在一个只根据参数计算结果的函数里;把相关变量单独摘出来后可以复现,并且与调试时观察到的现象一致。怀疑是05的exp标准库实现出于性能考虑,或者受当时指令集的限制,牺牲了一部分精度。

VS2010(下文简称10)的汇编代码:

; ---------------------------------------------------------------------------
; exp -- debugger disassembly of the VS2010 x64 runtime implementation.
; In:  xmm0 = x (double; presumably the first argument under the Microsoft
;      x64 calling convention). Out: xmm0 = result.
; Outline, as far as this listing shows:
;   1. Inf/NaN inputs and inputs very close to zero branch to dedicated paths.
;   2. n = int(x * __real_64_by_log2), j = n & 3Fh, m = (n - j) >> 6.
;   3. r = (x - n * __real_log2_by_64_head) + n * __real_log2_by_64_tail.
;   4. q = r + r^2/2 + r^3/6 + r^4/24 + r^5/120 + r^6/720.
;   5. xmm0 = q * __two_to_jby64_table[j] + tail_table[j] + head_table[j],
;      then multiplied by a double whose exponent field is m + 3FFh.
; NOTE(review): the constants' values are not visible in this dump; their
; meanings above are inferred from the symbol names only -- confirm.
; ---------------------------------------------------------------------------
exp:
; Reserve 88h bytes of stack and spill x so its raw bits can be read into rdx.
000000005AB2D010  sub         rsp,88h  
000000005AB2D017  movsd       mmword ptr [rsp+30h],xmm0  
000000005AB2D01D  mov         rax,qword ptr [__real_inf (5AB31090h)]  
000000005AB2D024  mov         rdx,qword ptr [rsp+30h]  
; rax = bits(x) & bits(__real_inf); equality with __real_inf sends the input
; to the Inf/NaN path (presumably "exponent field all ones").
000000005AB2D029  and         rax,rdx  
000000005AB2D02C  mov         r9,rdx  
000000005AB2D02F  cmp         rax,qword ptr [__real_inf (5AB31090h)]  
000000005AB2D036  je          __x_is_inf_or_nan (5AB2D260h)  
; r9 = bits(x) masked with __exp_mant_mask, compared (signed) against the
; near-zero threshold; the jl below uses these flags (movsd leaves them intact).
000000005AB2D03C  and         r9,qword ptr [__exp_mant_mask (5AB31070h)]  
000000005AB2D043  cmp         r9,qword ptr [__real_x_near0_threshold (5AB31080h)]  
000000005AB2D04A  movsd       xmm3,mmword ptr [__real_64_by_log2 (5AB310E0h)]  
000000005AB2D052  jl          __process_result_one (5AB2D180h)  
; xmm3 = x * __real_64_by_log2; range-check it before converting to integer.
000000005AB2D058  mulsd       xmm3,xmm0  
000000005AB2D05C  comisd      xmm3,mmword ptr [__real_p65536 (5AB310C0h)]  
000000005AB2D064  ja          __y_is_inf (5AB2D240h)  
000000005AB2D06A  comisd      xmm3,mmword ptr [__real_m68800 (5AB310D0h)]  
000000005AB2D072  jbe         __y_is_zero (5AB2D220h)  
; n = xmm3 converted to a 32-bit integer (rounding per the current MXCSR mode).
000000005AB2D078  cvtpd2dq    xmm4,xmm3  
000000005AB2D07C  lea         r10,[__two_to_jby64_head_table (5AB31360h)]  
000000005AB2D083  lea         r11,[__two_to_jby64_tail_table (5AB31560h)]  
; xmm1 = (double)n; xmm2 = n * __real_log2_by_64_head.
000000005AB2D08A  cvtdq2pd    xmm1,xmm4  
000000005AB2D08E  movsd       xmm2,mmword ptr [__real_log2_by_64_head (5AB310F0h)]  
000000005AB2D096  mulsd       xmm2,xmm1  
; ecx = n; eax = j = n & 3Fh (table index).
000000005AB2D09A  movd        ecx,xmm4  
000000005AB2D09E  mov         rax,3Fh  
000000005AB2D0A5  and         eax,ecx  
; xmm0 = x - n*head; xmm1 = n*tail.
000000005AB2D0A7  subsd       xmm0,xmm2  
000000005AB2D0AB  mulsd       xmm1,mmword ptr [__real_log2_by_64_tail (5AB31100h)]  
000000005AB2D0B3  movsd       xmm2,xmm0  
; ecx = m = (n - j) >> 6 (arithmetic shift).
000000005AB2D0B7  sub         ecx,eax  
000000005AB2D0B9  sar         ecx,6  
; xmm2 = r = (x - n*head) + n*tail; xmm1 = copy of r.
000000005AB2D0BC  addsd       xmm2,xmm1  
000000005AB2D0C0  movsd       xmm1,xmm2  
; Polynomial in r, evaluated as three independent partial sums.
000000005AB2D0C4  movsd       xmm0,mmword ptr [__real_1_by_2 (5AB31150h)]  
000000005AB2D0CC  movsd       xmm3,mmword ptr [__real_1_by_24 (5AB31130h)]  
000000005AB2D0D4  movsd       xmm4,mmword ptr [__real_1_by_720 (5AB31110h)]  
; xmm1 = r^2; xmm0 = c2*r; xmm3 = c24*r; xmm4 = c720*r.
000000005AB2D0DC  mulsd       xmm1,xmm2  
000000005AB2D0E0  mulsd       xmm0,xmm2  
000000005AB2D0E4  mulsd       xmm3,xmm2  
000000005AB2D0E8  mulsd       xmm4,xmm2  
; xmm5 = r^2; xmm1 = r^3.
000000005AB2D0EC  movsd       xmm5,xmm1  
000000005AB2D0F0  mulsd       xmm1,xmm2  
000000005AB2D0F4  addsd       xmm0,mmword ptr [__real_one (5AB31040h)]  
000000005AB2D0FC  addsd       xmm3,mmword ptr [__real_1_by_6 (5AB31140h)]  
; xmm5 = r^5.
000000005AB2D104  mulsd       xmm5,xmm1  
000000005AB2D108  addsd       xmm4,mmword ptr [__real_1_by_120 (5AB31120h)]  
; xmm0 = r*(one + c2*r); xmm3 = r^3*(c6 + c24*r); xmm4 = r^5*(c120 + c720*r).
000000005AB2D110  mulsd       xmm0,xmm2  
000000005AB2D114  mulsd       xmm3,xmm1  
000000005AB2D118  mulsd       xmm4,xmm5  
; r9d = 0, then r9d = m if m <= __denormal_threshold. The cmovle consumes the
; flags of this cmp; the SSE adds in between do not modify EFLAGS.
000000005AB2D11C  xor         r9d,r9d  
000000005AB2D11F  cmp         ecx,dword ptr [__denormal_threshold (5AB31010h)]  
; xmm0 = q = sum of the three partial sums.
000000005AB2D125  addsd       xmm3,xmm4  
000000005AB2D129  addsd       xmm0,xmm3  
000000005AB2D12D  cmovle      r9d,ecx  
; rcx = (m + 3FFh) << 52: m placed into a double's exponent field.
000000005AB2D131  add         rcx,3FFh  
000000005AB2D138  shl         rcx,34h  
; xmm0 = q * table[j] + tail_table[j] + head_table[j].
000000005AB2D13C  lea         r8,[__two_to_jby64_table (5AB31160h)]  
000000005AB2D143  mulsd       xmm0,mmword ptr [r8+rax*8]  
; If the scale's bit pattern equals __real_inf, take the "almost inf" path
; (je below; the two addsd instructions leave the flags untouched).
000000005AB2D149  cmp         rcx,qword ptr [__real_inf (5AB31090h)]  
000000005AB2D150  addsd       xmm0,mmword ptr [r11+rax*8]  
000000005AB2D156  addsd       xmm0,mmword ptr [r10+rax*8]  
000000005AB2D15C  je          __process_almost_inf (5AB2D190h)  
; r9d != 0 means m was at or below the denormal threshold.
000000005AB2D15E  test        r9d,r9d  
000000005AB2D161  mov         qword ptr [rsp+30h],rcx  
000000005AB2D166  jne         __process_denormal (5AB2D1A0h)  
; Normal case: scale by the double just stored at [rsp+30h].
000000005AB2D168  mulsd       xmm0,mmword ptr [rsp+30h]  
; Common exit: release the stack frame and return xmm0.
__final_check:
000000005AB2D16E  add         rsp,88h  
000000005AB2D175  ret  
000000005AB2D176  nop         word ptr [rax+rax]  
; Near-zero input: return the constant __real_one.
__process_result_one:
000000005AB2D180  movsd       xmm0,mmword ptr [__real_one (5AB31040h)]  
000000005AB2D188  jmp         __final_check (5AB2D16Eh)  
000000005AB2D18A  nop         word ptr [rax+rax]  
; Scale exponent field matched __real_inf: OR the 128-bit constant
; __enable_almost_inf into xmm0 instead of multiplying.
; NOTE(review): the constant's value is not shown here.
__process_almost_inf:
000000005AB2D190  orpd        xmm0,xmmword ptr [__enable_almost_inf (5AB31020h)]  
000000005AB2D198  jmp         __final_check (5AB2D16Eh)  
000000005AB2D19A  nop         word ptr [rax+rax]  
; m <= __denormal_threshold. r11d = m if xmm0 >= __real_one, else 0; when r11d
; equals the threshold the ordinary multiply is still used, otherwise the
; result is handled as a true denormal.
__process_denormal:
000000005AB2D1A0  mov         ecx,r9d  
000000005AB2D1A3  xor         r11d,r11d  
000000005AB2D1A6  comisd      xmm0,mmword ptr [__real_one (5AB31040h)]  
000000005AB2D1AE  cmovae      r11d,ecx  
000000005AB2D1B2  cmp         r11d,dword ptr [__denormal_threshold (5AB31010h)]  
000000005AB2D1B9  jne         __process_true_denormal (5AB2D1D0h)  
000000005AB2D1BB  mulsd       xmm0,mmword ptr [rsp+30h]  
000000005AB2D1C1  jmp         __final_check (5AB2D16Eh)  
000000005AB2D1C3  nop         word ptr [rax+rax]  
; Build the scale as an integer bit pattern 1 << max(m + 432h, 0), store it,
; and multiply xmm0 by that pattern reinterpreted as a double. Inputs whose
; raw bits (rdx) compare greater (signed) than __denormal_tiny_threshold go to
; the "tiny" path instead.
__process_true_denormal:
000000005AB2D1D0  xor         r8,r8  
000000005AB2D1D3  cmp         rdx,qword ptr [__denormal_tiny_threshold (5AB31060h)]  
000000005AB2D1DA  mov         r9,1  
000000005AB2D1E1  jg          __process_denormal_tiny (5AB2D210h)  
000000005AB2D1E3  add         ecx,432h  
; If m + 432h went negative, clamp the shift count to 0.
000000005AB2D1E9  cmovs       rcx,r8  
000000005AB2D1ED  shl         r9,cl  
000000005AB2D1F0  mov         rcx,r9  
000000005AB2D1F3  mov         qword ptr [rsp+30h],rcx  
000000005AB2D1F8  mulsd       xmm0,mmword ptr [rsp+30h]  
000000005AB2D1FE  jmp         __final_check (5AB2D16Eh)  
000000005AB2D203  nop         word ptr [rax+rax]  
; Return the constant __real_smallest_denormal.
__process_denormal_tiny:
000000005AB2D210  movsd       xmm0,mmword ptr [__real_smallest_denormal (5AB31050h)]  
000000005AB2D218  jmp         __final_check (5AB2D16Eh)  
000000005AB2D21D  nop         dword ptr [rax]  
; Scaled input <= __real_m68800: call _exp_special with xmm0 = raw x,
; xmm1 = __real_zero, r8d = __flag_y_zero.
; NOTE(review): _exp_special is not shown; presumably its xmm0 is the result.
__y_is_zero:
000000005AB2D220  movsd       xmm1,mmword ptr [__real_zero (5AB31030h)]  
000000005AB2D228  movd        xmm0,rdx  
000000005AB2D22D  mov         r8d,dword ptr [__flag_y_zero (5AB31004h)]  
000000005AB2D234  call        _exp_special (5AAB46B0h)  
000000005AB2D239  jmp         __finish (5AB2D2A0h)  
000000005AB2D23E  xchg        ax,ax  
; Scaled input > __real_p65536: call _exp_special with xmm0 = raw x,
; xmm1 = __real_inf, r8d = __flag_y_inf.
__y_is_inf:
000000005AB2D240  movsd       xmm1,mmword ptr [__real_inf (5AB31090h)]  
000000005AB2D248  movd        xmm0,rdx  
000000005AB2D24D  mov         r8d,dword ptr [__flag_y_inf (5AB31008h)]  
000000005AB2D254  call        _exp_special (5AAB46B0h)  
000000005AB2D259  jmp         __finish (5AB2D2A0h)  
000000005AB2D25B  nop         dword ptr [rax+rax]  
; Input matched the __real_inf bit mask. bits(x) == __real_inf returns with
; xmm0 unchanged (still x); bits(x) == __real_ninf returns __real_zero; any
; other pattern gets __real_qnanbit ORed in and is passed to _exp_special in
; xmm1 with r8d = __flag_x_nan.
__x_is_inf_or_nan:
000000005AB2D260  cmp         rdx,qword ptr [__real_inf (5AB31090h)]  
000000005AB2D267  je          __finish (5AB2D2A0h)  
000000005AB2D269  cmp         rdx,qword ptr [__real_ninf (5AB310A0h)]  
000000005AB2D270  je          __process_zero (5AB2D290h)  
000000005AB2D272  or          rdx,qword ptr [__real_qnanbit (5AB310B0h)]  
000000005AB2D279  movd        xmm1,rdx  
000000005AB2D27E  mov         r8d,dword ptr [__flag_x_nan (5AB31000h)]  
000000005AB2D285  call        _exp_special (5AAB46B0h)  
000000005AB2D28A  jmp         __finish (5AB2D2A0h)  
000000005AB2D28C  nop         dword ptr [rax]  
; Return the constant __real_zero.
__process_zero:
000000005AB2D290  movsd       xmm0,mmword ptr [__real_zero (5AB31030h)]  
000000005AB2D298  jmp         __final_check (5AB2D16Eh)  
000000005AB2D29D  nop         dword ptr [rax]  
; Second exit, used by the special-case paths: same epilogue as __final_check.
__finish:
000000005AB2D2A0  add         rsp,88h  
000000005AB2D2A7  ret  

VS2005(下文简称05)的汇编:

; exp as seen from the VS2005 build: this is only a thunk that jumps
; indirectly through the pointer stored at __imp_exp. The body of the exp
; implementation itself is not part of this listing.
exp:
00000001400010BA  jmp         qword ptr [__imp_exp (14000A518h)] 
; _RTC_InitBase -- a separate function that merely follows the exp thunk in
; the dump; nothing here performs exp arithmetic.
; If the byte flag `init` is already non-zero it returns immediately.
; Otherwise it sets `init` to 1, calls _CRT_RTC_INITW with ecx = 0, edx = 0,
; r8d = 0, r9d = 1 and a zero dword at [rsp+20h], then passes the value
; returned in rax to _RTC_SetErrorFuncW.
_RTC_InitBase:
00000001400010C0  sub         rsp,38h 
; One-time guard: skip to the epilogue if already initialized.
00000001400010C4  cmp         byte ptr [init (1400081A0h)],0 
00000001400010CB  jne         _RTC_InitBase+36h (1400010F6h) 
00000001400010CD  mov         r9d,1 
00000001400010D3  xor         r8d,r8d 
00000001400010D6  xor         edx,edx 
00000001400010D8  xor         ecx,ecx 
00000001400010DA  mov         byte ptr [init (1400081A0h)],1 
; Fifth argument slot, just above the 32-byte shadow space.
00000001400010E1  mov         dword ptr [rsp+20h],0 
00000001400010E9  call        _CRT_RTC_INITW (140001658h) 
; Forward the returned value as the first argument of the next call.
00000001400010EE  mov         rcx,rax 
00000001400010F1  call        _RTC_SetErrorFuncW (140001620h) 
00000001400010F6  add         rsp,38h 
00000001400010FA  ret

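两段汇编看上去差异很明显,不过需要注意:上面05的这段其实只是一条经由导入表项 `__imp_exp` 的间接跳转(紧随其后的 `_RTC_InitBase` 是另一个无关的函数),真正的 exp 实现位于运行时库 DLL 中,需要单步跟进这条 jmp 才能看到,因此这里还不能直接拿它和10的实现逐条比较。结合两边的计算结果,我估计10的精度应该更高很多。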

经过性能测试,05的性能还不到10的一半:

调用100万次,05需要0.007~0.008秒,10需要0.002~0.003秒。

由于还在上班,暂时没法上高精度计时,但是多次重复差不多都是这么个值,还是10又快又准。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值