内嵌汇编代码:
#include "r_math.h"
/**
* 以下所有函数都是对定点型的运算,该定点型的值乘以10000再以整型来存储。
*/
/**
* Converts a real to int, returning its integer part (truncated toward zero).
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = i / 10000 rounded toward zero.
* Clobbers: EDX, flags. ECX is left unchanged.
*
* The division is done by multiplying with 1759218605 = ceil(2^44 / 10000) and
* shifting the 64-bit product right by 44. For negative i that value is always
* exactly one below the truncated quotient, so the sign bit is added back.
* The earlier "add 9999 first" variant was off by one whenever i + 9999 was a
* negative multiple of 10000 (for example i = -19999 produced -2 instead of -1).
*/
__declspec(naked) int __fastcall r_int(real i)
{
    _asm{
        mov eax,1759218605;
        imul ecx;           // EDX:EAX = i * ceil(2^44/10000)
        sar edx,12;         // EDX = product >> 44
        mov eax,edx;
        shr eax,31;         // EAX = 1 when the quotient is negative, else 0
        add eax,edx;        // correct toward zero
        ret;
    }
}
/**
* Rounds a real down to int: the largest integer not greater than the value.
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = floor(i / 10000).
* Clobbers: EDX, flags. ECX is left unchanged.
*
* floor(i/d) for negative i equals ~((~i)/d), so the value is XOR-ed with its
* sign mask, divided as a non-negative number (multiply by
* ceil(2^44 / 10000) = 1759218605, shift by 44) and XOR-ed back.
* Multiplying the signed value directly, as was done before, returns one too
* little for negative multiples of 10000 (i = -10000 produced -2 instead of -1).
*/
__declspec(naked) int __fastcall r_INT(real i)
{
    _asm{
        mov eax,ecx;
        cdq;                // EDX = sign mask of i (0 or -1)
        xor eax,edx;        // EAX = i for i >= 0, ~i for i < 0 (never negative)
        mov edx,1759218605;
        mul edx;            // EDX:EAX = EAX * ceil(2^44/10000)
        shr edx,12;         // EDX = EAX / 10000
        mov eax,ecx;
        sar eax,31;         // sign mask again
        xor eax,edx;        // undo the complement: floor(i / 10000)
        ret;
    }
}
/**
* Converts a ureal to unsigned int, returning its integer part.
*
* In:  ECX = i (unsigned fixed-point value, scale 10000).
* Out: EAX = i / 10000.
* Clobbers: EDX, flags.
*/
__declspec(naked) unsigned int __fastcall r_int(ureal i)
{
    _asm{
        mov eax,ecx;
        mov edx,0xD1B71759; // 3518437209, reciprocal of 10000 used with a shift of 32+13
        mul edx;            // EDX = high dword of i * constant
        shr edx,13;         // EDX = i / 10000
        mov eax,edx;
        ret;
    }
}
/**
* Returns the fractional part of a real; pairs with r_int(real).
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = i - trunc(i / 10000) * 10000, in -9999..9999 with the sign of i.
* Clobbers: EDX, flags.
*
* The quotient is obtained by multiplying with ceil(2^44 / 10000) = 1759218605
* and adding the sign bit (truncation toward zero). The earlier
* "add 9999 first" quotient was off by one when i + 9999 was a negative
* multiple of 10000 (i = -19999 produced 1 instead of -9999).
*/
__declspec(naked) int __fastcall r_pot(real i)
{
    _asm{
        mov eax,1759218605;
        imul ecx;           // EDX:EAX = i * ceil(2^44/10000)
        sar edx,12;
        mov eax,edx;
        shr eax,31;
        add eax,edx;        // EAX = trunc(i / 10000)
        imul eax,-10000;
        add eax,ecx;        // EAX = i - quotient * 10000
        ret;
    }
}
/**
* Returns the absolute value of the fractional part of a real,
* i.e. abs(r_pot(real)).
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = |i - trunc(i / 10000) * 10000|, in 0..9999.
* Clobbers: EDX, flags.
*
* Uses the sign-bit-corrected reciprocal multiplication for the quotient;
* the earlier "add 9999 first" quotient was off by one when i + 9999 was a
* negative multiple of 10000 (i = -19999 produced 1 instead of 9999).
*/
__declspec(naked) int __fastcall abs_pot(real i)
{
    _asm{
        mov eax,1759218605;
        imul ecx;           // EDX:EAX = i * ceil(2^44/10000)
        sar edx,12;
        mov eax,edx;
        shr eax,31;
        add eax,edx;        // EAX = trunc(i / 10000)
        imul eax,-10000;
        add eax,ecx;        // EAX = signed fractional part
        cdq;                // EDX = its sign mask
        xor eax,edx;
        sub eax,edx;        // branch-free absolute value
        ret;
    }
}
/**
* Returns the fractional part of a real; pairs with r_INT(real).
* The result is never negative.
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = i - floor(i / 10000) * 10000, in 0..9999.
* Clobbers: EDX, flags.
*
* The floor quotient is computed as ~((~i) / 10000) for negative i (XOR with
* the sign mask before and after an unsigned reciprocal multiplication).
* Multiplying the signed value directly, as was done before, gave a quotient
* one too small for negative multiples of 10000, so i = -10000 returned 10000.
*/
__declspec(naked) int __fastcall r_POT(real i)
{
    _asm{
        mov eax,ecx;
        cdq;                // EDX = sign mask of i
        xor eax,edx;        // EAX = i or ~i, never negative
        mov edx,1759218605;
        mul edx;
        shr edx,12;         // EDX = EAX / 10000
        mov eax,ecx;
        sar eax,31;
        xor edx,eax;        // EDX = floor(i / 10000)
        imul edx,-10000;
        lea eax,[ecx+edx];  // EAX = i - quotient * 10000
        ret;
    }
}
/**
* Returns the fractional part of a ureal; pairs with r_int(ureal).
*
* In:  ECX = i (unsigned fixed-point value, scale 10000).
* Out: EAX = i - (i / 10000) * 10000.
* Clobbers: EDX, flags.
*/
__declspec(naked) unsigned int __fastcall r_pot(ureal i)
{
    _asm{
        mov eax,ecx;
        mov edx,0xD1B71759; // 3518437209, reciprocal of 10000 used with a shift of 32+13
        mul edx;
        shr edx,13;         // EDX = i / 10000
        imul edx,-10000;
        lea eax,[edx+ecx];  // EAX = i - quotient * 10000
        ret;
    }
}
/**
* Splits a real into integer and fractional parts.
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = r_int(real) result, i.e. i / 10000 truncated toward zero;
*      EDX = matching fractional part, i - EAX * 10000 (non-standard second
*            return value, only reachable from assembly callers).
* Clobbers: flags.
*
* Uses the sign-bit-corrected reciprocal multiplication; the earlier
* "add 9999 first" quotient was off by one when i + 9999 was a negative
* multiple of 10000 (i = -19999 produced -2 and 1 instead of -1 and -9999).
*/
__declspec(naked) int __fastcall r_spt(real i)
{
    _asm{
        mov eax,1759218605;
        imul ecx;               // EDX:EAX = i * ceil(2^44/10000)
        sar edx,12;
        mov eax,edx;
        shr eax,31;
        add eax,edx;            // EAX = trunc(i / 10000)
        imul edx,eax,-10000;
        add edx,ecx;            // EDX = i - quotient * 10000
        ret;
    }
}
/**
* Splits a real into integer and fractional parts using floor rounding.
*
* In:  ECX = i (fixed-point value, scale 10000).
* Out: EAX = r_INT(real) result, i.e. floor(i / 10000);
*      EDX = matching fractional part, i - EAX * 10000, in 0..9999
*            (non-standard second return value, only reachable from assembly
*            callers).
* Clobbers: flags.
*
* The floor quotient is computed as ~((~i) / 10000) for negative i.
* Multiplying the signed value directly, as was done before, gave a quotient
* one too small for negative multiples of 10000 (i = -10000 produced -2 and
* 10000 instead of -1 and 0).
*/
__declspec(naked) int __fastcall r_SPT(real i)
{
    _asm{
        mov eax,ecx;
        cdq;                    // EDX = sign mask of i
        xor eax,edx;            // EAX = i or ~i, never negative
        mov edx,1759218605;
        mul edx;
        shr edx,12;             // EDX = EAX / 10000
        mov eax,ecx;
        sar eax,31;
        xor eax,edx;            // EAX = floor(i / 10000)
        imul edx,eax,-10000;
        add edx,ecx;            // EDX = i - quotient * 10000
        ret;
    }
}
/**
* Splits a ureal into integer and fractional parts.
*
* In:  ECX = i (unsigned fixed-point value, scale 10000).
* Out: EAX = r_int(ureal) result, i.e. i / 10000;
*      EDX = fractional part, i - EAX * 10000 (non-standard second return
*            value, only reachable from assembly callers).
* Clobbers: flags.
*/
__declspec(naked) unsigned int __fastcall r_spt(ureal i)
{
    _asm{
        mov eax,ecx;
        mov edx,0xD1B71759; // 3518437209, reciprocal of 10000 used with a shift of 32+13
        mul edx;
        shr edx,13;         // EDX = i / 10000
        mov eax,edx;        // quotient is the return value
        imul edx,-10000;
        add edx,ecx;        // EDX = i - quotient * 10000
        ret;
    }
}
/**
* Returns a multiplied by b (both fixed-point, scale 10000).
*
* In:  ECX = a, EDX = b.  Out: EAX = a*b/10000.
* ESI and EDI are saved and restored; EAX, ECX, EDX and flags are clobbered.
*
* The 64-bit product is formed on the magnitude, divided by 10000 without a
* DIV instruction (reciprocal constant 3518437209 with a 13-bit shift, plus
* the identities 2^32 = 429496*10000 + 7296), and the sign is reapplied last.
* The part of the quotient that would not fit in 32 bits is discarded.
*/
__declspec(naked) real __fastcall r_mul(real a,real b)
{// rounds to nearest
_asm{
push esi;
push edi;
mov eax,edx; // EAX = b
xor edx,ecx;
sar edx,31; // EDX = -1 when a and b differ in sign, else 0
mov edi,edx; // EDI keeps that mask until the end
xor eax,edx;
sub eax,edx; // EAX = b, negated when the signs differ
imul ecx; // EDX:EAX = a * EAX, the non-negative magnitude of a*b
//add eax,5000;
//adc edx,0;
mov ecx,eax; // ECX = low dword of the product
mov eax,3518437209; // reciprocal constant for division by 10000
mov esi,edx; // ESI = high dword of the product
mul edx;
shr edx,13; // EDX = high / 10000
imul edx,-10000;
add esi,edx; // ESI = high % 10000
push esi;
imul esi,7296; // 7296 = 2^32 % 10000
add ecx,esi; // ECX = low + (high % 10000) * 7296, CF set on wrap-around
sbb esi,esi; // ESI = -1 if the addition wrapped, else 0
mov eax,3518437209;
and esi,eax; // ESI = reciprocal constant if wrapped, else 0
mul ecx; // EDX = high dword of ECX * constant
pop eax; // EAX = high % 10000
imul eax,429496; // 429496 = 2^32 / 10000
add edx,esi; // account for the 2^32 lost in the wrap-around
shr edx,13;// original note: "CF need not be considered"
adc eax,edx; // sum of both partial quotients, plus the bit shifted out above (rounding)
xor eax,edi;
sub eax,edi; // reapply the sign of the product
pop edi;
pop esi;
ret;
}
/* Disabled alternative that takes |a| and |b| separately and adds 5000 before dividing:
_asm{
push ebx;
push esi;
push edi;
mov eax,edx;
cdq;
xor eax,edx;
sub eax,edx;
mov ebx,ecx;
sar ecx,31;
xor ebx,ecx;
sub ebx,ecx;
xor ecx,edx;
mul ebx;
add eax,5000;
adc edx,0;
mov ebx,eax;
mov eax,3518437209;
mov esi,edx;
mul edx;
shr edx,13;
imul edx,-10000;
add esi,edx;
imul edi,esi,429496;
imul esi,7296;
add ebx,esi;
sbb esi,esi;
and esi,3518437209;
mov eax,3518437209;
mul ebx;
add edx,esi;
shr edx,13;//CF need not be considered
lea eax,[edi+edx];
xor eax,ecx;
sub eax,ecx;
pop edi;
pop esi;
pop ebx;
ret;
}*/
}
/**
* Returns a multiplied by b (both unsigned fixed-point, scale 10000).
*
* In:  ECX = a, EDX = b.  Out: EAX = a*b/10000.
* EBX is saved and restored; EAX, ECX, EDX and flags are clobbered.
*
* The 64-bit product is divided by 10000 without a DIV instruction
* (reciprocal constant 3518437209 with a 13-bit shift, plus the identity
* 2^32 = 429496*10000 + 7296). The part of the quotient that would not fit in
* 32 bits is discarded.
*/
__declspec(naked) ureal __fastcall r_mul(ureal a,ureal b)
{// rounds to nearest
_asm{
push ebx;
mov eax,edx; // EAX = b
mul ecx; // EDX:EAX = a * b
mov ecx,eax; // ECX = low dword of the product
mov eax,3518437209; // reciprocal constant for division by 10000
mov ebx,edx; // EBX = high dword of the product
mul edx;
shr edx,13; // EDX = high / 10000
imul edx,-10000;
add ebx,edx; // EBX = high % 10000
push ebx;
imul ebx,7296; // 7296 = 2^32 % 10000
add ecx,ebx; // ECX = low + (high % 10000) * 7296, CF set on wrap-around
sbb ebx,ebx; // EBX = -1 if the addition wrapped, else 0
mov eax,3518437209;
and ebx,eax; // EBX = reciprocal constant if wrapped, else 0
mul ecx; // EDX = high dword of ECX * constant
pop eax; // EAX = high % 10000
imul eax,429496; // 429496 = 2^32 / 10000
add edx,ebx; // account for the 2^32 lost in the wrap-around
shr edx,13;// original note: "CF need not be considered"
adc eax,edx; // sum of both partial quotients, plus the bit shifted out above (rounding)
pop ebx;
ret;
}
/* Disabled alternative that adds 5000 to the product before dividing:
_asm{
push ebx;
mov eax,edx;
mul ecx;
add eax,5000;
adc edx,0;
mov ecx,eax;
mov eax,3518437209;
mov ebx,edx;
mul edx;
shr edx,13;
imul edx,-10000;
add ebx,edx;
push ebx;
imul ebx,7296;
add ecx,ebx;
sbb ebx,ebx;
mov eax,3518437209;
and ebx,eax;
mul ecx;
add edx,ebx;
shr edx,13;//CF need not be considered
pop eax;
imul eax,429496;
add eax,edx;
pop ebx;
ret;
}*/
}
/**
* Returns a divided by b (both fixed-point, scale 10000), rounded to the
* nearest representable value with ties away from zero.
*
* In:  ECX = a, EDX = b.  Out: EAX = a*10000/b.
* Clobbers: ECX, EDX, flags.
*
* The quotient is computed with one extra binary digit (20000*a / b) and that
* digit is used for rounding. Rounding is done on the magnitude: applying
* sar/adc directly to the truncated signed quotient, as was done before,
* turned e.g. -1.9 into -1 instead of -2.
* The function is naked, so it needs its own RET; without it execution ran on
* into the code that follows.
*
* IDIV raises a divide error when b is 0 or when the doubled quotient does not
* fit in 32 bits; no check is made here.
*/
__declspec(naked) real __fastcall r_div(real a,real b)
{
    _asm{
        xchg ecx,edx;       // ECX = b, EDX = a
        mov eax,20000;
        imul edx;           // EDX:EAX = 20000 * a
        idiv ecx;           // EAX = 2 * (a*10000/b), truncated toward zero
        cdq;                // EDX = sign mask of the quotient
        xor eax,edx;
        sub eax,edx;        // EAX = magnitude
        shr eax,1;          // drop the extra digit, CF = that digit
        adc eax,0;          // round half up on the magnitude
        xor eax,edx;
        sub eax,edx;        // reapply the sign
        ret;
    }
}
/**
* Returns a divided by b (both unsigned fixed-point, scale 10000), rounded to
* the nearest representable value (ties up).
*
* In:  ECX = a, EDX = b.  Out: EAX = a*10000/b.
* Clobbers: ECX, EDX, flags.
*
* The quotient is computed with one extra binary digit (20000*a / b) and that
* digit is used for rounding.
* The function is naked, so it needs its own RET; without it execution ran on
* into the code that follows.
*
* DIV raises a divide error when b is 0 or when the doubled quotient does not
* fit in 32 bits; no check is made here.
*/
__declspec(naked) ureal __fastcall r_div(ureal a,ureal b)
{
    _asm{
        xchg ecx,edx;       // ECX = b, EDX = a
        mov eax,20000;
        mul edx;            // EDX:EAX = 20000 * a
        div ecx;            // EAX = 2 * (a*10000/b), truncated
        shr eax,1;          // drop the extra digit, CF = that digit
        adc eax,0;          // round half up
        ret;
    }
}
/**
* Parses a decimal string such as "-12.3456" into a real (scale 10000).
*
* numstr  text to parse; the integer part is read with strtol (base 10), so
*         leading white space and an optional sign are accepted.
* errpos  if not NULL, receives the position where parsing stopped.
*
* Up to four fractional digits are consumed. A fifth digit, if present, is
* only used for rounding (5..9 rounds up) and is not consumed, so *errpos is
* left pointing at it. When the '.' is not followed by a digit, *errpos points
* just past the '.'.
*
* The sign of the fraction is taken from the text that strtol consumed, not
* from the parsed integer, so that "-0.5" yields -5000 (it used to yield
* +5000 because the integer part 0 carries no sign).
* Overflow of the 32-bit result is not detected.
*/
real strtor(char *numstr,char **errpos)
{
    static const int scale[4]={1000,100,10,1};
    const char *scan=numstr;
    int negative=0;
    int frac=0;
    int digit,k;
    int whole=strtol(numstr,&numstr,10);
    /* The consumed prefix holds only white space, an optional sign and digits. */
    for(;scan<numstr;scan++)
    {
        if(*scan=='-')
        {
            negative=1;
            break;
        }
    }
    if(*numstr=='.')
    {
        numstr++;
        for(k=0;k<4;k++)
        {
            digit=*numstr-'0';
            if(digit<0||digit>9)break;
            frac+=digit*scale[k];
            numstr++;
        }
        if(k==4)
        {
            /* fifth digit: rounding only, deliberately not consumed */
            digit=*numstr-'0';
            if(digit>4&&digit<=9)frac++;
        }
    }
    if(errpos!=NULL)*errpos=numstr;
    return negative?whole*10000-frac:whole*10000+frac;
}
/**
* Parses a decimal string such as "12.3456" into a ureal (scale 10000).
*
* numstr  text to parse; the integer part is read with strtoul (base 10).
* errpos  if not NULL, receives the position where parsing stopped.
*
* Up to four fractional digits are consumed. A fifth digit, if present, is
* only used for rounding (5..9 rounds up) and is not consumed, so *errpos is
* left pointing at it. When the '.' is not followed by a digit, *errpos points
* just past the '.'.
*/
ureal strtour(char *numstr,char **errpos)
{
    static const unsigned int scale[4]={1000,100,10,1};
    unsigned int whole=strtoul(numstr,&numstr,10);
    unsigned int frac=0;
    int digit;
    int k=0;
    if(*numstr=='.')
    {
        numstr++;
        while(k<4)
        {
            digit=*numstr-'0';
            if(digit<0||digit>9)break;
            frac+=(unsigned int)digit*scale[k];
            numstr++;
            k++;
        }
        if(k==4)
        {
            /* fifth digit: rounding only, not consumed */
            digit=*numstr-'0';
            if(digit>4&&digit<=9)frac++;
        }
    }
    if(errpos!=NULL)*errpos=numstr;
    return whole*10000+frac;
}
/**
* Reference sine approximation on binary fixed-point values with rn fraction
* bits: a - a^3/6 + a^5/t, every intermediate rounded to nearest.
*
* a   argument, scaled by 2^rn.
* rn  number of fraction bits.
* t   divisor of the fifth-order term (tunable).
* Original note: t=121.4375, rn=18 works best.
*/
real sine_rn(real a,int rn,double t)
{
    const int half=1<<(rn-1);
    const int one=1<<rn;
    const bool negative=a<0;

    /* a^2 with rn fraction bits */
    const long long sq=((long long)a*a+half)>>rn;
    /* a^3 with 2*rn fraction bits */
    long long cube=sq*a;
    /* a^5 with 2*rn fraction bits, built from the rounded a^3 */
    long long fifth=((cube+half)>>rn)*sq;

    /* Bias each numerator by half its divisor, away from zero, so that the
       truncating divisions below round to nearest. */
    const long long cubeBias=3LL<<rn;
    const long long fifthBias=(long long)(half*t);
    if(negative)
    {
        cube-=cubeBias;
        fifth-=fifthBias;
    }
    else
    {
        cube+=cubeBias;
        fifth+=fifthBias;
    }

    const long long cubicTerm=cube/(6LL<<rn);
    const long long quinticTerm=fifth/(long long)(one*t);
    return a-(int)cubicTerm+(int)quinticTerm;
}
/*
__declspec(naked) real __fastcall sine(real a)
{//计算sin的值,采用二进制18位小数的定点数,参数a<=205888/(2^18).
_asm{
push ebx;
mov eax,ecx;
sar ecx,31;
xor eax,ecx;
sub eax,ecx;
push ecx;
mov ebx,eax;//abs(a)
mul ebx;
shl edx,14;
shr eax,18;
adc eax,edx;//a*a>>18
mov DWORD PTR [esp-4],eax;//a2
mul ebx;
shl edx,14;
shr eax,18;
adc edx,eax;//a*(a*a>>18)>>18
mov ecx,edx;//a3
mov eax,0x2AAAAAAB;
mul edx;
mov eax,ecx;
mov ecx,edx;//b4
mul DWORD PTR [esp-4];
shl edx,29;
shr eax,3;
adc eax,edx;
mov edx,0x86F3D980;//未确定此值及范围0x86FCAECA
mul edx;
shr edx,21;//b6
lea eax,[ebx+edx];
sub eax,ecx;
pop ecx;
xor eax,ecx;
sub eax,ecx;
pop ebx;
ret;
}
}
*/
/**
* Computes the sine of a fixed-point value with 18 binary fraction bits.
*
* In:  [esp+4] = a (stdcall, the callee removes the argument with RET 4).
* Out: EAX = result. Clobbers ECX, EDX and flags.
*
* Original notes: optimised implementation; argument a<=205888/(2^18).
* The code evaluates a * (2^30 - c2*a^2 + c4*a^4) >> 30 with 2^30 standing
* for 1.0; c2 comes from the 0xAAAAAAAB multiply and c4 from 0x86E0D052.
*/
__declspec(naked) real __stdcall sine(real a)
{// see the header comment above
_asm{
mov eax,DWORD PTR [esp+4];
imul eax; // EDX:EAX = a*a
shl edx,28;
shr eax,4; // CF = last bit shifted out, consumed by the ADC below
adc eax,edx;//a*a>>4 (rounded)
mov ecx,eax;
mul ecx;//edx=(a*a>>4)*(a*a>>4)>>32
mov eax,0x86E0D052;//? original note: this value is not finalised
mul edx;//original note: edx=a*a*a*a*0x86F3D980>>72 (NOTE(review): the constant named here differs from the one actually loaded)
xchg ecx,edx;//original note: (ecx>>20)<<(30-18),b6 ; ECX = fourth-order term, EDX = a*a>>4
mov eax,0xAAAAAAAB;
mul edx;//edx=(a*a>>4)*2/3=(a*a>>3)/3,(edx>>16)<<(30-18),b4
shr ecx,4;
sbb edx,ecx; // EDX = second-order term - fourth-order term - CF
mov eax,1<<30; // 1.0 with 30 fraction bits
shr edx,4;
sbb eax,edx; // EAX = 1.0 - EDX - CF
imul DWORD PTR [esp+4]; // EDX:EAX = polynomial * a
shl edx,2;
shr eax,30;
adc eax,edx; // product >> 30, rounded with the last bit shifted out
ret 4;
}
}
/**
* Same computation as sine(), but the fourth-order constant is supplied by
* the caller so that candidate values can be tried out.
*
* In:  [esp+4] = a, [esp+8] = t (stdcall, the callee removes both arguments
*      with RET 8).
* Out: EAX = result. Clobbers ECX, EDX and flags.
*
* Original notes: tests parameter t while computing sin; fixed point with 18
* binary fraction bits; argument a<=205888/(2^18).
*/
__declspec(naked) real __stdcall test_sine(real a,unsigned int t)
{// see the header comment above
_asm{
mov eax,DWORD PTR [esp+4];
imul eax; // EDX:EAX = a*a
shl edx,28;
shr eax,4; // CF = last bit shifted out, consumed by the ADC below
adc eax,edx;//a*a>>4 (rounded)
mov ecx,eax;
mul ecx;//edx=(a*a>>4)*(a*a>>4)>>32
mov eax,DWORD PTR [esp+8];//? original note: this value is not finalised; here it is the caller's t
mul edx;//original note: edx=a*a*a*a*0x86F3D980>>72 (with t in place of the constant)
xchg ecx,edx;//original note: (ecx>>20)<<(30-18),b6 ; ECX = fourth-order term, EDX = a*a>>4
mov eax,0xAAAAAAAB;
mul edx;//edx=(a*a>>4)*2/3=(a*a>>3)/3,(edx>>16)<<(30-18),b4
shr ecx,4;
sbb edx,ecx; // EDX = second-order term - fourth-order term - CF
mov eax,1<<30; // 1.0 with 30 fraction bits
shr edx,4;
sbb eax,edx; // EAX = 1.0 - EDX - CF
imul DWORD PTR [esp+4]; // EDX:EAX = polynomial * a
shl edx,2;
shr eax,30;
adc eax,edx; // product >> 30, rounded with the last bit shifted out
ret 8;
}
}
/**
* Reference cosine approximation on binary fixed-point values with rn
* fraction bits: 1 - a^2/2 + a^4/24 - a^6/t, every term rounded to nearest.
*
* a   argument, scaled by 2^rn.
* rn  number of fraction bits.
* t   divisor of the sixth-order term (tunable).
* Original note: t=724.5, rn=18 works best.
*/
real cose_rn(real a,int rn,double t)
{
    typedef unsigned long long u64;
    const int half=1<<(rn-1);
    const int one=1<<rn;

    /* a^2 with 2*rn fraction bits */
    const u64 sqRaw=(long long)a*(long long)a;
    /* a^2/2, rounded, with rn fraction bits */
    const u64 secondTerm=(sqRaw+one)/(1<<(rn+1));
    /* a^2, rounded, with rn fraction bits */
    const u64 sq=(sqRaw+half)>>rn;

    /* a^4 with 2*rn fraction bits */
    const u64 quadRaw=sq*sq;
    /* a^4/24, rounded */
    const u64 fourthTerm=(quadRaw+12*one)/(u64)(24*one);
    /* a^4, rounded, with rn fraction bits */
    const u64 quad=(quadRaw+half)>>rn;

    /* a^6 with 2*rn fraction bits, pre-biased by half the divisor */
    const u64 sixthRaw=quad*sq+(u64)(half*t);
    const u64 sixthTerm=sixthRaw/(u64)(one*t);

    return one-(int)secondTerm+(int)fourthTerm-(int)sixthTerm;
}
/*
__declspec(naked) real __fastcall cose(real a)
{//计算cos的值,采用二进制18位小数的定点数,参数a<=205888/(2^18).
_asm{
push ebx;
mov eax,ecx;
imul ecx;
shl edx,13;
lea ecx,[edx+edx];
shr eax,18;
adc ecx,eax;//a2=a*a>>18
shr eax,1;
adc edx,eax;//b2=a*a>>19
mov DWORD PTR [esp-4],edx;//b2
mov eax,ecx;
mul ecx;
shl edx,14;
shr eax,18;
lea ebx,[eax+edx];//a4=(a*a>>18)*(a*a>>18)>>18
mov eax,0x0AAAAAAB;
mul ebx;
xchg edx,ecx;//a2
mov eax,ebx;
mul edx;
shl edx,30;
shr eax,2;
adc eax,edx;
mov edx,0xB4689F20;//取值范围[0XB44F31AD,0XB44F3512]
mul edx;
shr edx,25;
sub ecx,edx;
lea eax,[ecx+(1<<18)];
sub eax,DWORD PTR [esp-4];
pop ebx;
ret;
}
}
*/
/**
* Computes the cosine of a fixed-point value with 18 binary fraction bits.
*
* In:  ECX = a.  Out: EAX = result. Clobbers ECX, EDX and flags.
* Original notes: optimised implementation; argument a<=205888/(2^18).
*
* Evaluates 1 - c2*a^2 + c4*a^4 - c6*a^6, where c4 comes from the 0xAAAAAAAB
* multiply and c6 from 0xB46A73F7.
*
* The intermediate a^2 is kept on the stack with PUSH/POP. It used to be
* written to [esp-4], i.e. below the stack pointer, which 32-bit Windows does
* not protect (there is no red zone), so the value could be overwritten
* asynchronously.
*/
__declspec(naked) real __fastcall cose(real a)
{
    _asm{
        mov eax,ecx;
        imul ecx;               // EDX:EAX = a*a
        shl edx,28;
        shr eax,4;              // CF = last bit shifted out
        adc eax,edx;            // a2 = a*a>>4 (rounded); b2 would be a2>>15
        push eax;               // [esp] = a2
        mov ecx,eax;
        mul ecx;                // EDX = a4 = (a2*a2)>>32
        mov eax,edx;
        xchg ecx,edx;           // ECX = a4, EDX = a2
        mul edx;                // EDX = (a4*a2)>>32
        mov eax,0xB46A73F7;     // sixth-order coefficient.
                                // NOTE(review): the original comment gave the valid range as
                                // [0xB44F31AD,0xB44F3512], which does not contain this value - confirm.
        mul edx;                // EDX = sixth-order term
        xchg ecx,edx;           // ECX = sixth-order term, EDX = a4
        mov eax,0xAAAAAAAB;
        mul edx;                // EDX = a4*2/3
        // original note: ecx=30bit,edx=31bit,eax=32bit
        shr ecx,5;
        sbb edx,ecx;            // fourth-order term - sixth-order term - CF
        shr edx,3;
        // original note: no rounding here, to save instructions
        sub edx,DWORD PTR [esp];// minus a2; wraps below zero on purpose
        shr edx,15;             // logical shift keeps the wrapped top bits as 2^17 + ...
        adc edx,1<<17;          // adding another 2^17 yields 2^18 (1.0) plus the signed sum
        mov eax,edx;
        pop ecx;                // discard the saved a2 (ECX is a scratch register)
        ret;
    }
}
/**
* Same computation as cose(), but the sixth-order constant is supplied by the
* caller so that candidate values can be tried out.
*
* In:  ECX = a, EDX = t.  Out: EAX = result. Clobbers ECX, EDX and flags.
* Original notes: tests parameter t while computing cos; fixed point with 18
* binary fraction bits; argument a<=205888/(2^18).
*
* t and the intermediate a^2 are kept on the stack with PUSH. They used to be
* written to [esp-8] and [esp-4], i.e. below the stack pointer, which 32-bit
* Windows does not protect (there is no red zone), so the values could be
* overwritten asynchronously.
*/
__declspec(naked) real __fastcall test_cose(real a,unsigned int t)
{
    _asm{
        push edx;               // [esp] = t
        mov eax,ecx;
        imul ecx;               // EDX:EAX = a*a
        shl edx,28;
        shr eax,4;              // CF = last bit shifted out
        adc eax,edx;            // a2 = a*a>>4 (rounded); b2 would be a2>>15
        push eax;               // [esp] = a2, [esp+4] = t
        mov ecx,eax;
        mul ecx;                // EDX = a4 = (a2*a2)>>32
        mov eax,edx;
        xchg ecx,edx;           // ECX = a4, EDX = a2
        mul edx;                // EDX = (a4*a2)>>32
        mov eax,DWORD PTR [esp+4]; // caller-supplied sixth-order coefficient
                                // (original note: valid range [0xB44F31AD,0xB44F3512])
        mul edx;                // EDX = sixth-order term
        xchg ecx,edx;           // ECX = sixth-order term, EDX = a4
        mov eax,0xAAAAAAAB;
        mul edx;                // EDX = a4*2/3
        // original note: ecx=30bit,edx=31bit,eax=32bit
        shr ecx,5;
        sbb edx,ecx;            // fourth-order term - sixth-order term - CF
        shr edx,3;
        // original note: no rounding here, to save instructions
        sub edx,DWORD PTR [esp];// minus a2; wraps below zero on purpose
        shr edx,15;             // logical shift keeps the wrapped top bits as 2^17 + ...
        adc edx,1<<17;          // adding another 2^17 yields 2^18 (1.0) plus the signed sum
        mov eax,edx;
        add esp,8;              // release the two saved dwords
        ret;
    }
}
头文件如下:
#include <stdlib.h>
/* Decimal fixed point (scale 10000): build a value from integer part a and
   fractional part b (in units of 1/10000); add; subtract. */
#define r_set(a,b) ((a)*10000+(b))
#define r_add(a,b) ((a)+(b))
#define r_sub(a,b) ((a)-(b))
/* Binary fixed point with n fraction bits: build a value from integer part a
   and fractional bits b. */
#define rn_set(a,b,n) (((a)<<(n))+(b))
/* Integer part by plain right shift. */
#define rn_int(i,n) ((i)>>(n))
/* Low n bits of i as an unsigned value (shifts left then right by 32-n). */
#define rn_pot(i,n) ((unsigned int)((i)<<(32-(n)))>>(32-(n)))
/* Integer part rounded toward zero: 2^n-1 is added to negative values before
   the shift (the bias is the sign mask shifted right by 32-n). */
#define ri_int(i,n) (((int)(((unsigned int)((int)(i)>>31)>>(32-(n)))+(i))>>(n)))
/* Absolute value of ri_pot(i,n). */
#define iabs_pot(i,n) abs(ri_pot(i,n))
/* Branch-free absolute value using the sign mask.
   NOTE(review): this macro replaces the abs() declared by <stdlib.h>, which is
   included above, for all code that follows - confirm this is intended. */
#define abs(i) (((i)^((int)(i)>>31))-((int)(i)>>31))
/* Fractional part matching ri_int: i minus the biased value with its low n
   bits cleared. */
#define ri_pot(i,n) ((i)-((((unsigned int)((int)(i)>>31)>>(32-(n)))+(i))&((-1)<<(n))))
/* Alternative formula: low n bits of the biased value, minus the bias. */
#define ri_pot2(i,n) (((((unsigned int)((int)(i)>>31)>>(32-(n)))+(i))&((unsigned int)(-1)>>(32-(n))))-((unsigned int)((int)(i)>>31)>>(32-(n))))
/* rn_mul(c,a,b,n): c = (a*b)>>n for unsigned binary fixed-point values with n
   fraction bits. The last bit shifted out of the low dword is added back via
   ADC, which rounds to nearest. Only the low 32 bits of the shifted product
   are kept. Uses EAX and EDX. */
#define rn_mul(c,a,b,n) /*round up at one half (like ordinary rounding)*/\
__asm mov eax,a \
__asm mov edx,b \
__asm mul edx \
__asm shl edx,32-(n) \
__asm shr eax,n \
__asm adc eax,edx \
__asm mov c,eax
/* rn_imul(c,a,b,n): signed counterpart of rn_mul; c = (a*b)>>n using IMUL.
   The last bit shifted out of the low dword is added back via ADC, which
   rounds to nearest. Only the low 32 bits of the shifted product are kept.
   Uses EAX and EDX. */
#define rn_imul(c,a,b,n) /*round up at one half*/\
__asm mov eax,a \
__asm mov edx,b \
__asm imul edx \
__asm shl edx,32-(n) \
__asm shr eax,n \
__asm adc eax,edx \
__asm mov c,eax
/* rn_div(c,a,b,n): c = (a<<n)/b for unsigned binary fixed-point values with n
   fraction bits, rounded to nearest (ties up).
   The dividend is a<<(n+1) spread over EDX:EAX; the extra low bit of the
   quotient is used for rounding. The quotient is unsigned, so it is halved
   with SHR; the SAR used previously corrupted quotients with bit 31 set.
   Presumably 0 <= n <= 30 (the shift counts are 31-n and n+1).
   DIV raises a divide error when b is 0 or the doubled quotient does not fit
   in 32 bits. Uses EAX, ECX and EDX. */
#define rn_div(c,a,b,n) /*round up at one half*/\
__asm mov eax,a \
__asm mov ecx,b \
__asm mov edx,eax \
__asm shr edx,31-(n) \
__asm shl eax,(n)+1 \
__asm div ecx \
__asm shr eax,1 \
__asm adc eax,0 \
__asm mov c,eax
/* rn_idiv(c,a,b,n): c = (a<<n)/b for signed binary fixed-point values with n
   fraction bits, rounded to nearest (ties away from zero).
   The dividend is a<<(n+1) spread over EDX:EAX. Its high dword must be
   sign-extended, hence SAR; the SHR used previously produced a wrong dividend
   for every negative a. The extra low bit of the quotient is used for
   rounding, applied to the magnitude so that negative results round
   correctly (e.g. -1.9 -> -2).
   Presumably 0 <= n <= 30 (the shift counts are 31-n and n+1).
   IDIV raises a divide error when b is 0 or the doubled quotient does not fit
   in 32 bits. Uses EAX, ECX and EDX. */
#define rn_idiv(c,a,b,n) /*round half away from zero*/\
__asm mov eax,a \
__asm mov ecx,b \
__asm mov edx,eax \
__asm sar edx,31-(n) \
__asm shl eax,(n)+1 \
__asm idiv ecx \
__asm cdq \
__asm xor eax,edx \
__asm sub eax,edx \
__asm shr eax,1 \
__asm adc eax,0 \
__asm xor eax,edx \
__asm sub eax,edx \
__asm mov c,eax
/* Fixed-point value types: the represented number multiplied by the scale and
   stored as a 32-bit integer. */
typedef int real;
typedef unsigned int ureal;
/* Integer part: r_int truncates toward zero, r_INT rounds down. */
int __fastcall r_int(real i);
int __fastcall r_INT(real i);
unsigned int __fastcall r_int(ureal i);
/* Fractional part: r_pot pairs with r_int, r_POT pairs with r_INT,
   abs_pot is abs(r_pot). */
int __fastcall r_pot(real i);
int __fastcall abs_pot(real i);
int __fastcall r_POT(real i);
unsigned int __fastcall r_pot(ureal i);
/* Split into integer part (return value) and fractional part (left in EDX). */
int __fastcall r_spt(real i);
int __fastcall r_SPT(real i);
unsigned int __fastcall r_spt(ureal i);
/* Multiplication and division at scale 10000. */
real __fastcall r_mul(real a,real b);
ureal __fastcall r_mul(ureal a,ureal b);
real __fastcall r_div(real a,real b);
ureal __fastcall r_div(ureal a,ureal b);
/* Decimal string to fixed point; *errpos (if not NULL) receives the position
   where parsing stopped. */
real strtor(char *numstr,char **errpos);
ureal strtour(char *numstr,char **errpos);
/* Sine and cosine on binary fixed point. The *_rn functions take the number
   of fraction bits rn and a tunable divisor t; the test_* functions take the
   tunable constant as t. */
real sine_rn(real a,int rn,double t);
real __stdcall sine(real a);
real __stdcall test_sine(real a,unsigned int t);
real cose_rn(real a,int rn,double t);
real __fastcall cose(real a);
real __fastcall test_cose(real a,unsigned int t);