typedef unsigned int V_UINT32;
typedef int V_INT32;
V_UINT32 VSqrt1( V_UINT32 x ) //简单二分查找
{
V_UINT32 a = 1, b = ( x >> 5) + 8, m;
if ( b > 65535 ) b = 65535;
do {
m = ( a + b ) >> 1;
if ( m*m > x ) b = m -1;
else a = m + 1;
} while( b >= a );
return a - 1;
}
{
V_UINT32 a = 1, b = ( x >> 5) + 8, m;
if ( b > 65535 ) b = 65535;
do {
m = ( a + b ) >> 1;
if ( m*m > x ) b = m -1;
else a = m + 1;
} while( b >= a );
return a - 1;
}
/***************************************/
V_UINT32 VSqrt2( V_UINT32 x ) //硬件算法(与硬件有关)
{
V_UINT32 m, y, b;
m = 0x40000000;
y = 0;
while ( m != 0 ) {
b = y | m;
y = y >> 1;
if ( x >= b )
{
x = x - b;
y = y | m;
}
m = m >> 2;
}
return y;
}
{
V_UINT32 m, y, b;
m = 0x40000000;
y = 0;
while ( m != 0 ) {
b = y | m;
y = y >> 1;
if ( x >= b )
{
x = x - b;
y = y | m;
}
m = m >> 2;
}
return y;
}
/***************************************/
V_UINT32 VSqrt3( V_UINT32 x ) //牛顿法
{
V_UINT32 x1;
int s = 1, g0,g1;
if (x <= 1) return x;
x1 = x - 1;
if ( x1 > 65535 )
{
s += 8;
x1 >>= 16;
}
if ( x1 > 255 )
{
s += 4;
x1 >>= 8;
}
if ( x1 > 15 )
{
s += 2;
x1 >>= 4;
}
if ( x1 > 3 )
{
s += 1;
}
g0 = 1 << s;
g1 = ( g0 + ( x >> s ) ) >> 1;
while ( g1 < g0 ) {
g0 = g1;
g1 = ( g0 + x/g0 ) >> 1;
}
return g0;
}
{
V_UINT32 x1;
int s = 1, g0,g1;
if (x <= 1) return x;
x1 = x - 1;
if ( x1 > 65535 )
{
s += 8;
x1 >>= 16;
}
if ( x1 > 255 )
{
s += 4;
x1 >>= 8;
}
if ( x1 > 15 )
{
s += 2;
x1 >>= 4;
}
if ( x1 > 3 )
{
s += 1;
}
g0 = 1 << s;
g1 = ( g0 + ( x >> s ) ) >> 1;
while ( g1 < g0 ) {
g0 = g1;
g1 = ( g0 + x/g0 ) >> 1;
}
return g0;
}
/***************************************/
V_UINT32 VSqrt4( V_UINT32 n ) //牛顿法(原项目代码使用的方法)
{
V_UINT32 nextTrial=n/2;
V_UINT32 currentAnswer;
if(n<=1)return n;
do{
currentAnswer=nextTrial;
nextTrial=(nextTrial+n/nextTrial)/2;
}while(nextTrial<currentAnswer);
return currentAnswer;
}
{
V_UINT32 nextTrial=n/2;
V_UINT32 currentAnswer;
if(n<=1)return n;
do{
currentAnswer=nextTrial;
nextTrial=(nextTrial+n/nextTrial)/2;
}while(nextTrial<currentAnswer);
return currentAnswer;
}
测试平台:PXA255+Linux/GCC
///测试结果:数值较小时,VSqrt1效率最高;数值较大时,VSqrt3效率最高。