96年的夏天,我刚上大一下学期,那时学习的语言是fortan 77,上机是VME中型机,操作系统是Novell,教课的是颇有名气的薛学勤教授,我利用业余时间用这门语言做了两个程序,一个是类似autocad的全屏二维平面设计程序,另一个就是求一百位PI值,很不幸,在写一百位pi值的时候,被薛老师发现了,他告诫我说应该把基础打好,不要一上来就写复杂的程序,这样对自己发展不好。最后的结果是我的写完了这个程序,总共1000行左右,并且把它稍微改动了一下,又求出了100位自然对数的值,并把结果打印到了最后留给我们的打印纸上,同时我也因此被迫写了一份大学唯一的检查。呵呵。
我的那份程序的源代码早已丢失,好在其算法还记得,就是利用计算PI的级数公式:
atan(X)=X-X^3/3+X^5/5-...
PI=4*(atan(1))
atan(1)=4*atan(1/5)-atan(1/239)
这个公式是我高中的时候看一本《实数》的小册子读到的,很不错,没用多少步(少于30步)就得到了想要的精度。
为了达到100位的精度,四则运算过程都要保持一百位以上的精度,为了简单,特规定了几组30长的整数数组,(乘法需要加倍),每个整数保存10进制的4位值。(Fortan77中整形是16位的)
比如两个数相乘,(fortan语法忘光了,故用C++描述):
vector<int> bigmul(vector<int> A, vector<int> B) // a*b
{
vector<int> result(A.size()+B.size()+1);
int i,j;
for(i=0; i<B.size(); i++)
{
if(B[i]==0) continue;
for(j=0; j<A.size(); j++)
{
if(A[j]==0) continue;
result[i+j]+=A[j]*B[i];
result[i+j+1]+=result[i+j]/10000;
result[i+j]%=10000;
}
}
return result;
}
计算pi所用的公式很落后。当然,这是受当时的知识水平所局限。
现代计算PI常用的公式有:
426880 * sqrt(10005) / PI = ∑(0,N)(0,N,(6n)!(545140134n+13591409)/((n!)^3)(3n)!(-640320)^3n)
PI=∑(0,N)(1,N,(1+1(4n*n-1)))/sum(1,N,1/(4*n*n-1))
PI=∑(0,N)(4/(8n+1)-2/(8n+4)-1/(8n+5)-1/(8n+6))/16^n
数字切分则有更高效的二进制切分算法:
/*
* Compute the numerator P and denominator Q of
* P/Q = 1/(N0+1) + 1/(N0+1)/(N0+2) + ... + 1/(N0+1)/.../N1
*/
void BinarySplittingE(long N0, long N1, BigInt * P, BigInt * Q)
{
BigInt PP, QQ;
long NMid;
if (N1-N0==1) {
P->Size = Q->Size = 1;
P->Coef[0] = 1.;
Q->Coef[0] = (double) N1;
UpdateBigInt(P);
UpdateBigInt(Q);
return;
}
NMid = (N0+N1)/2;
BinarySplittingE(N0,NMid,P,Q);
/* To save memory, take the non used coefficients of P and Q
for coefficient of the BigInt used in the second splitting part */
PP.Coef = P->Coef + P->Size;
PP.SizeMax = P->SizeMax-P->Size;
QQ.Coef = Q->Coef + Q->Size;
QQ.SizeMax = Q->SizeMax-Q->Size;
BinarySplittingE(NMid,N1,&PP,&QQ);
MulBigInt(P,&QQ,&tmpBigInt);
AddBigInt(&tmpBigInt,&PP,P);
MulBigInt(Q,&QQ,Q);
}
如果大家对大数运算有了解的话,就会很清楚,这个算法太弱了,
利用对大数做fft变换后,利用其系数正交性,运算复杂度会从O(n^2)降低到O(nLog(n)).
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define PI 3.1415926535897932384626
typedef struct ComplexTag {
Real R,I;
} Complex;
long FFTLengthMax;
Complex * OmegaFFT;
Complex * ArrayFFT0, * ArrayFFT1;
Complex * ComplexCoef;
double FFTSquareWorstError;
long AllocatedMemory;
void InitializeFFT(long MaxLength)
{
long i;
Real Step;
FFTLengthMax = MaxLength;
OmegaFFT = (Complex *) malloc(MaxLength/2*sizeof(Complex));
ArrayFFT0 = (Complex *) malloc(MaxLength*sizeof(Complex));
ArrayFFT1 = (Complex *) malloc(MaxLength*sizeof(Complex));
ComplexCoef = (Complex *) malloc(MaxLength*sizeof(Complex));
Step = 2.*PI/(double) MaxLength;
for (i=0; 2*i<MaxLength; i++) {
OmegaFFT[i].R = cos(Step*(double)i);
OmegaFFT[i].I = sin(Step*(double)i);
}
FFTSquareWorstError=0.;
AllocatedMemory = 7*MaxLength*sizeof(Complex)/2;
}
void RecursiveFFT(Complex * Coef, Complex * FFT, long Length, long Step,
long Sign)
{
long i, OmegaStep;
Complex * FFT0, * FFT1, * Omega;
Real tmpR, tmpI;
if (Length==2) {
FFT[0].R = Coef[0].R + Coef[Step].R;
FFT[0].I = Coef[0].I + Coef[Step].I;
FFT[1].R = Coef[0].R - Coef[Step].R;
FFT[1].I = Coef[0].I - Coef[Step].I;
return;
}
FFT0 = FFT;
RecursiveFFT(Coef ,FFT0,Length/2,Step*2,Sign);
FFT1 = FFT+Length/2;
RecursiveFFT(Coef+Step,FFT1,Length/2,Step*2,Sign);
Omega = OmegaFFT;
OmegaStep = FFTLengthMax/Length;
for (i=0; 2*i<Length; i++, Omega += OmegaStep) {
/* Recursion formula for FFT :
FFT[i] <- FFT0[i] + Omega*FFT1[i]
FFT[i+Length/2] <- FFT0[i] - Omega*FFT1[i],
Omega = exp(2*I*PI*i/Length) */
tmpR = Omega[0].R*FFT1[i].R-Sign*Omega[0].I*FFT1[i].I;
tmpI = Omega[0].R*FFT1[i].I+Sign*Omega[0].I*FFT1[i].R;
FFT1[i].R = FFT0[i].R - tmpR;
FFT1[i].I = FFT0[i].I - tmpI;
FFT0[i].R = FFT0[i].R + tmpR;
FFT0[i].I = FFT0[i].I + tmpI;
}
}
/* Compute the complex Fourier Transform of Coef into FFT */
void FFT(Real * Coef, long Length, Complex * FFT, long NFFT)
{
long i;
/* Transform array of real coefficient into array of complex */
for (i=0; i<Length; i++) {
ComplexCoef[i].R = Coef[i];
ComplexCoef[i].I = 0.;
}
for (; i<NFFT; i++)
ComplexCoef[i].R = ComplexCoef[i].I = 0.;
RecursiveFFT(ComplexCoef,FFT,NFFT,1,1);
}
/* Compute the inverse Fourier Transform of FFT into Coef */
void InverseFFT(Complex * FFT, long NFFT, Real * Coef, long Length)
{
long i;
Real invNFFT = 1./(Real) NFFT, tmp;
RecursiveFFT(FFT, ComplexCoef, NFFT, 1, -1);
for (i=0; i<Length; i++) {
/* Closest integer to ComplexCoef[i].R/NFFT */
tmp = invNFFT*ComplexCoef[i].R;
Coef[i] = floor(0.5+tmp);
if ((tmp-Coef[i])*(tmp-Coef[i])>FFTSquareWorstError)
FFTSquareWorstError = (tmp-Coef[i])*(tmp-Coef[i]);
}
}
void Convolution(Complex * A, Complex * B, long NFFT, Complex * C)
{
long i;
Real tmpR, tmpI;
for (i=0; i<NFFT; i++) {
tmpR = A[i].R*B[i].R-A[i].I*B[i].I;
tmpI = A[i].R*B[i].I+A[i].I*B[i].R;
C[i].R = tmpR;
C[i].I = tmpI;
}
}
void MulWithFFT(Real * ACoef, long ASize,
Real * BCoef, long BSize,
Real * CCoef)
{
long NFFT = 2;
while (NFFT<ASize+BSize)
NFFT *= 2;
if (NFFT>FFTLengthMax) {
printf("Error, FFT Size is too big in MulWithFFT\n");
return;
}
FFT(ACoef, ASize, ArrayFFT0, NFFT);
FFT(BCoef, BSize, ArrayFFT1, NFFT);
Convolution(ArrayFFT0,ArrayFFT1,NFFT,ArrayFFT0);
InverseFFT(ArrayFFT0,NFFT,CCoef, ASize+BSize-1);
}
NTT(Number theoretic transforms)则巧妙的使用了孫子定理和质数性质,
将运算过程限定在一个特定的整数范围,避免了FFT快速乘法因浮点运算而带来的误差。基本思想则和上者相同。
// The core of the apfloat multiplication
// Return a new apstruct
apstruct *apmul (apstruct *a, apstruct *b)
{
int i, sign;
size_t n, prec, size, rsize;
apstruct *ap;
size_t trueasize, truebsize;
assert (a); // Won't work on uninitialized apfloats
assert (b);
sign = a->sign * b->sign;
if (!sign) return new apstruct; // Zero
prec = min (a->prec, b->prec);
size = min (prec, a->size + b->size);
trueasize = min (a->size, prec);
truebsize = min (b->size, prec);
n = rnd23up (trueasize + truebsize);
rsize = min (n, size + 2);
if (a == b)
ap = autoconvolution (a, rsize, trueasize, n, &i);
else
ap = convolution (a, b, rsize, trueasize, truebsize, n, &i);
size -= lastzeros (ap, size);
ap->relocate (DEFAULT, size);
ap->sign = sign;
ap->prec = prec;
ap->exp = a->exp + b->exp - 1 + i;
return ap;
}
// Round up to the nearest power of two or three times a power of two
size_t rnd23up (size_t x)
{
size_t r = 1;
if (!x) return 0;
while (r < x)
{
if (r == 1)
r = 2;
else if (r == (r & -r))
r = r / 2 * 3;
else
r = r / 3 * 4;
}
return r;
}
// Convolution of the mantissas in s1 and s2
// Use sizes s1size and s2size correspondingly
// Result size is rsize
// Transform length is n
// *i = 1 if right shift occurred, otherwise 0
apstruct *convolution (apstruct *s1, apstruct *s2, size_t rsize, size_t s1size, size_t s2size, size_t n, int *i)
{
int location = (n > Maxblocksize ? DISK : MEMORY);
apstruct *tmp1;
apstruct *tmp2;
apstruct *tmp3;
apstruct *tmp4;
if (MAXTRANSFORMLENGTH)
assert (n <= MAXTRANSFORMLENGTH); // Otherwise it won't work
setmodulus (moduli[2]);
tmp2 = new apstruct (*s2, s2size, location, n);
if (location != MEMORY)
{
fstream &fs = tmp2->openstream ();
tabletwopassfnttrans (fs, primitiveroots[2], 1, n);
tmp2->closestream ();
}
else
{
modint *data = tmp2->getdata (0, n);
tablesixstepfnttrans (data, primitiveroots[2], 1, n);
tmp2->putdata ();
tmp2->relocate (DEFAULT);
}
tmp1 = new apstruct (*s1, s1size, location, n);
if (location != MEMORY)
{
fstream &fs = tmp1->openstream ();
tabletwopassfnttrans (fs, primitiveroots[2], 1, n);
tmp1->closestream ();
}
else
{
modint *data = tmp1->getdata (0, n);
tablesixstepfnttrans (data, primitiveroots[2], 1, n);
tmp1->putdata ();
}
multiplyinplace (tmp1, tmp2);
delete tmp2;
if (location != MEMORY)
{
fstream &fs = tmp1->openstream ();
itabletwopassfnttrans (fs, primitiveroots[2], -1, n);
tmp1->closestream ();
}
else
{
modint *data = tmp1->getdata (0, n);
itablesixstepfnttrans (data, primitiveroots[2], -1, n);
tmp1->putdata ();
tmp1->relocate (DEFAULT);
}
setmodulus (moduli[1]);
tmp3 = new apstruct (*s2, s2size, location, n);
if (location != MEMORY)
{
fstream &fs = tmp3->openstream ();
tabletwopassfnttrans (fs, primitiveroots[1], 1, n);
tmp3->closestream ();
}
else
{
modint *data = tmp3->getdata (0, n);
tablesixstepfnttrans (data, primitiveroots[1], 1, n);
tmp3->putdata ();
tmp3->relocate (DEFAULT);
}
tmp2 = new apstruct (*s1, s1size, location, n);
if (location != MEMORY)
{
fstream &fs = tmp2->openstream ();
tabletwopassfnttrans (fs, primitiveroots[1], 1, n);
tmp2->closestream ();
}
else
{
modint *data = tmp2->getdata (0, n);
tablesixstepfnttrans (data, primitiveroots[1], 1, n);
tmp2->putdata ();
}
multiplyinplace (tmp2, tmp3);
delete tmp3;
if (location != MEMORY)
{
fstream &fs = tmp2->openstream ();
itabletwopassfnttrans (fs, primitiveroots[1], -1, n);
tmp2->closestream ();
}
else
{
modint *data = tmp2->getdata (0, n);
itablesixstepfnttrans (data, primitiveroots[1], -1, n);
tmp2->putdata ();
tmp2->relocate (DEFAULT);
}
setmodulus (moduli[0]);
tmp4 = new apstruct (*s2, s2size, location, n);
if (location != MEMORY)
{
fstream &fs = tmp4->openstream ();
tabletwopassfnttrans (fs, primitiveroots[0], 1, n);
tmp4->closestream ();
}
else
{
modint *data = tmp4->getdata (0, n);
tablesixstepfnttrans (data, primitiveroots[0], 1, n);
tmp4->putdata ();
tmp4->relocate (DEFAULT);
}
tmp3 = new apstruct (*s1, s1size, location, n);
if (location != MEMORY)
{
fstream &fs = tmp3->openstream ();
tabletwopassfnttrans (fs, primitiveroots[0], 1, n);
tmp3->closestream ();
}
else
{
modint *data = tmp3->getdata (0, n);
tablesixstepfnttrans (data, primitiveroots[0], 1, n);
tmp3->putdata ();
}
multiplyinplace (tmp3, tmp4);
delete tmp4;
if (location != MEMORY)
{
fstream &fs = tmp3->openstream ();
itabletwopassfnttrans (fs, primitiveroots[0], -1, n);
tmp3->closestream ();
}
else
{
modint *data = tmp3->getdata (0, n);
itablesixstepfnttrans (data, primitiveroots[0], -1, n);
tmp3->putdata ();
}
*i = carrycrt (tmp3, tmp2, tmp1, rsize);
delete tmp1;
delete tmp2;
// Return value can remain in memory
return tmp3;
}
// Linear multiplication in the number theoretic domain
// Assume that ds1 will be in memory if possible
void multiplyinplace (apstruct *ds1, apstruct *s2)
{
size_t t, p = ds1->size, l, r = 0;
modint *buf1, *buf2;
while (p)
{
l = (p < Blocksize ? p : Blocksize);
p -= l;
buf1 = ds1->getdata (r, l);
buf2 = s2->getdata (r, l);
r += l;
for (t = 0; t < l; t++)
buf1[t] *= buf2[t];
ds1->putdata ();
s2->cleardata ();
}
}
圆周率的精确解
最新推荐文章于 2024-07-15 22:10:26 发布